Compare commits
No commits in common. "51401ba96479fa92b46fe262217e69ff0376cf41" and "b3ac05a4636ce2a6ecaace74514875874f692b20" have entirely different histories.
51401ba964
...
b3ac05a463
29
README.md
29
README.md
|
@ -1,14 +1,11 @@
|
||||||
# csv2ankicards
|
# csv2ankicards
|
||||||
|
|
||||||
A simple toolkit that offers:
|
A simple tool to convert CSV files into Anki deck packages (.apkg files).
|
||||||
- Conversion of CSV files into Anki deck packages (.apkg files).
|
|
||||||
- Conversion of image files in a directory to a text file using Optical Character Recognition (OCR).
|
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Converts a CSV file with questions and answers into an Anki deck package.
|
- Converts a CSV file with questions and answers into an Anki deck package.
|
||||||
- Converts image files from a specified directory to a single text file using OCR.
|
- Each row of the CSV file has exactly two columns; the row is split at the first comma encountered.
|
||||||
- For CSV input: each row has exactly two columns; the row is split at the first comma encountered.
|
|
||||||
- CSV files should have a "Front" column for questions and a "Back" column for answers.
|
- CSV files should have a "Front" column for questions and a "Back" column for answers.
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
@ -32,8 +29,6 @@ A simple toolkit that offers:
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
### CSV to Anki Conversion
|
|
||||||
|
|
||||||
To convert a CSV file into an Anki deck package:
|
To convert a CSV file into an Anki deck package:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
@ -42,37 +37,23 @@ python csv2ankicards.py /path/to/your/csvfile.csv output.apkg
|
||||||
|
|
||||||
This will produce an `output.apkg` file which can then be imported into Anki.
|
This will produce an `output.apkg` file which can then be imported into Anki.
|
||||||
|
|
||||||
#### CSV Format
|
### CSV Format
|
||||||
|
|
||||||
The CSV file should follow this format:
|
The CSV file should follow this format:
|
||||||
|
|
||||||
```
|
```
|
||||||
Front,Back
|
Front,Back
|
||||||
Your question here,Your answer here
|
Your question here,Your answer here, and here
|
||||||
Another question,list of: answer1, answer2, answer3
|
Another question,list of: answer1, answer2, answer3
|
||||||
...
|
...
|
||||||
```
|
```
|
||||||
|
|
||||||
**Note:** If your answers contain commas, they will be considered as part of the answer. Only the first comma is used to separate the question from the answer.
|
**Note:** If your answers contain commas, they will be considered as part of the answer. Only the first comma is used to separate the question from the answer.
|
||||||
|
|
||||||
### Image to Text Conversion
|
|
||||||
|
|
||||||
To convert images from a directory to a single text file using OCR:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python images2text.py /path/to/your/image_directory/
|
|
||||||
```
|
|
||||||
|
|
||||||
This will produce a `final.txt` file which contains the text extracted from the images.
|
|
||||||
|
|
||||||
#### Supported Image Formats
|
|
||||||
|
|
||||||
Currently supported formats for the images are: `.png`, `.jpg`, and `.jpeg`.
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
[MIT License](LICENSE)
|
[MIT License](LICENSE)
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
|
Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
|
||||||
|
|
|
@ -1,85 +0,0 @@
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from subprocess import run, CalledProcessError
|
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
|
||||||
|
|
||||||
# Working directory (relative to the current working directory) that receives
# the pre-processed images and the per-image OCR text files.
converted_dir = "converted"
|
|
||||||
|
|
||||||
def is_image_file(path):
    """Return True when *path* names a supported image file.

    The check is purely by file extension and is case-insensitive; the
    supported extensions are ``.png``, ``.jpg`` and ``.jpeg``.

    Args:
        path: File name or path, as a string or ``os.PathLike`` object.

    Returns:
        ``True`` if the path ends with one of the supported extensions,
        ``False`` otherwise.
    """
    # str.endswith accepts a tuple, so one call covers every extension.
    return os.fspath(path).lower().endswith((".png", ".jpg", ".jpeg"))
|
|
||||||
|
|
||||||
def convert_image(image_path):
    """Pre-process an image with ImageMagick to make it easier to OCR.

    Invokes the ``convert`` command to turn the image into grayscale,
    upscale it to 300% and apply a 55% threshold. The result is written
    into ``converted_dir`` under the same base name as the input.

    Args:
        image_path: Path of the source image.

    Returns:
        The path of the converted image, or ``image_path`` unchanged when
        the conversion could not be performed, so that OCR can still be
        attempted on the original file.
    """
    print(f"Converting {image_path}...")
    # NOTE: only the base name is kept, so two inputs with the same file
    # name in different sub-directories map to the same converted file.
    converted_path = os.path.join(converted_dir, os.path.basename(image_path))
    cmd = [
        "convert",
        image_path,
        "-colorspace", "Gray",
        "-resize", "300%",
        "-threshold", "55%",
        "-type", "Grayscale",
        converted_path,
    ]

    try:
        run(cmd, check=True)
    except (CalledProcessError, OSError):
        # CalledProcessError: ImageMagick ran but exited non-zero.
        # OSError (e.g. FileNotFoundError): the `convert` executable is
        # missing or cannot be started. Either way, fall back to the
        # original image instead of aborting the whole run.
        print(f"Error converting {image_path} with ImageMagick. Using original for Tesseract.")
        return image_path

    print(f"Converted image output to {converted_path}!")
    return converted_path
|
|
||||||
|
|
||||||
def ocr_image(image_path):
    """Run Tesseract OCR on an image and return the path of the text file.

    Tesseract is given an output *base* name inside ``converted_dir`` and
    appends ``.txt`` to it itself, so the base is the image's file name
    with its extension removed.

    Args:
        image_path: Path of the image to recognise.

    Returns:
        The path of the ``.txt`` file Tesseract was asked to produce, or
        ``None`` if Tesseract failed or could not be started.
    """
    print(f"OCR'ing {image_path}...")
    # Strip whatever extension the image has (.png, .jpeg, .JPG, ...).
    # Replacing only the literal ".jpg" left other extensions in place,
    # which made the returned "text path" point at the image itself.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    output_base = os.path.join(converted_dir, stem)
    text_path = output_base + ".txt"
    cmd = ["tesseract", image_path, output_base]

    try:
        run(cmd, check=True)
    except (CalledProcessError, OSError):
        # OSError covers a missing or non-executable `tesseract` binary.
        print(f"Error processing {image_path} with Tesseract. Skipping.")
        return None

    print(f"OCRed to {text_path}!")
    return text_path
|
|
||||||
|
|
||||||
def process_image(image_path):
    """Convert one image, OCR it and return the recognised text.

    Args:
        image_path: Path of the source image.

    Returns:
        The text read from the OCR output file, or ``None`` when no
        output file was produced.
    """
    converted_path = convert_image(image_path)
    print(f"OCR'ing image {image_path} (now at {converted_path})...")
    text_path = ocr_image(converted_path)

    # Guard clause: OCR failed (None) or the expected file is missing.
    if not text_path or not os.path.exists(text_path):
        print(f"Cannot locate {text_path}! Cannot add text to final output!")
        return None

    # Tesseract writes UTF-8; do not rely on the platform default encoding.
    with open(text_path, "r", encoding="utf-8") as text_file:
        text_content = text_file.read()
    print(f"Added text from {text_path} to final output.")
    return text_content
|
|
||||||
|
|
||||||
def main(directory_path):
    """OCR every supported image under *directory_path* into ``final.txt``.

    The directory is walked recursively. Each image is pre-processed and
    OCRed in a worker thread, and the recognised texts are joined with
    newlines and written to ``final.txt`` in the current working directory.

    Args:
        directory_path: Root directory to search for images.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(converted_dir, exist_ok=True)

    skip_dir = os.path.abspath(converted_dir)
    image_paths = []
    for root, dirs, files in os.walk(directory_path):
        # Prune the output directory in place so that converted images left
        # by this or an earlier run are not picked up as inputs again.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != skip_dir
        ]
        for file_name in files:
            image_path = os.path.join(root, file_name)
            if is_image_file(image_path):
                image_paths.append(image_path)

    # os.walk yields entries in arbitrary order; sort so that the order of
    # the text in final.txt is reproducible between runs.
    image_paths.sort()

    # The work is dominated by external processes, so threads suffice;
    # executor.map preserves the order of image_paths in its results.
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(process_image, image_paths))

    # Drop images that produced no text and write the rest to final.txt.
    final_text = [text for text in results if text is not None]
    with open("final.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(final_text))
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Exactly one positional argument is expected: the image directory.
    if len(sys.argv) == 2:
        main(sys.argv[1])
    else:
        print("Usage: python images2text.py <directory_path>")
        sys.exit(1)
|
|
|
@ -1,2 +1 @@
|
||||||
genanki==0.8.0
|
genanki==0.8.0
|
||||||
Pillow
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user