Compare commits
	
		
			No commits in common. "51401ba96479fa92b46fe262217e69ff0376cf41" and "b3ac05a4636ce2a6ecaace74514875874f692b20" have entirely different histories.
		
	
	
		
			51401ba964
			...
			b3ac05a463
		
	
		
							
								
								
									
										27
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										27
									
								
								README.md
									
									
									
									
									
								
							| 
						 | 
					@ -1,14 +1,11 @@
 | 
				
			||||||
# csv2ankicards
 | 
					# csv2ankicards
 | 
				
			||||||
 | 
					
 | 
				
			||||||
A simple toolkit that offers:
 | 
					A simple tool to convert CSV files into Anki deck packages (.apkg files).
 | 
				
			||||||
- Conversion of CSV files into Anki deck packages (.apkg files).
 | 
					 | 
				
			||||||
- Conversion of image files in a directory to a text file using Optical Character Recognition (OCR).
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Features
 | 
					## Features
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- Converts a CSV file with questions and answers into an Anki deck package.
 | 
					- Converts a CSV file with questions and answers into an Anki deck package.
 | 
				
			||||||
- Converts image files from a specified directory to a single text file using OCR.
 | 
					- There are only two columns in the CSV file, separated by the first comma encountered.
 | 
				
			||||||
- For CSV: there are only two columns in the CSV file, separated by the first comma encountered.
 | 
					 | 
				
			||||||
- CSV files should have a "Front" column for questions and a "Back" column for answers.
 | 
					- CSV files should have a "Front" column for questions and a "Back" column for answers.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Installation
 | 
					## Installation
 | 
				
			||||||
| 
						 | 
					@ -32,8 +29,6 @@ A simple toolkit that offers:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Usage
 | 
					## Usage
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### CSV to Anki Conversion
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
To convert a CSV file into an Anki deck package:
 | 
					To convert a CSV file into an Anki deck package:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```bash
 | 
					```bash
 | 
				
			||||||
| 
						 | 
					@ -42,33 +37,19 @@ python csv2ankicards.py /path/to/your/csvfile.csv output.apkg
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This will produce an `output.apkg` file which can then be imported into Anki.
 | 
					This will produce an `output.apkg` file which can then be imported into Anki.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#### CSV Format
 | 
					### CSV Format
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The CSV file should follow this format:
 | 
					The CSV file should follow this format:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
Front,Back
 | 
					Front,Back
 | 
				
			||||||
Your question here,Your answer here
 | 
					Your question here,Your answer here, and here
 | 
				
			||||||
Another question,list of: answer1, answer2, answer3
 | 
					Another question,list of: answer1, answer2, answer3
 | 
				
			||||||
...
 | 
					...
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
**Note:** If your answers contain commas, they will be considered as part of the answer. Only the first comma is used to separate the question from the answer.
 | 
					**Note:** If your answers contain commas, they will be considered as part of the answer. Only the first comma is used to separate the question from the answer.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
### Image to Text Conversion
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
To convert images from a directory to a single text file using OCR:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```bash
 | 
					 | 
				
			||||||
python images2text.py /path/to/your/image_directory/
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This will produce a `final.txt` file which contains the text extracted from the images.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#### Supported Image Formats
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Currently supported formats for the images are: `.png`, `.jpg`, and `.jpeg`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## License
 | 
					## License
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[MIT License](LICENSE)
 | 
					[MIT License](LICENSE)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,85 +0,0 @@
 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import sys
 | 
					 | 
				
			||||||
from subprocess import run, CalledProcessError
 | 
					 | 
				
			||||||
from concurrent.futures import ThreadPoolExecutor
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
converted_dir = "converted"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def is_image_file(path):
 | 
					 | 
				
			||||||
    lower_path = path.lower()
 | 
					 | 
				
			||||||
    return lower_path.endswith('.png') or lower_path.endswith('.jpg') or lower_path.endswith('.jpeg')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def convert_image(image_path):
 | 
					 | 
				
			||||||
    print(f"Converting {image_path}...")
 | 
					 | 
				
			||||||
    converted_path = os.path.join(converted_dir, os.path.basename(image_path))
 | 
					 | 
				
			||||||
    cmd = [
 | 
					 | 
				
			||||||
        "convert",
 | 
					 | 
				
			||||||
        image_path,
 | 
					 | 
				
			||||||
        "-colorspace", "Gray",
 | 
					 | 
				
			||||||
        "-resize", "300%",
 | 
					 | 
				
			||||||
        "-threshold", "55%",
 | 
					 | 
				
			||||||
        "-type", "Grayscale",
 | 
					 | 
				
			||||||
        converted_path
 | 
					 | 
				
			||||||
    ]
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        run(cmd, check=True)
 | 
					 | 
				
			||||||
        print(f"Converted image output to {converted_path}!")
 | 
					 | 
				
			||||||
        return converted_path
 | 
					 | 
				
			||||||
    except CalledProcessError:
 | 
					 | 
				
			||||||
        print(f"Error converting {image_path} with ImageMagick. Using original for Tesseract.")
 | 
					 | 
				
			||||||
        return image_path
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def ocr_image(image_path):
 | 
					 | 
				
			||||||
    print(f"OCR'ing {image_path}...")
 | 
					 | 
				
			||||||
    text_filename = os.path.basename(image_path).replace(".jpg", ".txt")
 | 
					 | 
				
			||||||
    text_path = os.path.join(converted_dir, text_filename)
 | 
					 | 
				
			||||||
    cmd = ["tesseract", image_path, text_path.replace(".txt", "")]
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        run(cmd, check=True)
 | 
					 | 
				
			||||||
        print(f"OCRed to {text_path}!")
 | 
					 | 
				
			||||||
        return text_path
 | 
					 | 
				
			||||||
    except CalledProcessError:
 | 
					 | 
				
			||||||
        print(f"Error processing {image_path} with Tesseract. Skipping.")
 | 
					 | 
				
			||||||
        return None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def process_image(image_path):
 | 
					 | 
				
			||||||
    converted_path = convert_image(image_path)
 | 
					 | 
				
			||||||
    print(f"OCR'ing image {image_path} (now at {converted_path})...")
 | 
					 | 
				
			||||||
    text_path = ocr_image(converted_path)
 | 
					 | 
				
			||||||
    if text_path and os.path.exists(text_path):
 | 
					 | 
				
			||||||
        with open(text_path, 'r') as text_file:
 | 
					 | 
				
			||||||
            text_content = text_file.read()
 | 
					 | 
				
			||||||
            print(f"Added text from {text_path} to final output.")
 | 
					 | 
				
			||||||
            return text_content
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        print(f"Cannot locate {text_path}! Cannot add text to final output!")
 | 
					 | 
				
			||||||
        return None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def main(directory_path):
 | 
					 | 
				
			||||||
    final_text = []
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if not os.path.exists(converted_dir):
 | 
					 | 
				
			||||||
        os.mkdir(converted_dir)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    image_paths = []
 | 
					 | 
				
			||||||
    for root, dirs, files in os.walk(directory_path):
 | 
					 | 
				
			||||||
        for file in files:
 | 
					 | 
				
			||||||
            image_path = os.path.join(root, file)
 | 
					 | 
				
			||||||
            if is_image_file(image_path):
 | 
					 | 
				
			||||||
                image_paths.append(image_path)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # Use a ThreadPoolExecutor to process images in parallel
 | 
					 | 
				
			||||||
    with ThreadPoolExecutor() as executor:
 | 
					 | 
				
			||||||
        final_text = list(executor.map(process_image, image_paths))
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    # Filter out any None values and write the text to final.txt
 | 
					 | 
				
			||||||
    final_text = [text for text in final_text if text is not None]
 | 
					 | 
				
			||||||
    with open("final.txt", 'w') as f:
 | 
					 | 
				
			||||||
        f.write("\n".join(final_text))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if __name__ == "__main__":
 | 
					 | 
				
			||||||
    if len(sys.argv) != 2:
 | 
					 | 
				
			||||||
        print("Usage: python images2text.py <directory_path>")
 | 
					 | 
				
			||||||
        sys.exit(1)
 | 
					 | 
				
			||||||
    main(sys.argv[1])
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,2 +1 @@
 | 
				
			||||||
genanki==0.8.0
 | 
					genanki==0.8.0
 | 
				
			||||||
Pillow
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user