diff --git a/image_processing.py b/image_processing.py index 5275512..574109f 100644 --- a/image_processing.py +++ b/image_processing.py @@ -44,7 +44,11 @@ def convert_image(image_path): def ocr_image(image_path): logging.info(f"OCR'ing {image_path}...") - text_filename = os.path.basename(image_path).replace(".jpg", ".txt") + + base_name = os.path.basename(image_path) + root_name, _ = os.path.splitext(base_name) + text_filename = f"{root_name}.txt" + text_path = os.path.join(CONVERTED_DIR, text_filename) cmd = ["tesseract", image_path, text_path.replace(".txt", "")] try: