From 963d99404e44e5e47cab03481aee996356ee5160 Mon Sep 17 00:00:00 2001 From: Benjamin Dweck Date: Thu, 21 Sep 2023 14:42:47 +0300 Subject: [PATCH] BUGFIX: image processing only handles filenames with jpg --- image_processing.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/image_processing.py b/image_processing.py index 5275512..574109f 100644 --- a/image_processing.py +++ b/image_processing.py @@ -44,7 +44,11 @@ def convert_image(image_path): def ocr_image(image_path): logging.info(f"OCR'ing {image_path}...") - text_filename = os.path.basename(image_path).replace(".jpg", ".txt") + + base_name = os.path.basename(image_path) + root_name, _ = os.path.splitext(base_name) + text_filename = f"{root_name}.txt" + text_path = os.path.join(CONVERTED_DIR, text_filename) cmd = ["tesseract", image_path, text_path.replace(".txt", "")] try: