extracted constants and added logging

2023-09-11 20:02:17 +03:00 · 2023-09-11 20:02:17 +03:00 · a24d0ea96f
commit a24d0ea96f
parent c3bdf2dd5e
8 changed files with 96 additions and 44 deletions
--- a/ankiai.py
+++ b/ankiai.py
@ -1,16 +1,19 @@
 import sys
+import logging

+from logging_config import setup_logging
 from images2text import main as ocr_images
 from prompt4cards import prompt_for_card_content, response_to_json
 from json2deck import to_package

+setup_logging()

 def images_to_package(directory_path, outfile):
    ocr_text = ocr_images(directory_path)
    response_text = prompt_for_card_content(ocr_text)
    deck_json = response_to_json(response_text)
    to_package(deck_json).write_to_file(outfile)
-    print(f"Deck created at: {outfile}")
+    logging.info(f"Deck created at: {outfile}")


 if __name__ == "__main__":
--- a/constants.py
+++ b/constants.py
@ -0,0 +1,14 @@
+# File and Directory Constants
+CONVERTED_DIR = "converted"
+FINAL_OUTPUT = "final.txt"
+IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg']
+OUTPUT_FILENAME = "output_deck.json"
+
+# API Constants
+API_KEY_ENV = "OPENAI_API_KEY"
+CHAT_MODEL = "gpt-3.5-turbo"
+
+# Error Messages
+NO_IMAGE_PART_ERROR = 'No image part'
+NO_SELECTED_FILE_ERROR = 'No selected file'
+INVALID_FILENAME_ERROR = 'Invalid filename'
--- a/images2text.py
+++ b/images2text.py
@ -1,17 +1,20 @@
 import os
 import sys
+import logging
+
+from logging_config import setup_logging
 from subprocess import run, CalledProcessError
 from concurrent.futures import ThreadPoolExecutor
+from utilities import is_image_file, ensure_directory_exists
+from constants import CONVERTED_DIR, FINAL_OUTPUT

-converted_dir = "converted"

-def is_image_file(path):
-    lower_path = path.lower()
-    return lower_path.endswith('.png') or lower_path.endswith('.jpg') or lower_path.endswith('.jpeg')
+setup_logging()
+

 def convert_image(image_path):
-    print(f"Converting {image_path}...")
-    converted_path = os.path.join(converted_dir, os.path.basename(image_path))
+    logging.info(f"Converting {image_path}...")
+    converted_path = os.path.join(CONVERTED_DIR, os.path.basename(image_path))
    cmd = [
        "convert",
        image_path,
@ -24,43 +27,45 @@ def convert_image(image_path):
    
    try:
        run(cmd, check=True)
-        print(f"Converted image output to {converted_path}!")
+        logging.info(f"Converted image output to {converted_path}!")
        return converted_path
    except CalledProcessError:
-        print(f"Error converting {image_path} with ImageMagick. Using original for Tesseract.")
+        logging.info(f"Error converting {image_path} with ImageMagick. Using original for Tesseract.")
        return image_path

+
 def ocr_image(image_path):
-    print(f"OCR'ing {image_path}...")
+    logging.info(f"OCR'ing {image_path}...")
    text_filename = os.path.basename(image_path).replace(".jpg", ".txt")
-    text_path = os.path.join(converted_dir, text_filename)
+    text_path = os.path.join(CONVERTED_DIR, text_filename)
    cmd = ["tesseract", image_path, text_path.replace(".txt", "")]
    try:
        run(cmd, check=True)
-        print(f"OCRed to {text_path}!")
+        logging.info(f"OCRed to {text_path}!")
        return text_path
    except CalledProcessError:
-        print(f"Error processing {image_path} with Tesseract. Skipping.")
+        logging.info(f"Error processing {image_path} with Tesseract. Skipping.")
        return None

+
 def process_image(image_path):
    converted_path = convert_image(image_path)
-    print(f"OCR'ing image {image_path} (now at {converted_path})...")
+    logging.info(f"OCR'ing image {image_path} (now at {converted_path})...")
    text_path = ocr_image(converted_path)
    if text_path and os.path.exists(text_path):
        with open(text_path, 'r') as text_file:
            text_content = text_file.read()
-            print(f"Added text from {text_path} to final output.")
+            logging.info(f"Added text from {text_path} to final output.")
            return text_content
    else:
-        print(f"Cannot locate {text_path}! Cannot add text to final output!")
+        logging.info(f"Cannot locate {text_path}! Cannot add text to final output!")
        return None

+
 def main(directory_path):
    final_text = []

-    if not os.path.exists(converted_dir):
-        os.mkdir(converted_dir)
+    ensure_directory_exists(CONVERTED_DIR)

    image_paths = []
    for root, dirs, files in os.walk(directory_path):
@ -75,11 +80,10 @@ def main(directory_path):
    
    # Filter out any None values and write the text to final.txt
    final_text = [text for text in final_text if text is not None]
-    FINAL_OUTPUT = "final.txt"
    with open(FINAL_OUTPUT, 'w') as f:
        f.write("\n".join(final_text))

-    print(f"All images processed! Final output saved to {FINAL_OUTPUT}")
+    logging.info(f"All images processed! Final output saved to {FINAL_OUTPUT}")
    return final_text  # Add this line


--- a/json2deck.py
+++ b/json2deck.py
@ -1,6 +1,12 @@
 import json
 import genanki
 import sys
+import logging
+from logging_config import setup_logging
+
+
+setup_logging()
+

 # Create a new model for our cards. This is necessary for genanki.
 MY_MODEL = genanki.Model(
@ -52,4 +58,4 @@ if __name__ == "__main__":
    input_json = sys.argv[1]
    output_apkg = sys.argv[2]
    json_file_to_package(input_json).write_to_file(output_apkg)
-    print(f"Deck created at: {output_apkg}")
+    logging.info(f"Deck created at: {output_apkg}")
--- a/logging_config.py
+++ b/logging_config.py
@ -0,0 +1,11 @@
+import logging
+
+def setup_logging():
+    logging.basicConfig(level=logging.DEBUG,
+                        format='%(asctime)s [%(levelname)s] - %(module)s: %(message)s',
+                        datefmt='%Y-%m-%d %H:%M:%S')
+
+    # If you also want to save logs to a file, you can add the below lines.
+    # file_handler = logging.FileHandler('ankiai.log')
+    # file_handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] - %(module)s: %(message)s'))
+    # logging.getLogger().addHandler(file_handler)
--- a/prompt4cards.py
+++ b/prompt4cards.py
@ -1,12 +1,11 @@
 import openai
-import sys
 import os
+import sys
 import json
+from constants import API_KEY_ENV, CHAT_MODEL, OUTPUT_FILENAME

-CHAT_MODEL = "gpt-3.5-turbo"
-OUTPUT_FILENAME = "output_deck.json"

-API_KEY = os.environ.get("OPENAI_API_KEY")
+API_KEY = os.environ.get(API_KEY_ENV)
 if not API_KEY:
    raise ValueError("Please set the OPENAI_API_KEY environment variable.")

@ -86,7 +85,7 @@ def response_to_json(response_text):

 if __name__ == "__main__":
    if len(sys.argv) != 2:
-        print("Usage: python text2jsondeck.py <text_file_path>")
+        print("Usage: python prompt4cards.py <text_file_path>")
        sys.exit(1)
    
    text_file_path = sys.argv[1]
--- a/server.py
+++ b/server.py
@ -1,48 +1,54 @@
+import os
+import tempfile
+import shutil
+import logging
+
+from logging_config import setup_logging
 from flask import Flask, request, send_from_directory, jsonify
 from werkzeug.utils import secure_filename
-import os
-import tempfile  
-import shutil    
-
 from ankiai import images_to_package
+from constants import IMAGE_KEY, OUTPUT_FILE, NO_IMAGE_PART_ERROR, NO_SELECTED_FILE_ERROR, INVALID_FILENAME_ERROR

+
+setup_logging()
+
+
+from logging_config import setup_logging
 app = Flask(__name__)

-IMAGE_KEY = 'image'
-OUTPUT_FILE = 'cards.apkg'
-TEMP_DIR = tempfile.mkdtemp()
-
 def save_uploaded_images(images, directory):
    for img in images:
-        # Sanitize the filename
        safe_filename = secure_filename(img.filename)
-
        if not safe_filename:
-            # Handle the case where the filename becomes empty after sanitization
-            raise ValueError("Invalid filename")
+            raise ValueError(INVALID_FILENAME_ERROR)

        filename = os.path.join(directory, safe_filename)
        img.save(filename)

+
@app.route('/deck-from-images', methods=['POST'])
 def deck_from_images():
    if IMAGE_KEY not in request.files:
-        return jsonify({'error': 'No image part'}), 400
+        return jsonify({'error': NO_IMAGE_PART_ERROR}), 400

    images = request.files.getlist(IMAGE_KEY)

    if not images or not any(img.filename != '' for img in images):
-        return jsonify({'error': 'No selected file'}), 400
+        return jsonify({'error': NO_SELECTED_FILE_ERROR}), 400

-    save_uploaded_images(images, TEMP_DIR)
+    temp_dir = tempfile.mkdtemp()
+
+    save_uploaded_images(images, temp_dir)

    try:
-        images_to_package(TEMP_DIR, OUTPUT_FILE)
+        images_to_package(temp_dir, OUTPUT_FILE)
        return send_from_directory('.', OUTPUT_FILE, as_attachment=True)
-    except Exception as e:  # Consider catching more specific exceptions
+    except Exception as e:
+        logging.error("Exception occurred: "+str(e), exc_info=True)
        return jsonify({'error': str(e)}), 500
    finally:
-        shutil.rmtree(TEMP_DIR)
+        shutil.rmtree(temp_dir)
+

 if __name__ == '__main__':
    app.run(debug=True)
--- a/utilities.py
+++ b/utilities.py
@ -0,0 +1,9 @@
+import os
+from constants import IMAGE_EXTENSIONS
+
+def is_image_file(path):
+    return any(path.lower().endswith(ext) for ext in IMAGE_EXTENSIONS)
+
+def ensure_directory_exists(directory):
+    if not os.path.exists(directory):
+        os.mkdir(directory)