diff --git a/ankiai.py b/ankiai.py
index 5882905..8d83918 100644
--- a/ankiai.py
+++ b/ankiai.py
@@ -1,16 +1,19 @@
 import sys
+import logging
+from logging_config import setup_logging
 from images2text import main as ocr_images
 from prompt4cards import prompt_for_card_content, response_to_json
 from json2deck import to_package
 
+setup_logging()
 
 
 def images_to_package(directory_path, outfile):
     ocr_text = ocr_images(directory_path)
     response_text = prompt_for_card_content(ocr_text)
     deck_json = response_to_json(response_text)
     to_package(deck_json).write_to_file(outfile)
-    print(f"Deck created at: {outfile}")
+    logging.info(f"Deck created at: {outfile}")
 
 
 if __name__ == "__main__":
diff --git a/constants.py b/constants.py
new file mode 100644
index 0000000..bce6b4c
--- /dev/null
+++ b/constants.py
@@ -0,0 +1,18 @@
+# File and Directory Constants
+CONVERTED_DIR = "converted"
+FINAL_OUTPUT = "final.txt"
+IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg']
+OUTPUT_FILENAME = "output_deck.json"
+
+# API Constants
+API_KEY_ENV = "OPENAI_API_KEY"
+CHAT_MODEL = "gpt-3.5-turbo"
+
+# Server Constants (moved out of server.py, which imports them from here)
+IMAGE_KEY = 'image'
+OUTPUT_FILE = 'cards.apkg'
+
+# Error Messages
+NO_IMAGE_PART_ERROR = 'No image part'
+NO_SELECTED_FILE_ERROR = 'No selected file'
+INVALID_FILENAME_ERROR = 'Invalid filename'
diff --git a/images2text.py b/images2text.py
index 27f352e..54187ee 100755
--- a/images2text.py
+++ b/images2text.py
@@ -1,17 +1,20 @@
 import os
 import sys
+import logging
+
+from logging_config import setup_logging
 from subprocess import run, CalledProcessError
 from concurrent.futures import ThreadPoolExecutor
+from utilities import is_image_file, ensure_directory_exists
+from constants import CONVERTED_DIR, FINAL_OUTPUT
 
-converted_dir = "converted"
 
-def is_image_file(path):
-    lower_path = path.lower()
-    return lower_path.endswith('.png') or lower_path.endswith('.jpg') or lower_path.endswith('.jpeg')
+setup_logging()
+
 
 def convert_image(image_path):
-    print(f"Converting {image_path}...")
-    converted_path = os.path.join(converted_dir, os.path.basename(image_path))
+    logging.info(f"Converting {image_path}...")
+    converted_path = os.path.join(CONVERTED_DIR, os.path.basename(image_path))
     cmd = [
         "convert",
         image_path,
@@ -24,43 +27,47 @@ def convert_image(image_path):
 
     try:
         run(cmd, check=True)
-        print(f"Converted image output to {converted_path}!")
+        logging.info(f"Converted image output to {converted_path}!")
         return converted_path
     except CalledProcessError:
-        print(f"Error converting {image_path} with ImageMagick. Using original for Tesseract.")
+        logging.warning(f"Error converting {image_path} with ImageMagick. Using original for Tesseract.")
         return image_path
 
+
 def ocr_image(image_path):
-    print(f"OCR'ing {image_path}...")
-    text_filename = os.path.basename(image_path).replace(".jpg", ".txt")
-    text_path = os.path.join(converted_dir, text_filename)
-    cmd = ["tesseract", image_path, text_path.replace(".txt", "")]
+    logging.info(f"OCR'ing {image_path}...")
+    # Tesseract appends ".txt" to the output base, so drop any image extension.
+    image_stem = os.path.splitext(os.path.basename(image_path))[0]
+    output_base = os.path.join(CONVERTED_DIR, image_stem)
+    text_path = output_base + ".txt"
+    cmd = ["tesseract", image_path, output_base]
 
     try:
         run(cmd, check=True)
-        print(f"OCRed to {text_path}!")
+        logging.info(f"OCRed to {text_path}!")
         return text_path
     except CalledProcessError:
-        print(f"Error processing {image_path} with Tesseract. Skipping.")
+        logging.error(f"Error processing {image_path} with Tesseract. Skipping.")
         return None
 
+
 def process_image(image_path):
     converted_path = convert_image(image_path)
-    print(f"OCR'ing image {image_path} (now at {converted_path})...")
+    logging.info(f"OCR'ing image {image_path} (now at {converted_path})...")
     text_path = ocr_image(converted_path)
     if text_path and os.path.exists(text_path):
         with open(text_path, 'r') as text_file:
             text_content = text_file.read()
-        print(f"Added text from {text_path} to final output.")
+        logging.info(f"Added text from {text_path} to final output.")
         return text_content
     else:
-        print(f"Cannot locate {text_path}! Cannot add text to final output!")
+        logging.error(f"Cannot locate {text_path}! Cannot add text to final output!")
         return None
 
+
 def main(directory_path):
     final_text = []
-    if not os.path.exists(converted_dir):
-        os.mkdir(converted_dir)
+    ensure_directory_exists(CONVERTED_DIR)
 
     image_paths = []
     for root, dirs, files in os.walk(directory_path):
@@ -75,11 +82,10 @@ def main(directory_path):
 
     # Filter out any None values and write the text to final.txt
     final_text = [text for text in final_text if text is not None]
-    FINAL_OUTPUT = "final.txt"
     with open(FINAL_OUTPUT, 'w') as f:
         f.write("\n".join(final_text))
 
-    print(f"All images processed! Final output saved to {FINAL_OUTPUT}")
+    logging.info(f"All images processed! Final output saved to {FINAL_OUTPUT}")
 
     return final_text  # Add this line
 
diff --git a/json2deck.py b/json2deck.py
index a2d74f5..076a469 100644
--- a/json2deck.py
+++ b/json2deck.py
@@ -1,6 +1,12 @@
 import json
 import genanki
 import sys
+import logging
+from logging_config import setup_logging
+
+
+setup_logging()
+
 
 # Create a new model for our cards. This is necessary for genanki.
 MY_MODEL = genanki.Model(
@@ -52,4 +58,4 @@ if __name__ == "__main__":
     input_json = sys.argv[1]
     output_apkg = sys.argv[2]
     json_file_to_package(input_json).write_to_file(output_apkg)
-    print(f"Deck created at: {output_apkg}")
+    logging.info(f"Deck created at: {output_apkg}")
diff --git a/logging_config.py b/logging_config.py
new file mode 100644
index 0000000..1fc05f4
--- /dev/null
+++ b/logging_config.py
@@ -0,0 +1,17 @@
+import logging
+
+
+def setup_logging():
+    """Configure root logging with a timestamped format at DEBUG level.
+
+    Safe to call from every module: ``logging.basicConfig`` does nothing
+    once the root logger already has handlers.
+    """
+    logging.basicConfig(level=logging.DEBUG,
+                        format='%(asctime)s [%(levelname)s] - %(module)s: %(message)s',
+                        datefmt='%Y-%m-%d %H:%M:%S')
+
+    # If you also want to save logs to a file, you can add the below lines.
+    # file_handler = logging.FileHandler('ankiai.log')
+    # file_handler.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] - %(module)s: %(message)s'))
+    # logging.getLogger().addHandler(file_handler)
diff --git a/prompt4cards.py b/prompt4cards.py
index 3f26432..a98725e 100644
--- a/prompt4cards.py
+++ b/prompt4cards.py
@@ -1,12 +1,11 @@
 import openai
-import sys
 import os
+import sys
 import json
+from constants import API_KEY_ENV, CHAT_MODEL, OUTPUT_FILENAME
 
-CHAT_MODEL = "gpt-3.5-turbo"
-OUTPUT_FILENAME = "output_deck.json"
 
-API_KEY = os.environ.get("OPENAI_API_KEY")
+API_KEY = os.environ.get(API_KEY_ENV)
 
 if not API_KEY:
-    raise ValueError("Please set the OPENAI_API_KEY environment variable.")
+    raise ValueError(f"Please set the {API_KEY_ENV} environment variable.")
@@ -86,7 +85,7 @@ def response_to_json(response_text):
 
 if __name__ == "__main__":
     if len(sys.argv) != 2:
-        print("Usage: python text2jsondeck.py ")
+        print("Usage: python prompt4cards.py ")
         sys.exit(1)
 
     text_file_path = sys.argv[1]
diff --git a/server.py b/server.py
index a372748..43685f7 100644
--- a/server.py
+++ b/server.py
@@ -1,48 +1,51 @@
+import os
+import tempfile
+import shutil
+import logging
+
+from logging_config import setup_logging
 from flask import Flask, request, send_from_directory, jsonify
 from werkzeug.utils import secure_filename
-import os
-import tempfile
-import shutil
-
 from ankiai import images_to_package
+from constants import IMAGE_KEY, OUTPUT_FILE, NO_IMAGE_PART_ERROR, NO_SELECTED_FILE_ERROR, INVALID_FILENAME_ERROR
+
+setup_logging()
 
 
 app = Flask(__name__)
 
-IMAGE_KEY = 'image'
-OUTPUT_FILE = 'cards.apkg'
-TEMP_DIR = tempfile.mkdtemp()
-
 def save_uploaded_images(images, directory):
     for img in images:
-        # Sanitize the filename
         safe_filename = secure_filename(img.filename)
-
         if not safe_filename:
-            # Handle the case where the filename becomes empty after sanitization
-            raise ValueError("Invalid filename")
+            raise ValueError(INVALID_FILENAME_ERROR)
 
         filename = os.path.join(directory, safe_filename)
         img.save(filename)
 
+
 @app.route('/deck-from-images', methods=['POST'])
 def deck_from_images():
     if IMAGE_KEY not in request.files:
-        return jsonify({'error': 'No image part'}), 400
+        return jsonify({'error': NO_IMAGE_PART_ERROR}), 400
 
     images = request.files.getlist(IMAGE_KEY)
     if not images or not any(img.filename != '' for img in images):
-        return jsonify({'error': 'No selected file'}), 400
+        return jsonify({'error': NO_SELECTED_FILE_ERROR}), 400
 
-    save_uploaded_images(images, TEMP_DIR)
+    temp_dir = tempfile.mkdtemp()
 
     try:
-        images_to_package(TEMP_DIR, OUTPUT_FILE)
+        # Save inside the try so temp_dir is removed even if an upload is rejected.
+        save_uploaded_images(images, temp_dir)
+        images_to_package(temp_dir, OUTPUT_FILE)
         return send_from_directory('.', OUTPUT_FILE, as_attachment=True)
-    except Exception as e:  # Consider catching more specific exceptions
+    except Exception as e:
+        logging.error("Exception occurred: "+str(e), exc_info=True)
         return jsonify({'error': str(e)}), 500
     finally:
-        shutil.rmtree(TEMP_DIR)
+        shutil.rmtree(temp_dir)
 
+
 if __name__ == '__main__':
     app.run(debug=True)
diff --git a/utilities.py b/utilities.py
new file mode 100644
index 0000000..03ecc8d
--- /dev/null
+++ b/utilities.py
@@ -0,0 +1,12 @@
+import os
+from constants import IMAGE_EXTENSIONS
+
+
+def is_image_file(path):
+    """Return True if *path* ends with one of IMAGE_EXTENSIONS (case-insensitive)."""
+    return path.lower().endswith(tuple(IMAGE_EXTENSIONS))
+
+
+def ensure_directory_exists(directory):
+    """Create *directory* (and any missing parents) if it does not already exist."""
+    os.makedirs(directory, exist_ok=True)