From 28e6c8d6117857263cf6e510402a4edc9d99ae9f Mon Sep 17 00:00:00 2001 From: Benjamin Dweck Date: Mon, 11 Sep 2023 20:35:55 +0300 Subject: [PATCH] decoupled --- ankiai.py | 17 ++++---- constants.py | 6 ++- prompt4cards.py => deck_creation.py | 59 +++++++++++++++++++------- images2text.py => image_processing.py | 20 ++++++--- json2deck.py | 61 --------------------------- server.py | 8 ++-- utilities.py | 9 ---- 7 files changed, 74 insertions(+), 106 deletions(-) rename prompt4cards.py => deck_creation.py (68%) rename images2text.py => image_processing.py (87%) mode change 100755 => 100644 delete mode 100644 json2deck.py delete mode 100644 utilities.py diff --git a/ankiai.py b/ankiai.py index 8d83918..a0ec54f 100644 --- a/ankiai.py +++ b/ankiai.py @@ -2,18 +2,18 @@ import sys import logging from logging_config import setup_logging -from images2text import main as ocr_images -from prompt4cards import prompt_for_card_content, response_to_json -from json2deck import to_package +from image_processing import process_images +from deck_creation import prompt_for_card_content, response_to_json, to_package + +APKG_FILE = "out.apkg" setup_logging() -def images_to_package(directory_path, outfile): - ocr_text = ocr_images(directory_path) +def images_to_package(directory_path): + ocr_text = process_images(directory_path) response_text = prompt_for_card_content(ocr_text) deck_json = response_to_json(response_text) - to_package(deck_json).write_to_file(outfile) - logging.info(f"Deck created at: {outfile}") + return to_package(deck_json) if __name__ == "__main__": @@ -21,4 +21,5 @@ if __name__ == "__main__": print("Usage: python ankiai.py ") sys.exit(1) - images_to_package(sys.argv[1]) + images_to_package(sys.argv[1]).write_to_file(APKG_FILE) + logging.info(f"Deck created at: {APKG_FILE}") diff --git a/constants.py b/constants.py index bce6b4c..619f3b1 100644 --- a/constants.py +++ b/constants.py @@ -1,8 +1,10 @@ # File and Directory Constants +IMAGE_KEY="image" +APKG_FILE="out.apkg" CONVERTED_DIR = "converted" -FINAL_OUTPUT = "final.txt" +TEXT_OCR_FILE = "final.txt" IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg'] -OUTPUT_FILENAME = "output_deck.json" +DECK_JSON_FILE = "output_deck.json" # API Constants API_KEY_ENV = "OPENAI_API_KEY" diff --git a/prompt4cards.py b/deck_creation.py similarity index 68% rename from prompt4cards.py rename to deck_creation.py index a98725e..2e54e56 100644 --- a/prompt4cards.py +++ b/deck_creation.py @@ -1,8 +1,12 @@ import openai import os -import sys import json -from constants import API_KEY_ENV, CHAT_MODEL, OUTPUT_FILENAME +import genanki +from logging_config import setup_logging +from constants import API_KEY_ENV, CHAT_MODEL + + +setup_logging() API_KEY = os.environ.get(API_KEY_ENV) @@ -83,21 +87,44 @@ def response_to_json(response_text): } -if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: python prompt4cards.py ") - sys.exit(1) - - text_file_path = sys.argv[1] +# Create a new model for our cards. This is necessary for genanki. +MY_MODEL = genanki.Model( + 1607372319, + "Simple Model", + fields=[ + {"name": "Title"}, + {"name": "Question"}, + {"name": "Answer"}, + ], + templates=[ + { + "name": "{{Title}}", + "qfmt": "{{Question}}", + "afmt": "{{FrontSide}}
{{Answer}}", + }, + ]) - # Read the text content - with open(text_file_path, 'r') as file: - text_content = file.read() +def json_file_to_package(json_path): + with open(json_path, 'r', encoding='utf-8') as f: + json_data = json.load(f) + package = to_package(json_data) - response_text = prompt_for_card_content(text_content) - deck_json = response_to_json(response_text) + return package - with open(OUTPUT_FILENAME, 'w') as json_file: - json.dump(deck_json, json_file) +def to_package(deck_json): + deck_title = deck_json["DeckTitle"] + deck = genanki.Deck(1607372319, deck_title) - print(f"Saved generated deck to {OUTPUT_FILENAME}") + for card_json in deck_json["Cards"]: + title = card_json["Title"] + question = card_json["Question"] + answer = card_json["Answer"] + + note = genanki.Note( + model=MY_MODEL, + fields=[title, question, answer] + ) + + deck.add_note(note) + + return genanki.Package(deck) diff --git a/images2text.py b/image_processing.py old mode 100755 new mode 100644 similarity index 87% rename from images2text.py rename to image_processing.py index 54187ee..5275512 --- a/images2text.py +++ b/image_processing.py @@ -5,13 +5,21 @@ import logging from logging_config import setup_logging from subprocess import run, CalledProcessError from concurrent.futures import ThreadPoolExecutor -from utilities import is_image_file, ensure_directory_exists -from constants import CONVERTED_DIR, FINAL_OUTPUT +from constants import CONVERTED_DIR, TEXT_OCR_FILE, IMAGE_EXTENSIONS setup_logging() +def is_image_file(path): + return any(path.lower().endswith(ext) for ext in IMAGE_EXTENSIONS) + + +def ensure_directory_exists(directory): + if not os.path.exists(directory): + os.mkdir(directory) + + def convert_image(image_path): logging.info(f"Converting {image_path}...") converted_path = os.path.join(CONVERTED_DIR, os.path.basename(image_path)) @@ -62,7 +70,7 @@ def process_image(image_path): return None -def main(directory_path): +def process_images(directory_path): final_text = [] ensure_directory_exists(CONVERTED_DIR) @@ -80,10 +88,10 @@ def main(directory_path): # Filter out any None values and write the text to final.txt final_text = [text for text in final_text if text is not None] - with open(FINAL_OUTPUT, 'w') as f: + with open(TEXT_OCR_FILE, 'w') as f: f.write("\n".join(final_text)) - logging.info(f"All images processed! Final output saved to {FINAL_OUTPUT}") + logging.info(f"All images processed! Final output saved to {TEXT_OCR_FILE}") return final_text # Add this line @@ -91,4 +99,4 @@ if __name__ == "__main__": if len(sys.argv) != 2: print("Usage: python images2text.py ") sys.exit(1) - main(sys.argv[1]) + process_images(sys.argv[1]) diff --git a/json2deck.py b/json2deck.py deleted file mode 100644 index 076a469..0000000 --- a/json2deck.py +++ /dev/null @@ -1,61 +0,0 @@ -import json -import genanki -import sys -import logging -from logging_config import setup_logging - - -setup_logging() - - -# Create a new model for our cards. This is necessary for genanki. -MY_MODEL = genanki.Model( - 1607372319, - "Simple Model", - fields=[ - {"name": "Title"}, - {"name": "Question"}, - {"name": "Answer"}, - ], - templates=[ - { - "name": "{{Title}}", - "qfmt": "{{Question}}", - "afmt": "{{FrontSide}}
{{Answer}}", - }, - ]) - -def json_file_to_package(json_path): - with open(json_path, 'r', encoding='utf-8') as f: - json_data = json.load(f) - package = to_package(json_data) - - return package - -def to_package(deck_json): - deck_title = deck_json["DeckTitle"] - deck = genanki.Deck(1607372319, deck_title) - - for card_json in deck_json["Cards"]: - title = card_json["Title"] - question = card_json["Question"] - answer = card_json["Answer"] - - note = genanki.Note( - model=MY_MODEL, - fields=[title, question, answer] - ) - - deck.add_note(note) - - return genanki.Package(deck) - -if __name__ == "__main__": - if len(sys.argv) != 3: - print("Usage: python convert.py ") - sys.exit(1) - - input_json = sys.argv[1] - output_apkg = sys.argv[2] - json_file_to_package(input_json).write_to_file(output_apkg) - logging.info(f"Deck created at: {output_apkg}") diff --git a/server.py b/server.py index 43685f7..8fd3dcc 100644 --- a/server.py +++ b/server.py @@ -7,8 +7,7 @@ from logging_config import setup_logging from flask import Flask, request, send_from_directory, jsonify from werkzeug.utils import secure_filename from ankiai import images_to_package -from constants import IMAGE_KEY, OUTPUT_FILE, NO_IMAGE_PART_ERROR, NO_SELECTED_FILE_ERROR, INVALID_FILENAME_ERROR - +from constants import IMAGE_KEY, APKG_FILE, NO_IMAGE_PART_ERROR, NO_SELECTED_FILE_ERROR, INVALID_FILENAME_ERROR setup_logging() @@ -41,8 +40,9 @@ def deck_from_images(): save_uploaded_images(images, temp_dir) try: - images_to_package(temp_dir, OUTPUT_FILE) - return send_from_directory('.', OUTPUT_FILE, as_attachment=True) + images_to_package(temp_dir).write_to_file(APKG_FILE) + logging.info(f"Anki package written to {APKG_FILE}") + return send_from_directory('.', APKG_FILE, as_attachment=True) except Exception as e: logging.error("Exception occurred: "+str(e), exc_info=True) return jsonify({'error': str(e)}), 500 diff --git a/utilities.py b/utilities.py deleted file mode 100644 index 03ecc8d..0000000 --- a/utilities.py +++ /dev/null @@ -1,9 +0,0 @@ -import os -from constants import IMAGE_EXTENSIONS - -def is_image_file(path): - return any(path.lower().endswith(ext) for ext in IMAGE_EXTENSIONS) - -def ensure_directory_exists(directory): - if not os.path.exists(directory): - os.mkdir(directory)