From a13f92548cf63a3bc50e83d1f1c2aa6736e25583 Mon Sep 17 00:00:00 2001 From: Benjamin Dweck Date: Fri, 8 Sep 2023 18:22:57 +0300 Subject: [PATCH] refactorings and security enhancements --- README.md | 2 +- pipeline.py | 4 ++-- server.py | 50 ++++++++++++++++++++++++++----------------------- text2csvdeck.py | 24 ++++++++++++++++-------- 4 files changed, 46 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 41f655b..9cc3403 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ To start the server: python server.py ``` -#### Endpoint: `/generate-deck` +#### Endpoint: `/deck-from-images` **Method**: POST diff --git a/pipeline.py b/pipeline.py index 79139f2..86415d7 100644 --- a/pipeline.py +++ b/pipeline.py @@ -2,7 +2,7 @@ import sys import os from images2text import main as images_to_text -from text2csvdeck import create_csv_deck +from text2csvdeck import text_file_to_csv_deck CSV_DECK_NAME = "output_deck.csv" APKG_NAME = "output.apkg" @@ -13,7 +13,7 @@ def pipeline(directory_path): text_file_name = images_to_text(directory_path) # 2. Convert the text file to a CSV deck using ChatGPT - create_csv_deck(text_file_name) + text_file_to_csv_deck(text_file_name) # 3. 
Convert the CSV deck to an Anki package os.system(f"python csv2ankicards.py {CSV_DECK_NAME} {APKG_NAME}") diff --git a/server.py b/server.py index 7bfc869..c56c09a 100644 --- a/server.py +++ b/server.py @@ -1,42 +1,46 @@ from flask import Flask, request, send_from_directory, jsonify +from werkzeug.utils import secure_filename import os -import tempfile # For creating temporary directories -import shutil # For removing directories +import tempfile +import shutil from pipeline import pipeline app = Flask(__name__) -@app.route('/generate-deck', methods=['POST']) -def generate_deck(): - # Assuming images are sent as multipart/form-data - if 'image' not in request.files: +IMAGE_KEY = 'image' +OUTPUT_FILE = 'output.apkg' # Must match APKG_NAME written by pipeline.py + +def save_uploaded_images(images, directory): + for img in images: + # Sanitize the filename + safe_filename = secure_filename(img.filename) + if not safe_filename: + # Handle the case where the filename becomes empty after sanitization + raise ValueError("Invalid filename") + filename = os.path.join(directory, safe_filename) + img.save(filename) + +@app.route('/deck-from-images', methods=['POST']) +def deck_from_images(): + if IMAGE_KEY not in request.files: return jsonify({'error': 'No image part'}), 400 - images = request.files.getlist('image') + images = request.files.getlist(IMAGE_KEY) - if not images or all([img.filename == '' for img in images]): + if not images or not any(img.filename != '' for img in images): return jsonify({'error': 'No selected file'}), 400 - # Create a temporary directory to store multiple images - temp_dir = tempfile.mkdtemp() - - image_paths = [] - for img in images: - image_path = os.path.join(temp_dir, img.filename) - img.save(image_path) - image_paths.append(image_path) + temp_dir = tempfile.mkdtemp() # Per-request directory: a shared one is deleted by the first request's cleanup try: - # Run the pipeline using the saved images - # You might need to modify your pipeline to accept and handle multiple images - pipeline(temp_dir) # Assuming pipeline works per 
directory of images - return send_from_directory('.', 'output.apkg', as_attachment=True) - except Exception as e: + save_uploaded_images(images, temp_dir) # Inside try so a rejected filename still gets a JSON error and cleanup + pipeline(temp_dir) + return send_from_directory('.', OUTPUT_FILE, as_attachment=True) + except Exception as e: # Consider catching more specific exceptions return jsonify({'error': str(e)}), 500 finally: - # Cleanup: Remove the temporary directory and its content - shutil.rmtree(temp_dir) + shutil.rmtree(temp_dir) # Remove this request's directory and its contents if __name__ == '__main__': app.run(debug=True) diff --git a/text2csvdeck.py b/text2csvdeck.py index 876bb70..8517bc0 100644 --- a/text2csvdeck.py +++ b/text2csvdeck.py @@ -2,6 +2,9 @@ import openai import sys import os +CHAT_MODEL = "gpt-3.5-turbo" +OUTPUT_FILENAME = "output_deck.csv" + API_KEY = os.environ.get("OPENAI_API_KEY") if not API_KEY: raise ValueError("Please set the OPENAI_API_KEY environment variable.") @@ -13,8 +16,7 @@ PROMPT_TEMPLATE = """ Please come up with a set of 10 index cards for memorization, including front and back. The index cards should completely capture the main points and themes of the text. In addition, they should contain any numbers or data that humans might find difficult to remember. -The goal of the index card set is that one who memorizes it can provide a summary of the text to someone else, -conveying the main points and themes. +The goal of the index card set is that one who memorizes it can provide a summary of the text to someone else, conveying the main points and themes. 
You will provide the questions and answers to me in CSV format, as follows: ``` @@ -28,17 +30,23 @@ The question/answer pairs shall not be numbered or contain any signs of being or {content} """ -def create_csv_deck(text_file_path): +def text_file_to_csv_deck(text_file_path): + # Read the text content with open(text_file_path, 'r') as file: text_content = file.read() + content_to_csv(text_content) + + +def content_to_csv(text_content): + # Prepare the prompt prompt = PROMPT_TEMPLATE.format(content=text_content) # Get completion from the OpenAI ChatGPT API response = openai.ChatCompletion.create( - model="gpt-3.5-turbo", + model=CHAT_MODEL, messages=[ {"role": "user", "content": prompt} ], @@ -47,11 +55,11 @@ def create_csv_deck(text_file_path): # Extract CSV content from response and save to a new file csv_content = response.choices[0]['message']['content'] - output_filename = "output_deck.csv" - with open(output_filename, 'w') as csv_file: + + with open(OUTPUT_FILENAME, 'w') as csv_file: csv_file.write(csv_content) - print(f"Saved generated deck to {output_filename}") + print(f"Saved generated deck to {OUTPUT_FILENAME}") if __name__ == "__main__": @@ -59,4 +67,4 @@ if __name__ == "__main__": print("Usage: python text2csvdeck.py ") sys.exit(1) - create_csv_deck(sys.argv[1]) + text_file_to_csv_deck(sys.argv[1])