refactorings and security enhancements

This commit is contained in:
B.J. Dweck 2023-09-08 18:22:57 +03:00
parent 61827b4e11
commit a13f92548c
4 changed files with 46 additions and 34 deletions

View File

@ -53,7 +53,7 @@ To start the server:
python server.py python server.py
``` ```
#### Endpoint: `/generate-deck` #### Endpoint: `/deck-from-images`
**Method**: POST **Method**: POST

View File

@ -2,7 +2,7 @@ import sys
import os import os
from images2text import main as images_to_text from images2text import main as images_to_text
from text2csvdeck import create_csv_deck from text2csvdeck import text_file_to_csv_deck
CSV_DECK_NAME = "output_deck.csv" CSV_DECK_NAME = "output_deck.csv"
APKG_NAME = "output.apkg" APKG_NAME = "output.apkg"
@ -13,7 +13,7 @@ def pipeline(directory_path):
text_file_name = images_to_text(directory_path) text_file_name = images_to_text(directory_path)
# 2. Convert the text file to a CSV deck using ChatGPT # 2. Convert the text file to a CSV deck using ChatGPT
create_csv_deck(text_file_name) text_file_to_csv_deck(text_file_name)
# 3. Convert the CSV deck to an Anki package # 3. Convert the CSV deck to an Anki package
os.system(f"python csv2ankicards.py {CSV_DECK_NAME} {APKG_NAME}") os.system(f"python csv2ankicards.py {CSV_DECK_NAME} {APKG_NAME}")

View File

@ -1,42 +1,46 @@
from flask import Flask, request, send_from_directory, jsonify from flask import Flask, request, send_from_directory, jsonify
from werkzeug.utils import secure_filename
import os import os
import tempfile # For creating temporary directories import tempfile
import shutil # For removing directories import shutil
from pipeline import pipeline from pipeline import pipeline
app = Flask(__name__) app = Flask(__name__)
@app.route('/generate-deck', methods=['POST']) IMAGE_KEY = 'image'
def generate_deck(): OUTPUT_FILE = 'cards.apkg'
# Assuming images are sent as multipart/form-data TEMP_DIR = tempfile.mkdtemp()
if 'image' not in request.files:
def save_uploaded_images(images, directory):
    """Save uploaded image files into ``directory`` under sanitized names.

    Args:
        images: Iterable of uploaded file objects exposing a ``filename``
            attribute and a ``save(path)`` method.
        directory: Destination directory. It is created if it does not exist.

    Raises:
        ValueError: If a non-empty filename becomes empty after sanitization.
    """
    # Validate every name before writing anything, so a rejected upload does
    # not leave a partially populated directory behind.
    targets = []
    for img in images:
        if not img.filename:
            # A part without a filename means no file was selected for that
            # input; there is nothing to store, so it is skipped, not rejected.
            continue
        # Sanitize the filename so it cannot escape ``directory``.
        safe_filename = secure_filename(img.filename)
        if not safe_filename:
            # The name consisted only of characters removed by sanitization.
            raise ValueError("Invalid filename")
        targets.append((img, os.path.join(directory, safe_filename)))

    # The directory may have been removed since it was created (the request
    # handler deletes it after each request), so make sure it exists.
    os.makedirs(directory, exist_ok=True)
    for img, path in targets:
        img.save(path)
@app.route('/deck-from-images', methods=['POST'])
def deck_from_images():
if IMAGE_KEY not in request.files:
return jsonify({'error': 'No image part'}), 400 return jsonify({'error': 'No image part'}), 400
images = request.files.getlist('image') images = request.files.getlist(IMAGE_KEY)
if not images or all([img.filename == '' for img in images]): if not images or not any(img.filename != '' for img in images):
return jsonify({'error': 'No selected file'}), 400 return jsonify({'error': 'No selected file'}), 400
# Create a temporary directory to store multiple images save_uploaded_images(images, TEMP_DIR)
temp_dir = tempfile.mkdtemp()
image_paths = []
for img in images:
image_path = os.path.join(temp_dir, img.filename)
img.save(image_path)
image_paths.append(image_path)
try: try:
# Run the pipeline using the saved images pipeline(TEMP_DIR)
# You might need to modify your pipeline to accept and handle multiple images return send_from_directory('.', OUTPUT_FILE, as_attachment=True)
pipeline(temp_dir) # Assuming pipeline works per directory of images except Exception as e: # Consider catching more specific exceptions
return send_from_directory('.', 'output.apkg', as_attachment=True)
except Exception as e:
return jsonify({'error': str(e)}), 500 return jsonify({'error': str(e)}), 500
finally: finally:
# Cleanup: Remove the temporary directory and its content shutil.rmtree(TEMP_DIR)
shutil.rmtree(temp_dir)
if __name__ == '__main__': if __name__ == '__main__':
app.run(debug=True) app.run(debug=True)

View File

@ -2,6 +2,9 @@ import openai
import sys import sys
import os import os
CHAT_MODEL = "gpt-3.5-turbo"
OUTPUT_FILENAME = "output_deck.csv"
API_KEY = os.environ.get("OPENAI_API_KEY") API_KEY = os.environ.get("OPENAI_API_KEY")
if not API_KEY: if not API_KEY:
raise ValueError("Please set the OPENAI_API_KEY environment variable.") raise ValueError("Please set the OPENAI_API_KEY environment variable.")
@ -13,8 +16,7 @@ PROMPT_TEMPLATE = """
Please come up with a set of 10 index cards for memorization, including front and back. Please come up with a set of 10 index cards for memorization, including front and back.
The index cards should completely capture the main points and themes of the text. The index cards should completely capture the main points and themes of the text.
In addition, they should contain any numbers or data that humans might find difficult to remember. In addition, they should contain any numbers or data that humans might find difficult to remember.
The goal of the index card set is that one who memorizes it can provide a summary of the text to someone else, The goal of the index card set is that one who memorizes it can provide a summary of the text to someone else, conveying the main points and themes.
conveying the main points and themes.
You will provide the questions and answers to me in CSV format, as follows: You will provide the questions and answers to me in CSV format, as follows:
``` ```
@ -28,17 +30,23 @@ The question/answer pairs shall not be numbered or contain any signs of being or
{content} {content}
""" """
def create_csv_deck(text_file_path): def text_file_to_csv_deck(text_file_path):
# Read the text content # Read the text content
with open(text_file_path, 'r') as file: with open(text_file_path, 'r') as file:
text_content = file.read() text_content = file.read()
content_to_csv(text_content)
def content_to_csv(text_content):
# Prepare the prompt # Prepare the prompt
prompt = PROMPT_TEMPLATE.format(content=text_content) prompt = PROMPT_TEMPLATE.format(content=text_content)
# Get completion from the OpenAI ChatGPT API # Get completion from the OpenAI ChatGPT API
response = openai.ChatCompletion.create( response = openai.ChatCompletion.create(
model="gpt-3.5-turbo", model=CHAT_MODEL,
messages=[ messages=[
{"role": "user", "content": prompt} {"role": "user", "content": prompt}
], ],
@ -47,11 +55,11 @@ def create_csv_deck(text_file_path):
# Extract CSV content from response and save to a new file # Extract CSV content from response and save to a new file
csv_content = response.choices[0]['message']['content'] csv_content = response.choices[0]['message']['content']
output_filename = "output_deck.csv"
with open(output_filename, 'w') as csv_file: with open(OUTPUT_FILENAME, 'w') as csv_file:
csv_file.write(csv_content) csv_file.write(csv_content)
print(f"Saved generated deck to {output_filename}") print(f"Saved generated deck to {OUTPUT_FILENAME}")
if __name__ == "__main__": if __name__ == "__main__":
@ -59,4 +67,4 @@ if __name__ == "__main__":
print("Usage: python text2csvdeck.py <text_file_path>") print("Usage: python text2csvdeck.py <text_file_path>")
sys.exit(1) sys.exit(1)
create_csv_deck(sys.argv[1]) text_file_to_csv_deck(sys.argv[1])