refactorings and security enhancements

master
B.J. Dweck 2023-09-08 18:22:57 +03:00
parent 61827b4e11
commit a13f92548c
4 changed files with 46 additions and 34 deletions

View File

@ -53,7 +53,7 @@ To start the server:
python server.py
```
#### Endpoint: `/generate-deck`
#### Endpoint: `/deck-from-images`
**Method**: POST

View File

@ -2,7 +2,7 @@ import sys
import os
from images2text import main as images_to_text
from text2csvdeck import create_csv_deck
from text2csvdeck import text_file_to_csv_deck
CSV_DECK_NAME = "output_deck.csv"
APKG_NAME = "output.apkg"
@ -13,7 +13,7 @@ def pipeline(directory_path):
text_file_name = images_to_text(directory_path)
# 2. Convert the text file to a CSV deck using ChatGPT
create_csv_deck(text_file_name)
text_file_to_csv_deck(text_file_name)
# 3. Convert the CSV deck to an Anki package
os.system(f"python csv2ankicards.py {CSV_DECK_NAME} {APKG_NAME}")

View File

@ -1,42 +1,46 @@
from flask import Flask, request, send_from_directory, jsonify
from werkzeug.utils import secure_filename
import os
import tempfile # For creating temporary directories
import shutil # For removing directories
import tempfile
import shutil
from pipeline import pipeline
app = Flask(__name__)
@app.route('/generate-deck', methods=['POST'])
def generate_deck():
# Assuming images are sent as multipart/form-data
if 'image' not in request.files:
IMAGE_KEY = 'image'
OUTPUT_FILE = 'cards.apkg'
TEMP_DIR = tempfile.mkdtemp()
def save_uploaded_images(images, directory):
    """Save uploaded image files into ``directory`` under sanitized names.

    Args:
        images: Iterable of uploaded file objects exposing ``filename`` and
            ``save(path)`` (as returned by ``request.files.getlist``).
        directory: Existing directory the files are written into.

    Raises:
        ValueError: If a submitted filename becomes empty after sanitization.
    """
    targets = []
    for img in images:
        if not img.filename:
            # Empty form part (no file chosen). The route accepts a request
            # as long as at least one part carries a file, so skip these
            # instead of failing the whole upload.
            continue

        # Sanitize the client-supplied name so it cannot escape `directory`.
        safe_filename = secure_filename(img.filename)
        if not safe_filename:
            # The name consisted solely of characters removed by sanitization.
            raise ValueError("Invalid filename")
        targets.append((img, os.path.join(directory, safe_filename)))

    # Write only after every name has been validated, so a bad name in the
    # middle of the batch does not leave a partially saved upload behind.
    for img, path in targets:
        img.save(path)
@app.route('/deck-from-images', methods=['POST'])
def deck_from_images():
    """Build an Anki deck from uploaded images and return it as a download.

    Expects a multipart/form-data POST whose files are sent under
    ``IMAGE_KEY``.

    Returns:
        The generated ``OUTPUT_FILE`` as an attachment on success, or a JSON
        ``{'error': ...}`` body with status 400 (missing/invalid upload) or
        500 (pipeline failure).
    """
    if IMAGE_KEY not in request.files:
        return jsonify({'error': 'No image part'}), 400

    images = request.files.getlist(IMAGE_KEY)
    if not any(img.filename for img in images):
        return jsonify({'error': 'No selected file'}), 400

    # Use a fresh directory per request: a directory created once at import
    # time is gone after the first request's cleanup, and would also be
    # shared between concurrent requests.
    temp_dir = tempfile.mkdtemp()
    try:
        try:
            save_uploaded_images(images, temp_dir)
        except ValueError as e:
            # Raised for filenames that are empty after sanitization; this is
            # a client error, not a server failure.
            return jsonify({'error': str(e)}), 400

        pipeline(temp_dir)
        # NOTE(review): pipeline.py appears to write APKG_NAME = "output.apkg"
        # while OUTPUT_FILE is 'cards.apkg' -- confirm the two names agree.
        return send_from_directory('.', OUTPUT_FILE, as_attachment=True)
    except Exception as e:  # Consider catching more specific exceptions
        return jsonify({'error': str(e)}), 500
    finally:
        # Always remove the uploaded images, whether or not the pipeline ran.
        shutil.rmtree(temp_dir, ignore_errors=True)
if __name__ == '__main__':
app.run(debug=True)

View File

@ -2,6 +2,9 @@ import openai
import sys
import os
CHAT_MODEL = "gpt-3.5-turbo"
OUTPUT_FILENAME = "output_deck.csv"
API_KEY = os.environ.get("OPENAI_API_KEY")
if not API_KEY:
raise ValueError("Please set the OPENAI_API_KEY environment variable.")
@ -13,8 +16,7 @@ PROMPT_TEMPLATE = """
Please come up with a set of 10 index cards for memorization, including front and back.
The index cards should completely capture the main points and themes of the text.
In addition, they should contain any numbers or data that humans might find difficult to remember.
The goal of the index card set is that one who memorizes it can provide a summary of the text to someone else,
conveying the main points and themes.
The goal of the index card set is that one who memorizes it can provide a summary of the text to someone else, conveying the main points and themes.
You will provide the questions and answers to me in CSV format, as follows:
```
@ -28,17 +30,23 @@ The question/answer pairs shall not be numbered or contain any signs of being or
{content}
"""
def create_csv_deck(text_file_path):
def text_file_to_csv_deck(text_file_path):
    """Read the text file at ``text_file_path`` and pass its contents to ``content_to_csv``."""
    # Load the whole file up front; the handle is closed before the content is processed.
    with open(text_file_path, 'r') as source:
        raw_text = source.read()

    content_to_csv(raw_text)
def content_to_csv(text_content):
# Prepare the prompt
prompt = PROMPT_TEMPLATE.format(content=text_content)
# Get completion from the OpenAI ChatGPT API
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
model=CHAT_MODEL,
messages=[
{"role": "user", "content": prompt}
],
@ -47,11 +55,11 @@ def create_csv_deck(text_file_path):
# Extract CSV content from response and save to a new file
csv_content = response.choices[0]['message']['content']
output_filename = "output_deck.csv"
with open(output_filename, 'w') as csv_file:
with open(OUTPUT_FILENAME, 'w') as csv_file:
csv_file.write(csv_content)
print(f"Saved generated deck to {output_filename}")
print(f"Saved generated deck to {OUTPUT_FILENAME}")
if __name__ == "__main__":
@ -59,4 +67,4 @@ if __name__ == "__main__":
print("Usage: python text2csvdeck.py <text_file_path>")
sys.exit(1)
create_csv_deck(sys.argv[1])
text_file_to_csv_deck(sys.argv[1])