mike23415's picture
Update app.py
2a3fae3 verified
raw
history blame
2.14 kB
import os
import io
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
from PyPDF2 import PdfReader
from docx import Document
from pptx import Presentation
# WSGI application object; the route decorators in this module register on it.
app = Flask(import_name=__name__)
# File extensions (lower-case, without the dot) that uploads may carry.
ALLOWED_EXTENSIONS = {
    "pdf",
    "docx",
    "pptx",
    "txt",
}


def allowed_file(filename):
    """Return True when *filename* ends in one of ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot, compared
    case-insensitively. A name without any dot is rejected.
    """
    if "." not in filename:
        return False
    extension = filename.rsplit(".", 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
@app.route("/summarize", methods=["POST"])
def summarize():
    """Return a short text preview of an uploaded document.

    Reads the multipart upload stored under the ``file`` form field,
    checks its extension against ``allowed_file`` and hands the raw bytes
    to the matching ``summarize_*`` helper.

    Returns:
        On success, JSON ``{"filename": ..., "summary": ...}`` where
        ``filename`` is the sanitized upload name.
        On failure, JSON ``{"error": ...}`` with status 400 (missing file,
        empty filename, unsupported extension) or 422 (the file could not
        be parsed).
    """
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    upload = request.files["file"]
    if not upload.filename:
        return jsonify({"error": "No selected file"}), 400
    if not allowed_file(upload.filename):
        return jsonify({"error": "Unsupported file format"}), 400

    # Take the extension from the name that allowed_file() just validated.
    # secure_filename() may drop the dot (".pdf" or a name whose stem is
    # entirely non-ASCII both sanitize to "pdf"), so splitting the sanitized
    # name on "." could raise IndexError.
    file_ext = upload.filename.rsplit(".", 1)[1].lower()
    filename = secure_filename(upload.filename)
    file_content = upload.read()

    # Process file based on type
    summarizers = {
        "pdf": summarize_pdf,
        "docx": summarize_docx,
        "pptx": summarize_pptx,
        "txt": summarize_txt,
    }
    handler = summarizers.get(file_ext)
    summary = None
    if handler is not None:
        try:
            summary = handler(file_content)
        except Exception:
            # Request boundary: a corrupt or mislabeled upload should yield
            # a client error with a JSON body rather than an unhandled 500.
            app.logger.exception("Failed to summarize %s upload", file_ext)
            return jsonify({"error": "Could not read file"}), 422

    return jsonify({"filename": filename, "summary": summary})
# Summarization functions
def summarize_pdf(file_content):
    """Return the first 500 characters of the text extracted from a PDF.

    Args:
        file_content: Raw bytes of the PDF file.

    Returns:
        The non-empty page texts joined with newlines, truncated to
        500 characters.
    """
    reader = PdfReader(io.BytesIO(file_content))
    # Extract each page exactly once and reuse the result for both the
    # emptiness filter and the joined output.
    page_texts = (page.extract_text() for page in reader.pages)
    text = "\n".join(page_text for page_text in page_texts if page_text)
    return text[:500]  # Returning a short summary (first 500 chars)
def summarize_docx(file_content):
    """Return the first 500 characters of a Word document's paragraph text.

    Args:
        file_content: Raw bytes of the .docx file.

    Returns:
        The text of every paragraph joined with newlines, truncated to
        500 characters.
    """
    document = Document(io.BytesIO(file_content))
    paragraph_texts = []
    for paragraph in document.paragraphs:
        paragraph_texts.append(paragraph.text)
    joined = "\n".join(paragraph_texts)
    return joined[:500]
def summarize_pptx(file_content):
    """Return the first 500 characters of a presentation's slide titles.

    Args:
        file_content: Raw bytes of the .pptx file.

    Returns:
        The title text of every slide that has a title shape, joined with
        newlines and truncated to 500 characters.
    """
    deck = Presentation(io.BytesIO(file_content))
    titles = []
    for slide in deck.slides:
        # Slides without a title shape contribute nothing.
        if slide.shapes.title:
            titles.append(slide.shapes.title.text)
    joined = "\n".join(titles)
    return joined[:500]
def summarize_txt(file_content):
    """Return the first 500 characters of a plain-text upload.

    Args:
        file_content: Raw bytes of the text file.

    Returns:
        The content decoded as UTF-8 and truncated to 500 characters.
        Bytes that are not valid UTF-8 become U+FFFD replacement
        characters instead of raising UnicodeDecodeError.
    """
    # Uploads are untrusted and may use another encoding; a preview with
    # replacement characters is more useful than a failed request.
    text = file_content.decode("utf-8", errors="replace")
    return text[:500]
if __name__ == "__main__":
    # Started directly as a script: serve on every interface, port 7860.
    run_options = {"host": "0.0.0.0", "port": 7860}
    app.run(**run_options)