Spaces:
Sleeping
Sleeping
import os | |
import io | |
from flask import Flask, request, jsonify | |
from werkzeug.utils import secure_filename | |
from PyPDF2 import PdfReader | |
from docx import Document | |
from pptx import Presentation | |
# Flask application instance shared by the rest of this module.
app = Flask(__name__)
# Allowed file extensions (lower-case, without the leading dot)
ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "txt"}


def allowed_file(filename) -> bool:
    """Return True if *filename* ends in one of ALLOWED_EXTENSIONS.

    The extension is whatever follows the last ``.`` and is compared
    case-insensitively.

    Args:
        filename: Name supplied by the client; may be ``None`` or empty.

    Returns:
        ``True`` when the name contains a dot and its extension is allowed,
        ``False`` otherwise (including for a missing or empty name).
    """
    # A missing name cannot have an extension; without this guard the
    # substring test below would raise TypeError on None.
    if not filename:
        return False
    _stem, dot, extension = filename.rpartition(".")
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
def summarize():
    """Handle an uploaded file and return a short text preview as JSON.

    Reads the ``file`` field of the current Flask request, validates its
    name, and passes the raw bytes to the extractor matching its extension.

    Returns:
        On success, a JSON response with the sanitized ``filename`` and the
        extracted ``summary``. On failure, a ``(response, 400)`` tuple whose
        JSON body has an ``error`` message: upload missing, no file name,
        unsupported extension, or content that could not be parsed.
    """
    # NOTE(review): no @app.route decorator is visible on this function in
    # this file -- confirm it is registered with the app somewhere.
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    upload = request.files["file"]
    # "not" (rather than == "") also rejects a missing (None) filename, which
    # would otherwise crash inside allowed_file().
    if not upload.filename:
        return jsonify({"error": "No selected file"}), 400
    if not allowed_file(upload.filename):
        return jsonify({"error": "Unsupported file format"}), 400

    # Take the extension from the name that was just validated.
    # secure_filename() can drop everything in front of the dot (".pdf" or a
    # non-ASCII stem becomes "pdf"), so splitting the sanitized name on "."
    # raised IndexError for such uploads.
    file_ext = upload.filename.rsplit(".", 1)[1].lower()
    filename = secure_filename(upload.filename)
    file_content = upload.read()

    # Process file based on type
    extractors = {
        "pdf": summarize_pdf,
        "docx": summarize_docx,
        "pptx": summarize_pptx,
        "txt": summarize_txt,
    }
    extractor = extractors.get(file_ext)

    summary = None
    if extractor is not None:
        try:
            summary = extractor(file_content)
        except Exception:
            # Request boundary: a corrupt or mislabeled upload makes the
            # parsers raise library-specific errors. Log the traceback and
            # answer with a client error instead of an unhandled 500.
            app.logger.exception("Failed to extract text from %r", filename)
            return jsonify({"error": "Could not read file"}), 400

    return jsonify({"filename": filename, "summary": summary})
# Summarization functions
def summarize_pdf(file_content: bytes) -> str:
    """Return the first 500 characters of the text extracted from a PDF.

    Pages whose extraction yields nothing are skipped; the remaining page
    texts are joined with newlines before truncation.

    Args:
        file_content: Raw bytes of the PDF file.

    Returns:
        At most 500 characters of extracted text (a short preview).
    """
    limit = 500
    reader = PdfReader(io.BytesIO(file_content))

    page_texts = []
    joined_length = 0  # length "\n".join(page_texts) would have, plus one
    for page in reader.pages:
        # Call extract_text() once per page; the previous version called it
        # twice (once to filter, once for the value).
        page_text = page.extract_text()
        if not page_text:
            continue
        page_texts.append(page_text)
        joined_length += len(page_text) + 1  # +1 for the joining newline
        # Once the joined text reaches the limit, later pages cannot change
        # the truncated result, so stop extracting.
        if joined_length > limit:
            break

    return "\n".join(page_texts)[:limit]
def summarize_docx(file_content):
    """Return the first 500 characters of a .docx file's paragraph text.

    Args:
        file_content: Raw bytes of the .docx file.

    Returns:
        The text of every entry in ``doc.paragraphs`` joined with newlines,
        truncated to 500 characters.
    """
    stream = io.BytesIO(file_content)
    document = Document(stream)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    preview = "\n".join(paragraph_texts)
    return preview[:500]
def summarize_pptx(file_content):
    """Return the first 500 characters of the slide titles in a .pptx file.

    Args:
        file_content: Raw bytes of the .pptx file.

    Returns:
        The title text of each slide that has a title shape, joined with
        newlines and truncated to 500 characters.
    """
    deck = Presentation(io.BytesIO(file_content))

    titles = []
    for slide in deck.slides:
        title_shape = slide.shapes.title
        # Slides without a title shape contribute nothing.
        if title_shape:
            titles.append(title_shape.text)

    return "\n".join(titles)[:500]
def summarize_txt(file_content: bytes) -> str:
    """Return the first 500 characters of a plain-text upload.

    Args:
        file_content: Raw bytes of the text file, expected to be UTF-8.

    Returns:
        At most 500 characters of decoded text. A leading UTF-8 byte-order
        mark is dropped, and bytes that are not valid UTF-8 appear as
        U+FFFD instead of raising ``UnicodeDecodeError``.
    """
    # "utf-8-sig" decodes plain UTF-8 too, but strips a BOM if one is
    # present so it does not end up as the first character of the preview.
    # errors="replace" keeps a preview available for files in other encodings.
    text = file_content.decode("utf-8-sig", errors="replace")
    return text[:500]
if __name__ == "__main__":
    # Only when executed as a script: serve on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")