Spaces:
Sleeping
Sleeping
File size: 2,138 Bytes
764d4f7 2a3fae3 3b4df89 764d4f7 2a3fae3 764d4f7 2a3fae3 524f780 764d4f7 d2d0219 2a3fae3 d2d0219 2a3fae3 d2d0219 3b4df89 2a3fae3 764d4f7 d2d0219 764d4f7 2a3fae3 764d4f7 2a3fae3 764d4f7 2a3fae3 764d4f7 2a3fae3 d2d0219 2a3fae3 d2d0219 2a3fae3 3b4df89 2a3fae3 3b4df89 9fd7d89 3b4df89 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import os
import io
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
from PyPDF2 import PdfReader
from docx import Document
from pptx import Presentation
# Flask application object: the /summarize route is registered on it and the
# script entry point at the bottom of the file starts it with app.run().
app = Flask(__name__)
# Allowed file extensions (lower-case, without the leading dot)
ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "txt"}


def allowed_file(filename):
    """Return True when *filename* has an extension in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared
    case-insensitively.

    Args:
        filename: Name to check. An empty or missing (``None``) name is
            treated as not allowed rather than raising.

    Returns:
        bool: True if the extension is allowed, otherwise False.
    """
    # Guard against a missing name: `"." in None` would raise TypeError.
    if not filename or "." not in filename:
        return False
    extension = filename.rsplit(".", 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
@app.route("/summarize", methods=["POST"])
def summarize():
    """Handle ``POST /summarize``: return a short summary of an uploaded file.

    Expects a multipart form upload under the field name ``file``.

    Returns:
        A JSON response ``{"filename": ..., "summary": ...}`` on success, or
        ``{"error": ...}`` with status 400 when no file was sent, no file was
        selected, the extension is not supported, or the content could not
        be parsed.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    upload = request.files["file"]
    # Covers both an empty string and a missing (None) filename.
    if not upload.filename:
        return jsonify({"error": "No selected file"}), 400
    if not allowed_file(upload.filename):
        return jsonify({"error": "Unsupported file format"}), 400

    # Take the extension from the name allowed_file() just validated.
    # secure_filename() may strip characters (e.g. a non-ASCII stem) and
    # return a name without a dot, so indexing rsplit() on its result can
    # raise IndexError.
    file_ext = upload.filename.rsplit(".", 1)[1].lower()
    filename = secure_filename(upload.filename)
    file_content = upload.read()

    # Process file based on type
    summarizers = {
        "pdf": summarize_pdf,
        "docx": summarize_docx,
        "pptx": summarize_pptx,
        "txt": summarize_txt,
    }
    summarizer = summarizers.get(file_ext)

    summary = None
    if summarizer is not None:
        try:
            summary = summarizer(file_content)
        except Exception:
            # Request boundary: the parsers raise many unrelated exception
            # types on corrupt or mislabeled uploads. Log the traceback and
            # report a client error instead of an unhandled 500.
            app.logger.exception(
                "Failed to extract text from uploaded %s file", file_ext
            )
            return jsonify({"error": "Could not read file"}), 400

    return jsonify({"filename": filename, "summary": summary})
# Summarization functions
def summarize_pdf(file_content):
    """Return the first 500 characters of text extracted from a PDF.

    Text from pages that yield any is joined with newlines; pages with no
    extractable text are skipped.

    Args:
        file_content: Raw bytes of the PDF file.

    Returns:
        str: At most 500 characters of extracted text (possibly empty).
    """
    limit = 500  # Returning a short summary (first 500 chars)
    reader = PdfReader(io.BytesIO(file_content))

    page_texts = []
    joined_length = 0
    for page in reader.pages:
        # Extract once per page; the result is reused for the emptiness
        # check and for the output.
        page_text = page.extract_text()
        if not page_text:
            continue
        # Account for the "\n" separator that precedes every page but the first.
        joined_length += len(page_text) + (1 if page_texts else 0)
        page_texts.append(page_text)
        # Later pages only append text past the cut-off, so stop early.
        if joined_length >= limit:
            break

    return "\n".join(page_texts)[:limit]
def summarize_docx(file_content):
    """Return the first 500 characters of a .docx file's paragraph text.

    Args:
        file_content: Raw bytes of the .docx file.

    Returns:
        str: The paragraph texts joined with newlines, cut to 500 characters.
    """
    stream = io.BytesIO(file_content)
    document = Document(stream)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    full_text = "\n".join(paragraph_texts)
    return full_text[:500]
def summarize_pptx(file_content):
    """Return the first 500 characters of a .pptx file's slide titles.

    Only slides whose ``shapes.title`` is truthy contribute; their title
    texts are joined with newlines.

    Args:
        file_content: Raw bytes of the .pptx file.

    Returns:
        str: The joined slide titles, cut to 500 characters.
    """
    deck = Presentation(io.BytesIO(file_content))
    titles = []
    for slide in deck.slides:
        if slide.shapes.title:
            titles.append(slide.shapes.title.text)
    joined_titles = "\n".join(titles)
    return joined_titles[:500]
def summarize_txt(file_content):
    """Return the first 500 characters of a plain-text upload.

    Args:
        file_content: Raw bytes of the text file.

    Returns:
        str: At most 500 decoded characters. Bytes that are not valid UTF-8
        appear as U+FFFD replacement characters instead of raising
        UnicodeDecodeError.
    """
    # "utf-8-sig" decodes ordinary UTF-8 and also drops a leading byte-order
    # mark, which would otherwise show up as the first summary character.
    text = file_content.decode("utf-8-sig", errors="replace")
    return text[:500]
if __name__ == "__main__":
    # Run the Flask server only when this file is executed as a script
    # (not on import), listening on all interfaces on port 7860.
    app.run(host="0.0.0.0", port=7860)
|