File size: 2,138 Bytes
764d4f7
2a3fae3
3b4df89
764d4f7
2a3fae3
764d4f7
2a3fae3
524f780
764d4f7
d2d0219
2a3fae3
 
d2d0219
2a3fae3
 
d2d0219
3b4df89
2a3fae3
764d4f7
d2d0219
 
764d4f7
2a3fae3
764d4f7
 
 
2a3fae3
 
 
764d4f7
2a3fae3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
764d4f7
2a3fae3
 
 
 
 
d2d0219
2a3fae3
 
 
 
d2d0219
2a3fae3
 
 
 
3b4df89
2a3fae3
 
 
3b4df89
9fd7d89
3b4df89
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import io
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
from PyPDF2 import PdfReader
from docx import Document
from pptx import Presentation

# Flask application object; the route decorator below registers against it.
app = Flask(__name__)

# Lower-case file extensions (without the dot) that uploads may have.
# allowed_file() lower-cases the upload's extension before checking it here.
ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "txt"}

def allowed_file(filename):
    """Tell whether *filename* carries an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    A name containing no dot at all is rejected.
    """
    _, dot, extension = filename.rpartition(".")
    if not dot:
        # No dot anywhere in the name, so there is no extension to check.
        return False
    return extension.lower() in ALLOWED_EXTENSIONS

@app.route("/summarize", methods=["POST"])
def summarize():
    """Handle ``POST /summarize``: return a short text preview of an upload.

    Expects a multipart form upload under the field name ``file``.

    Returns:
        A JSON response ``{"filename": ..., "summary": ...}`` on success.
        ``{"error": ...}`` with status 400 when no file was sent, the filename
        is empty, or its extension is not accepted by ``allowed_file``.
        ``{"error": ...}`` with status 422 when the document cannot be parsed.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    file = request.files["file"]

    if file.filename == "":
        return jsonify({"error": "No selected file"}), 400

    if not allowed_file(file.filename):
        return jsonify({"error": "Unsupported file format"}), 400

    # Take the extension from the raw name that allowed_file() just validated.
    # secure_filename() strips leading dots and non-ASCII characters, so a name
    # such as ".pdf" collapses to "pdf" and splitting *that* on "." would
    # raise IndexError.
    file_ext = file.filename.rsplit(".", 1)[1].lower()
    filename = secure_filename(file.filename)
    file_content = file.read()

    # Dispatch on the extension; looked up at call time, so the helpers may be
    # defined further down in the module.
    summarizers = {
        "pdf": summarize_pdf,
        "docx": summarize_docx,
        "pptx": summarize_pptx,
        "txt": summarize_txt,
    }
    handler = summarizers.get(file_ext)

    summary = None
    if handler is not None:
        try:
            summary = handler(file_content)
        except Exception:
            # Request boundary: a corrupt or mislabeled upload should produce
            # a client error, not an unhandled 500. Keep the traceback in the
            # application log for diagnosis.
            app.logger.exception("Failed to summarize upload %r", filename)
            return jsonify({"error": "Could not read file contents"}), 422

    return jsonify({"filename": filename, "summary": summary})

# Summarization functions
def summarize_pdf(file_content):
    """Return the first 500 characters of the text extracted from a PDF.

    Args:
        file_content: Raw bytes of the PDF file.

    Returns:
        The text of every page that yields any, joined with newlines and
        truncated to 500 characters.
    """
    reader = PdfReader(io.BytesIO(file_content))
    # Extract each page exactly once; text extraction is the expensive step,
    # so do not repeat it just to filter out empty pages.
    page_texts = (page.extract_text() for page in reader.pages)
    text = "\n".join(page_text for page_text in page_texts if page_text)
    return text[:500]  # Short summary: first 500 characters only

def summarize_docx(file_content):
    """Return the first 500 characters of a .docx file's paragraph text.

    Args:
        file_content: Raw bytes of the .docx file.

    Returns:
        The text of every paragraph in ``doc.paragraphs`` joined with
        newlines, truncated to 500 characters.
    """
    stream = io.BytesIO(file_content)
    document = Document(stream)
    paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
    joined = "\n".join(paragraph_texts)
    return joined[:500]

def summarize_pptx(file_content):
    """Return the first 500 characters of a .pptx file's slide titles.

    Args:
        file_content: Raw bytes of the .pptx file.

    Returns:
        The title text of every slide that has a truthy ``shapes.title``,
        joined with newlines and truncated to 500 characters.
    """
    deck = Presentation(io.BytesIO(file_content))
    titles = []
    for slide in deck.slides:
        # Slides without a title are skipped.
        if slide.shapes.title:
            titles.append(slide.shapes.title.text)
    return "\n".join(titles)[:500]

def summarize_txt(file_content):
    """Return the first 500 characters of a plain-text upload.

    Args:
        file_content: Raw bytes of the text file.

    Returns:
        The decoded text truncated to 500 characters. The bytes are decoded
        as UTF-8; a leading byte-order mark is dropped and bytes that are not
        valid UTF-8 become U+FFFD instead of raising ``UnicodeDecodeError``.
    """
    # "utf-8-sig" strips an optional BOM so it does not leak into the summary.
    # errors="replace" keeps the preview best-effort for files in other
    # encodings.
    text = file_content.decode("utf-8-sig", errors="replace")
    return text[:500]

# Script entry point: start the Flask server only when this file is executed
# directly, not when it is imported. host="0.0.0.0" listens on all network
# interfaces; the port is fixed at 7860.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)