File size: 3,702 Bytes
7493fc8
b3dc6d4
5434acc
 
 
b3dc6d4
5434acc
 
 
 
 
b3dc6d4
5434acc
 
e599ea9
 
 
 
 
 
 
 
 
 
 
ce3f3b5
e599ea9
 
 
 
b3dc6d4
5434acc
 
 
 
 
 
b3dc6d4
5434acc
1526f04
 
 
 
 
7493fc8
5434acc
1526f04
ce3f3b5
1526f04
 
 
5434acc
ce3f3b5
 
 
 
 
5434acc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7493fc8
5434acc
 
7493fc8
5434acc
ce3f3b5
7493fc8
1526f04
 
 
7493fc8
1526f04
 
 
7493fc8
 
 
 
 
 
 
 
 
 
 
5434acc
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# app.py (Streamlit-only version for Hugging Face Spaces with error handling)

import os
import tempfile
from typing import List

import fitz  # PyMuPDF
import requests
from transformers import pipeline
from gtts import gTTS
import streamlit as st

# ---------- HELPERS ----------
def _load_summarizer():
    """Build the DistilBART summarization pipeline.

    Kept as a zero-argument function so it can be wrapped by
    ``st.cache_resource`` and the model weights loaded once instead of on
    every call.
    """
    return pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")


def summarize_text(text: str) -> str:
    """Return a summary of *text*, or a human-readable fallback message.

    Only the first 2000 characters of *text* are summarized. This function
    never raises: any failure while loading or running the model is reported
    in the returned string.

    Args:
        text: Raw text to summarize.

    Returns:
        The generated summary, or a message beginning with
        ``"Summary not available"`` / ``"Summary failed"`` when no summary
        could be produced.
    """
    if not text.strip():
        return "Summary not available (empty text)."

    try:
        # Streamlit re-executes the script on each interaction, so the model
        # is cached as a shared resource rather than rebuilt per request.
        summarizer = st.cache_resource(_load_summarizer)()

        # The character slice keeps the request small; truncation=True
        # additionally guards against token-dense text (formulae, non-Latin
        # scripts) exceeding the model's maximum input length.
        result = summarizer(
            text[:2000],
            max_length=200,
            min_length=30,
            do_sample=False,
            truncation=True,
        )

        if result and isinstance(result, list) and "summary_text" in result[0]:
            return result[0]["summary_text"]
        return "Summary not available (model did not return text)."
    except Exception as e:
        # Deliberate best-effort: the caller displays whatever string it gets.
        return f"Summary failed: {e}"

def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The text of all pages joined in page order; an empty string when no
        page yields any text.

    Raises:
        Whatever ``fitz.open`` raises when the file cannot be opened.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)

def _load_topic_classifier():
    """Build the zero-shot classification pipeline.

    Kept as a zero-argument function so it can be wrapped by
    ``st.cache_resource`` and the model weights loaded once instead of on
    every call.
    """
    return pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-3")


def classify_topic(text: str, topics: List[str]) -> str:
    """Pick the candidate topic that best matches *text*.

    Only the first 1000 characters of *text* are sent to the classifier.

    Args:
        text: Text to classify.
        topics: Candidate topic labels.

    Returns:
        The first label in the classifier's ``labels`` result, or a message
        beginning with ``"Unknown"`` when the text is blank, no topics were
        given, or the classifier returned no labels.
    """
    if not text.strip():
        return "Unknown (no text extracted)"
    if not topics:
        return "Unknown (no topics provided)"

    # Streamlit re-executes the script on each interaction, so the model is
    # cached as a shared resource rather than rebuilt per request.
    classifier = st.cache_resource(_load_topic_classifier)()
    result = classifier(text[:1000], candidate_labels=topics)

    labels = result["labels"] if "labels" in result else None
    if isinstance(labels, list) and labels:
        return labels[0]
    return "Unknown (classification failed)"

def generate_audio(text: str, output_path: str):
    """Synthesize *text* to speech with gTTS and save it to *output_path*.

    Args:
        text: Text to speak.
        output_path: Destination path of the audio file.

    Raises:
        RuntimeError: If synthesis or saving fails; the underlying exception
            is attached as ``__cause__``.
    """
    try:
        tts = gTTS(text)
        tts.save(output_path)
    except Exception as e:
        # Chain explicitly so the original failure stays in the traceback.
        raise RuntimeError(f"Audio generation failed: {e}") from e

# ---------- STREAMLIT UI ----------
# Page flow: show the upload form, then on submit extract the PDF text,
# classify it against the user's topics, summarize it, and play the summary
# back as audio.
st.set_page_config(page_title="Research Paper Summarizer", layout="centered")
st.title("📄 AI Research Paper Summarizer")

st.markdown("""
Upload a research paper (PDF) and a list of topics. The app will:
1. Extract and summarize the paper
2. Classify it into a topic
3. Generate an audio summary 🎧
""")

with st.form("upload_form"):
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    topic_input = st.text_input("Enter comma-separated topics")
    submitted = st.form_submit_button("Summarize and Generate Audio")

if submitted and not (uploaded_file and topic_input):
    # Tell the user why nothing happened instead of silently ignoring the click.
    st.warning("Please upload a PDF and enter at least one topic.")
elif submitted:
    with st.spinner("Processing paper..."):
        try:
            # The temporary directory (uploaded PDF + generated MP3) is removed
            # automatically when this block exits, even on error.
            with tempfile.TemporaryDirectory() as temp_dir:
                # Use a fixed name: the client-supplied file name is untrusted
                # and must not be joined into a filesystem path.
                file_path = os.path.join(temp_dir, "upload.pdf")

                with open(file_path, "wb") as f:
                    f.write(uploaded_file.getvalue())

                text = extract_text_from_pdf(file_path)
                st.info(f"Extracted text length: {len(text)} characters")

                if not text.strip():
                    st.error("❌ No text could be extracted from the PDF. Try another file.")
                else:
                    topic_list = [t.strip() for t in topic_input.split(",") if t.strip()]
                    classified_topic = classify_topic(text, topic_list)
                    summary = summarize_text(text)

                    st.markdown(f"### 🧠 Classified Topic: `{classified_topic}`")
                    st.markdown("### ✍️ Summary:")
                    st.write(summary)

                    audio_path = os.path.join(temp_dir, "summary.mp3")
                    generate_audio(summary, audio_path)

                    # Hand Streamlit the bytes rather than the path so playback
                    # does not depend on the temporary file still existing.
                    with open(audio_path, "rb") as audio_file:
                        audio_bytes = audio_file.read()

                    st.markdown("### 🔊 Audio Summary")
                    st.audio(audio_bytes, format="audio/mpeg")
                    st.success("Done! Audio summary is ready.")

        except Exception as e:
            # Top-level UI boundary: surface any failure to the user.
            st.error(f"❌ Error: {e}")