import gradio as gr
from transformers import pipeline
import networkx as nx
import numpy as np
import re
import nltk
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download NLTK sentence-tokenizer data (newer NLTK releases ship it as "punkt_tab")
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

# Load Transformer model for abstractive summarization
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
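# The BART weights are downloaded and cached locally on the first run; pass device=0 to pipeline() to use a GPU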

# Extractive summarization using TextRank
def extractive_summarization(text, num_sentences=3):
    sentences = sent_tokenize(text)

    # Handle cases where input text is too short
    if len(sentences) <= num_sentences:
        return "Text is too short for extractive summarization."

    try:
        vectorizer = TfidfVectorizer(stop_words="english")
        sentence_vectors = vectorizer.fit_transform(sentences)

        # Handle cases where vectorization fails due to low variation in text
        if sentence_vectors.shape[0] < num_sentences:
            return "Insufficient unique content for extractive summarization."

        similarity_matrix = cosine_similarity(sentence_vectors)
        graph = nx.from_numpy_array(similarity_matrix)
        scores = nx.pagerank(graph)
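        # PageRank scores each sentence by its centrality in the sentence-similarity graph (the TextRank idea)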

        # Pick the top-scoring sentences, then restore their original order so the summary reads naturally
        top_indices = sorted(sorted(range(len(sentences)), key=lambda i: scores[i], reverse=True)[:num_sentences])
        return " ".join(sentences[i] for i in top_indices)

    except Exception as e:
        return f"Error in extractive summarization: {str(e)}"


# Abstractive summarization using BART
def abstractive_summarization(text, length):
    if len(text.split()) < 30:
        return "Text is too short for summarization."
    max_length = {"short": 50, "medium": 100, "long": 150}[length]
    # truncation=True trims inputs longer than BART's context window instead of raising an error
    summary = summarizer(text, max_length=max_length, min_length=30, do_sample=False, truncation=True)[0]['summary_text']
    return summary

# Main function
def summarize_text(text, method, length):
    if method == "Abstractive (BART)":
        return abstractive_summarization(text, length)
    else:
        num_sentences = {"short": 2, "medium": 4, "long": 6}[length]
        return extractive_summarization(text, num_sentences)

# Function to read an uploaded .txt file (Gradio may pass a filepath string or a tempfile-like object)
def process_file(file):
    if file is None:
        return ""
    with open(getattr(file, "name", file), "r", encoding="utf-8") as f:
        return f.read()

# UI with Gradio
with gr.Blocks(theme=gr.themes.Soft()) as iface:
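    # gr.Blocks arranges the components and event handlers below in a custom layout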
    gr.Markdown("# 📄 AI-Powered Text Summarizer")
    gr.Markdown("Summarize long articles, news, and research papers using advanced NLP models.")

    with gr.Row():
        method_choice = gr.Radio(["Abstractive (BART)", "Extractive (TextRank)"], label="Summarization Type", value="Abstractive (BART)")
        length_choice = gr.Radio(["short", "medium", "long"], label="Summary Length", value="medium")

    text_input = gr.Textbox(lines=8, placeholder="Paste long text here...", label="Input Text")
    file_input = gr.File(label="Or Upload a .txt file")
    summarize_button = gr.Button("Summarize ✨")

    summary_output = gr.Textbox(lines=6, label="Summarized Text", interactive=False)
    
    file_input.change(process_file, inputs=file_input, outputs=text_input)
    summarize_button.click(summarize_text, inputs=[text_input, method_choice, length_choice], outputs=summary_output)

# Launch app
if __name__ == "__main__":
    iface.launch()