olivercareyncl committed on
Commit
78036fd
·
verified ·
1 Parent(s): f7c4b53

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import networkx as nx
4
+ import numpy as np
5
+ import re
6
+ import nltk
7
+ from nltk.tokenize import sent_tokenize
8
+ from sklearn.feature_extraction.text import TfidfVectorizer
9
+ from sklearn.metrics.pairwise import cosine_similarity
10
+
11
+ # Download NLTK data
12
# Fetch the sentence-tokenizer data used by sent_tokenize. Newer NLTK
# releases look up the "punkt_tab" resource instead of the pickled "punkt"
# models, so both are requested to keep the app working across versions.
nltk.download('punkt')
nltk.download('punkt_tab')

# Load Transformer model for abstractive summarization
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
16
+
17
+ # Extractive summarization using TextRank
18
def extractive_summarization(text, num_sentences=3):
    """Return an extractive summary made of the highest-ranked sentences.

    Sentences are vectorized with TF-IDF, linked in a graph weighted by
    cosine similarity, and scored with PageRank. The top ``num_sentences``
    sentences are returned joined by single spaces, in the order in which
    they appear in ``text`` so the summary keeps the source's flow.

    Args:
        text: The text to summarize.
        num_sentences: Maximum number of sentences to keep.

    Returns:
        ``text`` unchanged when it has no more than ``num_sentences``
        sentences; otherwise the selected sentences joined by spaces.
    """
    sentences = sent_tokenize(text)
    if len(sentences) <= num_sentences:
        return text  # If text is short, return as is

    vectorizer = TfidfVectorizer(stop_words="english")
    try:
        sentence_vectors = vectorizer.fit_transform(sentences)
    except ValueError:
        # Raised when no vocabulary survives stop-word removal; there is
        # nothing to rank on, so fall back to the leading sentences.
        return " ".join(sentences[:num_sentences])

    similarity_matrix = cosine_similarity(sentence_vectors)
    graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(graph)

    # Pick the best-scoring sentence indices, then restore document order.
    best_first = sorted(
        range(len(sentences)), key=lambda idx: scores[idx], reverse=True
    )
    chosen = sorted(best_first[:num_sentences])
    return " ".join(sentences[idx] for idx in chosen)
30
+
31
+ # Abstractive summarization using BART
32
def abstractive_summarization(text, length):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize. ``None`` or text with fewer than 30
            whitespace-separated words is rejected with a message.
        length: One of ``"short"``, ``"medium"`` or ``"long"``; selects the
            ``max_length`` passed to the pipeline (50, 100 or 150).

    Returns:
        The generated summary, or a fixed message when the text is too short.

    Raises:
        KeyError: If ``length`` is not one of the supported keys.
    """
    if not text or len(text.split()) < 30:
        return "Text is too short for summarization."
    max_length = {"short": 50, "medium": 100, "long": 150}[length]
    # truncation=True clips inputs that exceed the model's maximum input
    # length instead of letting the pipeline fail on long documents.
    outputs = summarizer(
        text,
        max_length=max_length,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return outputs[0]['summary_text']
38
+
39
+ # Main function
40
def summarize_text(text, method, length):
    """Dispatch ``text`` to the summarizer selected by ``method``.

    ``"Abstractive (BART)"`` routes to ``abstractive_summarization`` with
    ``length`` passed through; any other ``method`` routes to
    ``extractive_summarization`` with ``length`` mapped to a sentence count
    (short=2, medium=4, long=6).
    """
    if method != "Abstractive (BART)":
        sentence_counts = {"short": 2, "medium": 4, "long": 6}
        return extractive_summarization(text, sentence_counts[length])
    return abstractive_summarization(text, length)
46
+
47
+ # Function to process file upload
48
def process_file(file):
    """Return the UTF-8 text content of an uploaded file.

    Args:
        file: ``None`` (no file / upload cleared), a filesystem path, or an
            object exposing ``read()`` that yields bytes or text.

    Returns:
        The decoded text, or an empty string when ``file`` is ``None``.
    """
    if file is None:
        # The change event also fires when the upload is removed.
        return ""

    if hasattr(file, "read"):
        raw = file.read()
    else:
        # Treat anything else as a path to the uploaded file on disk.
        with open(file, "rb") as handle:
            raw = handle.read()

    if isinstance(raw, bytes):
        return raw.decode("utf-8")
    return raw
50
+
51
+ # UI with Gradio
52
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    # Page heading and short description.
    gr.Markdown("# 📄 AI-Powered Text Summarizer")
    gr.Markdown("Summarize long articles, news, and research papers using advanced NLP models.")

    # Summarization options, laid out side by side.
    with gr.Row():
        method_choice = gr.Radio(
            ["Abstractive (BART)", "Extractive (TextRank)"],
            label="Summarization Type",
            value="Abstractive (BART)",
        )
        length_choice = gr.Radio(
            ["short", "medium", "long"],
            label="Summary Length",
            value="medium",
        )

    # Inputs: pasted text or an uploaded file, plus the trigger button.
    text_input = gr.Textbox(
        lines=8,
        placeholder="Paste long text here...",
        label="Input Text",
    )
    file_input = gr.File(label="Or Upload a .txt file")
    summarize_button = gr.Button("Summarize ✨")

    # Read-only box that receives the summary.
    summary_output = gr.Textbox(
        lines=6,
        label="Summarized Text",
        interactive=False,
    )

    # Uploading a file copies its contents into the text box.
    file_input.change(process_file, inputs=file_input, outputs=text_input)
    # The button summarizes the text box using the selected options.
    summarize_button.click(
        summarize_text,
        inputs=[text_input, method_choice, length_choice],
        outputs=summary_output,
    )

# Launch app
if __name__ == "__main__":
    iface.launch()