"""Gradio app that summarizes text abstractively (BART) or extractively (TextRank)."""

import re

import gradio as gr
import networkx as nx
import nltk
import numpy as np
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline

# Download the NLTK sentence-tokenizer data used by sent_tokenize.
# NOTE(review): newer NLTK releases are understood to look up "punkt_tab"
# rather than "punkt"; both are requested so either version finds its data.
for _resource in ("punkt", "punkt_tab"):
    nltk.download(_resource)

# Load Transformer model for abstractive summarization (done once at import).
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Summary-length presets keyed by the values of the "Summary Length" radio.
_ABSTRACTIVE_MAX_LENGTH = {"short": 50, "medium": 100, "long": 150}
_EXTRACTIVE_NUM_SENTENCES = {"short": 2, "medium": 4, "long": 6}


def extractive_summarization(text, num_sentences=3):
    """Return the ``num_sentences`` highest-ranked sentences of ``text``.

    Sentences are vectorized with TF-IDF, connected in a graph weighted by
    cosine similarity, and scored with PageRank (TextRank). The selected
    sentences are joined with single spaces in descending score order.

    Args:
        text: The text to summarize.
        num_sentences: How many sentences to keep.

    Returns:
        The summary, or a human-readable message when the text has no more
        than ``num_sentences`` sentences, contains no usable terms, or the
        ranking step fails.
    """
    sentences = sent_tokenize(text)

    # Nothing to select from when every sentence would be kept anyway.
    if len(sentences) <= num_sentences:
        return "Text is too short for extractive summarization."

    try:
        vectorizer = TfidfVectorizer(stop_words="english")
        sentence_vectors = vectorizer.fit_transform(sentences)
    except ValueError:
        # fit_transform raises ValueError when no terms survive tokenization
        # and stop-word removal, i.e. there is nothing to compare sentences on.
        return "Insufficient unique content for extractive summarization."

    try:
        similarity_matrix = cosine_similarity(sentence_vectors)
        graph = nx.from_numpy_array(similarity_matrix)
        scores = nx.pagerank(graph)
        ranked_sentences = sorted(
            ((scores[i], s) for i, s in enumerate(sentences)),
            reverse=True,
        )
        return " ".join(s for _, s in ranked_sentences[:num_sentences])
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"Error in extractive summarization: {str(e)}"


def abstractive_summarization(text, length):
    """Summarize ``text`` with the module-level BART pipeline.

    Args:
        text: The text to summarize.
        length: One of ``"short"``, ``"medium"`` or ``"long"``; selects the
            maximum summary length passed to the model.

    Returns:
        The generated summary, or a message when ``text`` has fewer than
        30 whitespace-separated words.

    Raises:
        KeyError: If ``length`` is not one of the supported presets.
    """
    if len(text.split()) < 30:
        return "Text is too short for summarization."

    max_length = _ABSTRACTIVE_MAX_LENGTH[length]
    # truncation=True lets the pipeline clip over-long inputs to the model's
    # maximum input length instead of failing on them.
    result = summarizer(
        text,
        max_length=max_length,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return result[0]['summary_text']


def summarize_text(text, method, length):
    """Dispatch to the summarizer selected in the UI.

    Args:
        text: The text to summarize; ``None`` is treated as empty text.
        method: ``"Abstractive (BART)"`` selects the BART model; any other
            value selects extractive TextRank.
        length: One of ``"short"``, ``"medium"`` or ``"long"``.

    Returns:
        The summary text or a human-readable status message.
    """
    text = text or ""
    if method == "Abstractive (BART)":
        return abstractive_summarization(text, length)
    return extractive_summarization(text, _EXTRACTIVE_NUM_SENTENCES[length])


def process_file(file):
    """Return the UTF-8 text content of an uploaded file.

    Accepts the value shapes a file-upload component may deliver: ``None``
    (upload cleared), raw ``bytes``, a filesystem path string, a temp-file
    wrapper exposing the path as ``.name``, or a readable binary file object.

    Args:
        file: The uploaded file value.

    Returns:
        The decoded text, or a no-op ``gr.update()`` when ``file`` is ``None``
        so clearing the upload does not wipe text already in the input box.

    Raises:
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    if file is None:
        return gr.update()
    if isinstance(file, bytes):
        return file.decode("utf-8")

    path = file if isinstance(file, str) else getattr(file, "name", None)
    if isinstance(path, str):
        # Re-open by path so the result does not depend on the mode or the
        # current read position of whatever handle was passed in.
        with open(path, "rb") as handle:
            return handle.read().decode("utf-8")

    return file.read().decode("utf-8")


# UI with Gradio
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# 📄 AI-Powered Text Summarizer")
    gr.Markdown("Summarize long articles, news, and research papers using advanced NLP models.")

    with gr.Row():
        method_choice = gr.Radio(
            ["Abstractive (BART)", "Extractive (TextRank)"],
            label="Summarization Type",
            value="Abstractive (BART)",
        )
        length_choice = gr.Radio(
            ["short", "medium", "long"],
            label="Summary Length",
            value="medium",
        )

    text_input = gr.Textbox(lines=8, placeholder="Paste long text here...", label="Input Text")
    file_input = gr.File(label="Or Upload a .txt file")

    summarize_button = gr.Button("Summarize ✨")
    summary_output = gr.Textbox(lines=6, label="Summarized Text", interactive=False)

    # Uploading a file fills the input box; the button runs the summarizer.
    file_input.change(process_file, inputs=file_input, outputs=text_input)
    summarize_button.click(
        summarize_text,
        inputs=[text_input, method_choice, length_choice],
        outputs=summary_output,
    )

# Launch app
if __name__ == "__main__":
    iface.launch()