"""Gradio app that scrapes a web page and summarizes it with DistilBART."""

import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline, AutoTokenizer

# Load summarization pipeline
MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
summarizer = pipeline("summarization", model=MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Input budget used when truncating text before summarization.
MAX_INPUT_TOKENS = 1024

# Tags whose text is treated as page content.
CONTENT_TAGS = ['p', 'article', 'main', 'section']


def _extract_page_text(url):
    """Download *url* and return its title, meta description and body text.

    Unlike ``scrape_website`` this helper raises on failure (network errors,
    HTTP error statuses, ...) so callers can tell a failure apart from page
    text that merely contains the word "Error".

    Returns:
        The stripped text ``"{title}\\n{meta_desc}\\n{text}"``; an empty
        string when the page yields no text at all.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract title and meta description.
    # get_text() is used instead of .string because .string is None for an
    # empty <title> or one that contains nested markup.
    title = soup.title.get_text(strip=True) if soup.title else ""
    meta_tag = soup.find("meta", attrs={"name": "description"})
    # .get() tolerates a description tag that has no "content" attribute.
    meta_desc = (meta_tag.get("content") or "").strip() if meta_tag else ""

    # Extract main text content.
    # Keep only the outermost matching elements: get_text() on a container
    # already includes the text of every nested <p>/<section>/..., so keeping
    # nested matches as well would repeat the same text several times.
    text_elements = [
        element
        for element in soup.find_all(CONTENT_TAGS)
        if element.find_parent(CONTENT_TAGS) is None
    ]
    text = " ".join(
        element.get_text(strip=True, separator=' ') for element in text_elements
    )

    return f"{title}\n{meta_desc}\n{text}".strip()


def scrape_website(url):
    """Enhanced extraction with metadata support.

    Args:
        url: Address of the page to fetch.

    Returns:
        The extracted page text, ``"No meaningful content found."`` when the
        page yields no text, or ``"Scraping Error: <reason>"`` when fetching
        or parsing fails. This function never raises.
    """
    try:
        full_content = _extract_page_text(url)
    except Exception as e:  # boundary: report any failure as a message
        return f"Scraping Error: {str(e)}"
    return full_content if full_content else "No meaningful content found."


def truncate_text(text, max_tokens=1024):
    """Properly truncates text at the token level.

    Args:
        text: Text to shorten.
        max_tokens: Maximum number of tokenizer tokens to keep.

    Returns:
        The string rebuilt from the first ``max_tokens`` tokens of ``text``.
    """
    tokens = tokenizer.tokenize(text)
    return tokenizer.convert_tokens_to_string(tokens[:max_tokens])


def summarize_website(url):
    """Scrape *url* and return a Markdown-formatted summary of its content.

    Args:
        url: Address entered by the user; surrounding whitespace is ignored.

    Returns:
        A Markdown string: the summary on success, otherwise a message
        describing the scraping or summarization failure. Never raises.
    """
    url = (url or "").strip()

    try:
        extracted_text = _extract_page_text(url)
    except Exception as e:
        return f"❌ Scraping Error: {str(e)}"

    if len(extracted_text.split()) < 50:
        return "⚠️ Error: Insufficient content for summarization (minimum 50 words required)"

    try:
        # Leave room for the special tokens added when the pipeline encodes
        # the text, so the encoded input stays within the input budget.
        token_budget = MAX_INPUT_TOKENS - tokenizer.num_special_tokens_to_add(pair=False)
        truncated_text = truncate_text(extracted_text, max_tokens=token_budget)

        summary = summarizer(
            truncated_text,
            max_length=250,  # Increased summary length
            min_length=80,   # Ensuring more detailed output
            do_sample=False,
            # Safety net: re-tokenizing the decoded string is not guaranteed
            # to give exactly the same number of tokens.
            truncation=True,
        )

        return f"## 📝 Summary\n\n{summary[0]['summary_text']}"
    except Exception as e:
        return f"⛔ Summarization Error: {str(e)}"


# Custom CSS for better mobile experience.
# The URL box is targeted by id because it is created with elem_id="input-box";
# both <textarea> and <input> are listed to cover either rendering of the box.
css = """
@media screen and (max-width: 600px) {
    .container { padding: 10px !important; }
    #input-box textarea, #input-box input { font-size: 18px !important; }
    .gr-button { width: 100% !important; }
}
"""

# Mobile-optimized interface
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as app:
    gr.Markdown("# 🌐 AI Website Summarizer")
    gr.Markdown("Paste any website URL below to get an instant AI-powered summary!")

    with gr.Row():
        url_input = gr.Textbox(
            label="Website URL",
            placeholder="Enter full URL (https://...)",
            lines=1,
            max_lines=1,
            elem_id="input-box"
        )

    with gr.Row():
        submit_btn = gr.Button("Generate Summary 🚀", variant="primary")
        clear_btn = gr.Button("Clear 🔄")

    status = gr.Markdown("🔄 Ready for input...", elem_id="status-msg")
    output = gr.Markdown()

    gr.Examples(
        examples=[
            ["https://en.wikipedia.org/wiki/Large_language_model"],
            ["https://www.bbc.com/news/technology-66510295"]
        ],
        inputs=url_input,
        label="Try these examples:",
        examples_per_page=2
    )

    submit_btn.click(
        fn=summarize_website,
        inputs=url_input,
        outputs=[output],
        api_name="summarize"
    )

    # Reset the URL box, the status line and any previously shown summary.
    clear_btn.click(
        fn=lambda: ("", "🔄 Ready for input...", ""),
        inputs=None,
        outputs=[url_input, status, output],
        queue=False
    )

# Mobile-friendly deployment
# NOTE(review): favicon_path is documented as a path to a local image file;
# a remote URL is passed here -- confirm the favicon is actually served.
app.launch(
    server_name="0.0.0.0",
    server_port=7860,
    favicon_path="https://www.svgrepo.com/show/355037/huggingface.svg"
)