# adithya747's picture
# Update app.py
# bd94f82 verified
# raw
# history blame
# 4.06 kB
import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline, AutoTokenizer
# Load the tokenizer and the summarization pipeline from the same checkpoint,
# so token counts computed with the tokenizer match what the model sees.
MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
summarizer = pipeline("summarization", model=MODEL_NAME)
def scrape_website(url):
    """Download a web page and return its title, meta description and text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page title, meta description and extracted text joined by
        newlines. Returns "No meaningful content found." when nothing could
        be extracted, or a string starting with "Scraping Error: " when the
        request or parsing fails (failures are reported as text, not raised).
    """
    content_tags = ['p', 'article', 'main', 'section']
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Extract title and meta description.
        # get_text() is used instead of .string because .string is None when
        # <title> is empty or contains child tags.
        title = soup.title.get_text(separator=" ", strip=True) if soup.title else ""
        meta_tag = soup.find("meta", attrs={"name": "description"})
        # A description tag may lack a "content" attribute; treat that as empty.
        meta_desc = (meta_tag.get("content") or "").strip() if meta_tag else ""

        # Extract main text content. Only keep outermost matches: a <p> inside
        # an <article> is already part of the article's text, so collecting
        # both would duplicate it.
        text_elements = [
            element
            for element in soup.find_all(content_tags)
            if element.find_parent(content_tags) is None
        ]
        text = " ".join(
            element.get_text(strip=True, separator=' ') for element in text_elements
        )

        full_content = f"{title}\n{meta_desc}\n{text}".strip()
        return full_content if full_content else "No meaningful content found."
    except Exception as e:
        # Callers expect an error string rather than an exception.
        return f"Scraping Error: {str(e)}"
def truncate_text(text, max_tokens=1024):
    """Return *text* cut down to its first ``max_tokens`` tokenizer tokens.

    The text is split with the module-level tokenizer, the leading tokens are
    kept, and the kept tokens are converted back into a string.
    """
    kept_tokens = tokenizer.tokenize(text)[:max_tokens]
    shortened = tokenizer.convert_tokens_to_string(kept_tokens)
    return shortened
def summarize_website(url):
    """Scrape *url* and return a Markdown summary of the page.

    Args:
        url: Address of the page to summarize.

    Returns:
        A Markdown string: the summary under a "Summary" heading on success,
        or a message describing the scraping failure, the lack of content
        (fewer than 50 words), or the summarization error.
    """
    try:
        extracted_text = scrape_website(url)
        # scrape_website reports failures as text with this exact prefix.
        # Match the prefix rather than any "Error" substring, so pages that
        # merely mention the word "Error" are still summarized.
        if extracted_text.startswith("Scraping Error:"):
            return "❌ " + extracted_text
        if len(extracted_text.split()) < 50:
            return "⚠️ Error: Insufficient content for summarization (minimum 50 words required)"
        truncated_text = truncate_text(extracted_text)
        summary = summarizer(
            truncated_text,
            max_length=250,  # Increased summary length
            min_length=80,  # Ensuring more detailed output
            do_sample=False,
            # Safety net: the pre-truncated text can still exceed the model
            # limit once special tokens are added during encoding.
            truncation=True,
        )
        return f"## 📝 Summary\n\n{summary[0]['summary_text']}"
    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing the app.
        return f"⛔ Summarization Error: {str(e)}"
# Custom CSS for narrow (<= 600px wide) screens: tighter container padding,
# a larger font in the input box, and full-width buttons.
css = "\n".join((
    "",
    "@media screen and (max-width: 600px) {",
    ".container { padding: 10px !important; }",
    ".input-box textarea { font-size: 18px !important; }",
    ".gr-button { width: 100% !important; }",
    "}",
    "",
))
# Mobile-optimized interface: URL input, submit/clear buttons, a status line,
# the summary output, and two example URLs.
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as app:
    gr.Markdown("# 🌐 AI Website Summarizer")
    gr.Markdown("Paste any website URL below to get an instant AI-powered summary!")

    with gr.Row():
        url_input = gr.Textbox(
            label="Website URL",
            placeholder="Enter full URL (https://...)",
            lines=1,
            max_lines=1,
            elem_id="input-box",
            # The stylesheet passed to gr.Blocks targets the `.input-box`
            # class selector, so the element needs the class as well as the id.
            elem_classes=["input-box"],
        )

    with gr.Row():
        submit_btn = gr.Button("Generate Summary 🚀", variant="primary")
        clear_btn = gr.Button("Clear 🔄")

    status = gr.Markdown("🔄 Ready for input...", elem_id="status-msg")
    output = gr.Markdown()

    gr.Examples(
        examples=[
            ["https://en.wikipedia.org/wiki/Large_language_model"],
            ["https://www.bbc.com/news/technology-66510295"],
        ],
        inputs=url_input,
        label="Try these examples:",
        examples_per_page=2,
    )

    # Summarize the entered URL and render the result as Markdown.
    submit_btn.click(
        fn=summarize_website,
        inputs=url_input,
        outputs=[output],
        api_name="summarize",
    )

    # Reset the URL box, the status line and the previous summary.
    clear_btn.click(
        fn=lambda: ("", "🔄 Ready for input...", ""),
        inputs=None,
        outputs=[url_input, status, output],
        queue=False,
    )
# Mobile-friendly deployment: listen on all network interfaces, port 7860.
# favicon_path is intentionally not set: Gradio expects a path to a local
# image file there, and the remote URL used previously
# (https://www.svgrepo.com/show/355037/huggingface.svg) cannot be served as one.
app.launch(
    server_name="0.0.0.0",
    server_port=7860,
)