import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline, AutoTokenizer

# Load summarization pipeline
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
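
# Illustrative sanity check (not executed by the app): the pipeline takes raw
# text and returns a list of dicts with a "summary_text" key, e.g.:
#
#   result = summarizer("Some long article text ...", max_length=60,
#                       min_length=20, do_sample=False)
#   print(result[0]["summary_text"])
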
def scrape_website(url):
    """Fetch a page and extract its title, meta description, and main text."""
    try:
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Extract title and meta description (either may be absent or empty)
        title = soup.title.string.strip() if soup.title and soup.title.string else ""
        meta_desc = soup.find("meta", attrs={"name": "description"})
        meta_desc = meta_desc["content"].strip() if meta_desc and meta_desc.get("content") else ""

        # Extract main text content
        # Note: nested matches (e.g. <p> inside <article>) can duplicate text.
        text_elements = soup.find_all(['p', 'article', 'main', 'section'])
        text = " ".join([e.get_text(strip=True, separator=' ') for e in text_elements])

        full_content = f"{title}\n{meta_desc}\n{text}".strip()
        return full_content if full_content else "No meaningful content found."
    except Exception as e:
        return f"Scraping Error: {str(e)}"


def truncate_text(text, max_tokens=1024):
    """Truncate text at the token level so it fits the model's context window."""
    tokens = tokenizer.tokenize(text)
    # Reserve room for the BOS/EOS tokens the pipeline adds when it re-tokenizes.
    return tokenizer.convert_tokens_to_string(tokens[:max_tokens - 2])
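
# Why token-level truncation: slicing characters can still exceed the model's
# token budget or split a word mid-token. A quick illustrative check (assumes
# the tokenizer round-trips plain words cleanly):
#
#   sample = "word " * 5000
#   assert len(tokenizer.tokenize(truncate_text(sample))) <= 1024
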
def summarize_website(url):
    """Scrape the URL, validate the extracted text, and return a Markdown summary."""
    try:
        extracted_text = scrape_website(url)
        if extracted_text.startswith("Scraping Error"):
            return "❌ " + extracted_text
        if len(extracted_text.split()) < 50:
            return "⚠️ Error: Insufficient content for summarization (minimum 50 words required)"

        truncated_text = truncate_text(extracted_text)
        summary = summarizer(
            truncated_text,
            max_length=250,  # upper bound on summary length, in tokens
            min_length=80,   # force a reasonably detailed summary
            do_sample=False
        )
        return f"## 📝 Summary\n\n{summary[0]['summary_text']}"
    except Exception as e:
        return f"❌ Summarization Error: {str(e)}"
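
# The summarizer can be exercised without the UI, e.g. from a REPL
# (URL taken from the examples below):
#
#   print(summarize_website("https://en.wikipedia.org/wiki/Large_language_model"))
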
# Custom CSS for better mobile experience
# (the Textbox below uses elem_id="input-box", so target it with #input-box)
css = """
@media screen and (max-width: 600px) {
    .container { padding: 10px !important; }
    #input-box textarea { font-size: 18px !important; }
    .gr-button { width: 100% !important; }
}
"""
# Mobile-optimized interface with real-time updates
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as app:
    gr.Markdown("# 🌐 AI Website Summarizer")
    gr.Markdown("Paste any website URL below to get an instant AI-powered summary!")

    with gr.Row():
        url_input = gr.Textbox(
            label="Website URL",
            placeholder="Enter full URL (https://...)",
            lines=1,
            max_lines=1,
            elem_id="input-box"
        )
    with gr.Row():
        submit_btn = gr.Button("Generate Summary 🚀", variant="primary")
        clear_btn = gr.Button("Clear 🧹")

    status = gr.Markdown("🟢 Ready for input...", elem_id="status-msg")
    output = gr.Markdown()

    gr.Examples(
        examples=[
            ["https://en.wikipedia.org/wiki/Large_language_model"],
            ["https://www.bbc.com/news/technology-66510295"]
        ],
        inputs=url_input,
        label="Try these examples:",
        examples_per_page=2
    )

    submit_btn.click(
        fn=summarize_website,
        inputs=url_input,
        outputs=[output],
        api_name="summarize"
    )
    clear_btn.click(
        fn=lambda: ("", "🟢 Ready for input...", ""),
        inputs=None,
        outputs=[url_input, status, output],  # also clear the previous summary
        queue=False
    )
# Mobile-friendly deployment
app.launch(
    server_name="0.0.0.0",
    server_port=7860,
    # Note: favicon_path expects a local file path; a remote URL may be ignored.
    favicon_path="https://www.svgrepo.com/show/355037/huggingface.svg"
)
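
# To run locally: `python app.py`, then open http://localhost:7860 in a browser.
# On Hugging Face Spaces, app.py is executed automatically and port 7860 is the
# expected default.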