adithya747 committed on
Commit
bd94f82
·
verified ·
1 Parent(s): 90a9f3d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -48
app.py CHANGED
@@ -1,69 +1,77 @@
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
- from transformers import pipeline
5
 
6
  # Load summarization pipeline
7
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 
8
 
9
  def scrape_website(url):
10
- """Extracts text from a website with error handling"""
11
  try:
12
  headers = {'User-Agent': 'Mozilla/5.0'}
13
  response = requests.get(url, headers=headers, timeout=10)
14
  response.raise_for_status()
15
 
16
  soup = BeautifulSoup(response.text, "html.parser")
 
 
 
 
 
 
 
17
  text_elements = soup.find_all(['p', 'article', 'main', 'section'])
18
  text = " ".join([e.get_text(strip=True, separator=' ') for e in text_elements])
19
- return text.strip() if text.strip() else "No content found"
 
 
 
20
 
21
  except Exception as e:
22
  return f"Scraping Error: {str(e)}"
23
 
 
 
 
 
 
24
  def summarize_website(url):
25
- """Handles the full summarization pipeline"""
26
  try:
27
- with gr.Column(variant="panel"):
28
- gr.Markdown("## ⚡ Processing...")
29
-
30
- extracted_text = scrape_website(url)
31
-
32
- if "Error" in extracted_text:
33
- return f"❌ {extracted_text}"
34
-
35
- if len(extracted_text.split()) < 50:
36
- return "⚠️ Error: Insufficient content for summarization (minimum 50 words required)"
37
-
38
- max_input_length = 1000
39
- truncated_text = extracted_text[:max_input_length]
40
-
41
- summary = summarizer(
42
- truncated_text,
43
- max_length=200,
44
- min_length=50,
45
- do_sample=False,
46
- truncation=True
47
- )
48
-
49
- return f"## 📝 Summary\n\n{summary[0]['summary_text']}"
50
-
51
  except Exception as e:
52
  return f"⛔ Summarization Error: {str(e)}"
53
 
54
- # Custom CSS for mobile optimization
55
  css = """
56
  @media screen and (max-width: 600px) {
57
- .container {
58
- padding: 10px !important;
59
- }
60
- .input-box textarea {
61
- font-size: 16px !important;
62
- }
63
  }
64
  """
65
 
66
- # Mobile-optimized interface with Blocks API
67
  with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as app:
68
  gr.Markdown("# 🌐 AI Website Summarizer")
69
  gr.Markdown("Paste any website URL below to get an instant AI-powered summary!")
@@ -81,9 +89,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as a
81
  submit_btn = gr.Button("Generate Summary 🚀", variant="primary")
82
  clear_btn = gr.Button("Clear 🔄")
83
 
 
84
  output = gr.Markdown()
85
 
86
- # Example section
87
  gr.Examples(
88
  examples=[
89
  ["https://en.wikipedia.org/wiki/Large_language_model"],
@@ -94,27 +102,23 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as a
94
  examples_per_page=2
95
  )
96
 
97
- # Progress indicator
98
- progress = gr.Textbox(visible=False)
99
-
100
- # Event handlers
101
  submit_btn.click(
102
  fn=summarize_website,
103
  inputs=url_input,
104
- outputs=output,
105
  api_name="summarize"
106
  )
107
 
108
  clear_btn.click(
109
- fn=lambda: ("", ""),
110
  inputs=None,
111
- outputs=[url_input, output],
112
  queue=False
113
  )
114
 
115
- # Mobile-friendly configuration
116
  app.launch(
117
- server_name="0.0.0.0",
118
- server_port=7860,
119
  favicon_path="https://www.svgrepo.com/show/355037/huggingface.svg"
120
- )
 
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
+ from transformers import pipeline, AutoTokenizer
5
 
6
  # Load summarization pipeline
7
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
8
+ tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
9
 
10
  def scrape_website(url):
11
+ """Enhanced extraction with metadata support"""
12
  try:
13
  headers = {'User-Agent': 'Mozilla/5.0'}
14
  response = requests.get(url, headers=headers, timeout=10)
15
  response.raise_for_status()
16
 
17
  soup = BeautifulSoup(response.text, "html.parser")
18
+
19
+ # Extract title and meta description
20
+ title = soup.title.string.strip() if soup.title else ""
21
+ meta_desc = soup.find("meta", attrs={"name": "description"})
22
+ meta_desc = meta_desc["content"].strip() if meta_desc else ""
23
+
24
+ # Extract main text content
25
  text_elements = soup.find_all(['p', 'article', 'main', 'section'])
26
  text = " ".join([e.get_text(strip=True, separator=' ') for e in text_elements])
27
+
28
+ full_content = f"{title}\n{meta_desc}\n{text}".strip()
29
+
30
+ return full_content if full_content else "No meaningful content found."
31
 
32
  except Exception as e:
33
  return f"Scraping Error: {str(e)}"
34
 
35
+ def truncate_text(text, max_tokens=1024):
36
+ """Properly truncates text at the token level."""
37
+ tokens = tokenizer.tokenize(text)
38
+ return tokenizer.convert_tokens_to_string(tokens[:max_tokens])
39
+
40
  def summarize_website(url):
41
+ """Updated function with real-time status"""
42
  try:
43
+ extracted_text = scrape_website(url)
44
+
45
+ if "Error" in extracted_text:
46
+ return "❌ " + extracted_text
47
+
48
+ if len(extracted_text.split()) < 50:
49
+ return "⚠️ Error: Insufficient content for summarization (minimum 50 words required)"
50
+
51
+ truncated_text = truncate_text(extracted_text)
52
+
53
+ summary = summarizer(
54
+ truncated_text,
55
+ max_length=250, # Increased summary length
56
+ min_length=80, # Ensuring more detailed output
57
+ do_sample=False
58
+ )
59
+
60
+ return f"## 📝 Summary\n\n{summary[0]['summary_text']}"
61
+
 
 
 
 
 
62
  except Exception as e:
63
  return f"⛔ Summarization Error: {str(e)}"
64
 
65
+ # Custom CSS for better mobile experience
66
  css = """
67
  @media screen and (max-width: 600px) {
68
+ .container { padding: 10px !important; }
69
+ .input-box textarea { font-size: 18px !important; }
70
+ .gr-button { width: 100% !important; }
 
 
 
71
  }
72
  """
73
 
74
+ # Mobile-optimized interface with real-time updates
75
  with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as app:
76
  gr.Markdown("# 🌐 AI Website Summarizer")
77
  gr.Markdown("Paste any website URL below to get an instant AI-powered summary!")
 
89
  submit_btn = gr.Button("Generate Summary 🚀", variant="primary")
90
  clear_btn = gr.Button("Clear 🔄")
91
 
92
+ status = gr.Markdown("🔄 Ready for input...", elem_id="status-msg")
93
  output = gr.Markdown()
94
 
 
95
  gr.Examples(
96
  examples=[
97
  ["https://en.wikipedia.org/wiki/Large_language_model"],
 
102
  examples_per_page=2
103
  )
104
 
 
 
 
 
105
  submit_btn.click(
106
  fn=summarize_website,
107
  inputs=url_input,
108
+ outputs=[output],
109
  api_name="summarize"
110
  )
111
 
112
  clear_btn.click(
113
+ fn=lambda: ("", "🔄 Ready for input..."),
114
  inputs=None,
115
+ outputs=[url_input, status],
116
  queue=False
117
  )
118
 
119
+ # Mobile-friendly deployment
120
  app.launch(
121
+ server_name="0.0.0.0",
122
+ server_port=7860,
123
  favicon_path="https://www.svgrepo.com/show/355037/huggingface.svg"
124
+ )