adithya747 committed on
Commit
6078dfb
·
verified ·
1 Parent(s): bd94f82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -49
app.py CHANGED
@@ -1,81 +1,75 @@
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
- from transformers import pipeline, AutoTokenizer
5
 
6
  # Load summarization pipeline
7
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
8
- tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
9
 
10
  def scrape_website(url):
11
- """Enhanced extraction with metadata support"""
12
  try:
13
  headers = {'User-Agent': 'Mozilla/5.0'}
14
  response = requests.get(url, headers=headers, timeout=10)
15
  response.raise_for_status()
16
 
17
  soup = BeautifulSoup(response.text, "html.parser")
18
-
19
- # Extract title and meta description
20
- title = soup.title.string.strip() if soup.title else ""
21
- meta_desc = soup.find("meta", attrs={"name": "description"})
22
- meta_desc = meta_desc["content"].strip() if meta_desc else ""
23
-
24
- # Extract main text content
25
  text_elements = soup.find_all(['p', 'article', 'main', 'section'])
26
  text = " ".join([e.get_text(strip=True, separator=' ') for e in text_elements])
27
-
28
- full_content = f"{title}\n{meta_desc}\n{text}".strip()
29
-
30
- return full_content if full_content else "No meaningful content found."
31
-
32
  except Exception as e:
33
  return f"Scraping Error: {str(e)}"
34
 
35
- def truncate_text(text, max_tokens=1024):
36
- """Properly truncates text at the token level."""
37
- tokens = tokenizer.tokenize(text)
38
- return tokenizer.convert_tokens_to_string(tokens[:max_tokens])
39
-
40
  def summarize_website(url):
41
- """Updated function with real-time status"""
42
  try:
43
  extracted_text = scrape_website(url)
44
-
45
  if "Error" in extracted_text:
46
- return "❌ " + extracted_text
47
 
48
  if len(extracted_text.split()) < 50:
49
  return "⚠️ Error: Insufficient content for summarization (minimum 50 words required)"
50
-
51
- truncated_text = truncate_text(extracted_text)
52
-
 
 
 
53
  summary = summarizer(
54
  truncated_text,
55
- max_length=250, # Increased summary length
56
- min_length=80, # Ensuring more detailed output
57
  do_sample=False
58
  )
59
-
 
 
 
60
  return f"## πŸ“ Summary\n\n{summary[0]['summary_text']}"
61
-
62
  except Exception as e:
63
  return f"β›” Summarization Error: {str(e)}"
64
 
65
- # Custom CSS for better mobile experience
66
  css = """
67
  @media screen and (max-width: 600px) {
68
- .container { padding: 10px !important; }
69
- .input-box textarea { font-size: 18px !important; }
70
- .gr-button { width: 100% !important; }
 
 
 
71
  }
72
  """
73
 
74
- # Mobile-optimized interface with real-time updates
75
  with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as app:
76
  gr.Markdown("# 🌐 AI Website Summarizer")
77
  gr.Markdown("Paste any website URL below to get an instant AI-powered summary!")
78
-
79
  with gr.Row():
80
  url_input = gr.Textbox(
81
  label="Website URL",
@@ -84,14 +78,14 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as a
84
  max_lines=1,
85
  elem_id="input-box"
86
  )
87
-
88
  with gr.Row():
89
  submit_btn = gr.Button("Generate Summary πŸš€", variant="primary")
90
  clear_btn = gr.Button("Clear πŸ”„")
91
-
92
- status = gr.Markdown("πŸ”„ Ready for input...", elem_id="status-msg")
93
  output = gr.Markdown()
94
-
 
95
  gr.Examples(
96
  examples=[
97
  ["https://en.wikipedia.org/wiki/Large_language_model"],
@@ -101,24 +95,27 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as a
101
  label="Try these examples:",
102
  examples_per_page=2
103
  )
104
-
 
 
 
 
105
  submit_btn.click(
106
  fn=summarize_website,
107
  inputs=url_input,
108
- outputs=[output],
109
  api_name="summarize"
110
  )
111
-
112
  clear_btn.click(
113
- fn=lambda: ("", "πŸ”„ Ready for input..."),
114
  inputs=None,
115
- outputs=[url_input, status],
116
  queue=False
117
  )
118
 
119
- # Mobile-friendly deployment
120
  app.launch(
121
- server_name="0.0.0.0",
122
- server_port=7860,
123
- favicon_path="https://www.svgrepo.com/show/355037/huggingface.svg"
124
  )
 
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
+ from transformers import pipeline
5
 
6
  # Load summarization pipeline
7
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 
8
 
9
  def scrape_website(url):
10
+ """Extracts text from a website with error handling"""
11
  try:
12
  headers = {'User-Agent': 'Mozilla/5.0'}
13
  response = requests.get(url, headers=headers, timeout=10)
14
  response.raise_for_status()
15
 
16
  soup = BeautifulSoup(response.text, "html.parser")
 
 
 
 
 
 
 
17
  text_elements = soup.find_all(['p', 'article', 'main', 'section'])
18
  text = " ".join([e.get_text(strip=True, separator=' ') for e in text_elements])
19
+
20
+ return text.strip() if text.strip() else "No content found"
21
+
 
 
22
  except Exception as e:
23
  return f"Scraping Error: {str(e)}"
24
 
 
 
 
 
 
25
  def summarize_website(url):
26
+ """Handles website summarization with proper truncation"""
27
  try:
28
  extracted_text = scrape_website(url)
29
+
30
  if "Error" in extracted_text:
31
+ return f"❌ {extracted_text}"
32
 
33
  if len(extracted_text.split()) < 50:
34
  return "⚠️ Error: Insufficient content for summarization (minimum 50 words required)"
35
+
36
+ # Ensure input is within token limit
37
+ max_input_tokens = 1024 # Model limit
38
+ truncated_text = " ".join(extracted_text.split()[:max_input_tokens])
39
+
40
+ # Summarization
41
  summary = summarizer(
42
  truncated_text,
43
+ max_length=250, # Extended summary
44
+ min_length=80,
45
  do_sample=False
46
  )
47
+
48
+ if not summary or not isinstance(summary, list) or "summary_text" not in summary[0]:
49
+ return "⚠️ Error: Summarization failed. Try a different website."
50
+
51
  return f"## πŸ“ Summary\n\n{summary[0]['summary_text']}"
52
+
53
  except Exception as e:
54
  return f"β›” Summarization Error: {str(e)}"
55
 
56
+ # Custom CSS for mobile optimization
57
  css = """
58
  @media screen and (max-width: 600px) {
59
+ .container {
60
+ padding: 10px !important;
61
+ }
62
+ .input-box textarea {
63
+ font-size: 16px !important;
64
+ }
65
  }
66
  """
67
 
68
+ # Mobile-optimized interface with Blocks API
69
  with gr.Blocks(theme=gr.themes.Soft(), css=css, title="Website Summarizer") as app:
70
  gr.Markdown("# 🌐 AI Website Summarizer")
71
  gr.Markdown("Paste any website URL below to get an instant AI-powered summary!")
72
+
73
  with gr.Row():
74
  url_input = gr.Textbox(
75
  label="Website URL",
 
78
  max_lines=1,
79
  elem_id="input-box"
80
  )
81
+
82
  with gr.Row():
83
  submit_btn = gr.Button("Generate Summary πŸš€", variant="primary")
84
  clear_btn = gr.Button("Clear πŸ”„")
85
+
 
86
  output = gr.Markdown()
87
+
88
+ # Example section
89
  gr.Examples(
90
  examples=[
91
  ["https://en.wikipedia.org/wiki/Large_language_model"],
 
95
  label="Try these examples:",
96
  examples_per_page=2
97
  )
98
+
99
+ # Progress indicator
100
+ progress = gr.Textbox(visible=False)
101
+
102
+ # Event handlers
103
  submit_btn.click(
104
  fn=summarize_website,
105
  inputs=url_input,
106
+ outputs=output,
107
  api_name="summarize"
108
  )
109
+
110
  clear_btn.click(
111
+ fn=lambda: ("", ""),
112
  inputs=None,
113
+ outputs=[url_input, output],
114
  queue=False
115
  )
116
 
117
+ # Launch the app without broken favicon
118
  app.launch(
119
+ server_name="0.0.0.0",
120
+ server_port=7860
 
121
  )