adithya747 committed on
Commit
1936100
·
verified ·
1 Parent(s): c0d7d7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -12
app.py CHANGED
@@ -1,22 +1,40 @@
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
 
4
 
5
- def summarize_website(url):
 
 
 
 
6
  try:
7
- response = requests.get(url)
8
  soup = BeautifulSoup(response.text, "html.parser")
9
  paragraphs = soup.find_all("p")
10
-
11
- # Extract more content (e.g., first 10 paragraphs)
12
- text = "\n\n".join([p.get_text() for p in paragraphs[:10]])
13
-
14
- # Format text as Markdown
15
- markdown_summary = f"## Website Summary\n\n{text}" if text else "No content found."
16
-
17
- return markdown_summary
18
  except Exception as e:
19
- return f"**Error:** {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- iface = gr.Interface(fn=summarize_website, inputs="text", outputs=gr.Markdown(), title="Website Summarizer")
22
  iface.launch()
 
1
  import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
+ from transformers import pipeline
5
 
6
# Build the Hugging Face summarization pipeline once at import time so every
# request reuses the same loaded model.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
8
+
9
def scrape_website(url):
    """Fetch a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to download.

    Returns:
        The paragraph texts joined by single spaces, ``"No content found."``
        when the page yields no paragraph text, or a string starting with
        ``"Error: "`` when the HTTP request fails.
    """
    try:
        response = requests.get(url, timeout=10)
        # Without this check a 404/500 error page would be scraped and
        # returned as if it were the real content.
        response.raise_for_status()
    except requests.RequestException as e:
        # Failures are reported in-band, as a string with this prefix.
        return f"Error: {e}"

    soup = BeautifulSoup(response.text, "html.parser")
    # Strip each paragraph and drop empty ones so a page made only of blank
    # <p> tags falls through to the "No content found." fallback.
    fragments = (p.get_text().strip() for p in soup.find_all("p"))
    text = " ".join(fragment for fragment in fragments if fragment)
    return text if text else "No content found."
19
+
20
def summarize_website(url):
    """Scrape a website and summarize the extracted paragraph text.

    Args:
        url: Address of the page to summarize.

    Returns:
        A Markdown string holding the generated summary, or the message
        ``"Could not extract enough text to summarize."`` when scraping
        failed or produced fewer than 50 words.
    """
    extracted_text = scrape_website(url)

    # scrape_website reports failures as a string that *starts* with
    # "Error:". A substring test would also reject ordinary pages that
    # merely mention "Error:" somewhere in their text.
    scrape_failed = extracted_text.startswith("Error:")
    if scrape_failed or len(extracted_text.split()) < 50:
        return "Could not extract enough text to summarize."

    # truncation=True clips inputs longer than the model accepts instead of
    # letting the pipeline fail on long pages.
    summary = summarizer(
        extracted_text,
        max_length=200,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    return f"**Summary:**\n\n{summary[0]['summary_text']}"
30
+
31
# Gradio UI: one text box for the URL in, rendered Markdown out.
interface_options = {
    "fn": summarize_website,
    "inputs": "text",
    "outputs": "markdown",
    "title": "AI-Powered Website Summarizer",
    "description": "Enter a website URL, and this tool will summarize its content using an AI model.",
}
iface = gr.Interface(**interface_options)

iface.launch()