KSh100 committed
Commit 4d9e1a7 · verified · 1 parent: 2a808f8

Update app.py

Files changed (1): app.py (+10 -21)
app.py CHANGED
@@ -3,8 +3,7 @@ import urllib3
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
 
-# --- Custom HTTP Session and Response Classes ---
-
+# Custom HTTP Session and Response Classes
 class CustomSession:
     def __init__(self):
         self.pool_manager = urllib3.PoolManager()
@@ -22,19 +21,11 @@ class CustomResponse:
     def soup(self):
         return BeautifulSoup(self.content, 'lxml')
 
-    def clean_text(self):
-        soup = self.soup()
-        cleaned_text = soup.get_text().replace('\n', ' ').replace('\r', ' ').replace('  ', ' ')
-        while '  ' in cleaned_text:
-            cleaned_text = cleaned_text.replace('  ', ' ')
-        return cleaned_text.strip()
-
 def get(url):
     session = CustomSession()
     return session.get(url)
 
-# --- Utility Functions ---
-
+# Utility Functions
 def extract_texts(soup):
     """Extracts all text content from the soup."""
     return [text for text in soup.stripped_strings]
@@ -75,8 +66,7 @@ def format_detailed_output(structured_data):
         result += "No images found.\n"
     return result
 
-# --- Web Page Processing Function ---
-
+# Web Page Processing Function
 def download_and_process_web_page(url):
     """Downloads a web page and returns the structured content."""
     if not url.startswith("http://") and not url.startswith("https://"):
@@ -97,19 +87,18 @@ def download_and_process_web_page(url):
     except Exception as e:
         return f"Error processing web page: {e}"
 
-# --- Gradio Interface ---
-
+# Gradio Interface
 iface = gr.Interface(
     fn=download_and_process_web_page,
-    inputs=[
-        gr.Textbox(lines=1, placeholder="Enter URL of the web page"),
-    ],
-    outputs=[
-        gr.Markdown(label="Web Page Content"),
-    ],
+    inputs=gr.Textbox(lines=1, placeholder="Enter URL of the web page"),
+    outputs=gr.Markdown(label="Web Page Content"),
     title="Web Page Processor for Hugging Face Chat Tools",
     description="Enter the URL of a web page. The tool will extract and display the structured content of the page, including text, links, and images. This tool is designed for use with Hugging Face Chat Tools.",
     concurrency_limit=None,
+    api_mane="main"
+)
+
+iface.launch()
     api_name="main"
 )
 
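
The commit removes CustomResponse.clean_text, which collapsed newlines, carriage returns, and runs of spaces in the extracted page text. A caller that still wants that behaviour could approximate it with a small standalone helper; the sketch below is illustrative only (the helper name and the regex approach are assumptions, not part of this commit) and relies solely on the soup() method that CustomResponse keeps.

import re

def collapse_whitespace(response):
    """Roughly reproduce the removed clean_text: flatten all whitespace runs to single spaces."""
    text = response.soup().get_text()
    return re.sub(r"\s+", " ", text).strip()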
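
As committed, the end of the new app.py looks inconsistent: the added keyword is spelled api_mane rather than api_name, and the old api_name="main" line and its closing parenthesis remain below iface.launch(), which would not parse. Assuming the intent was simply to pass the components directly and expose the endpoint as "main", the tail of the file would presumably look like the following sketch (an illustration, not the committed code):

iface = gr.Interface(
    fn=download_and_process_web_page,
    inputs=gr.Textbox(lines=1, placeholder="Enter URL of the web page"),
    outputs=gr.Markdown(label="Web Page Content"),
    title="Web Page Processor for Hugging Face Chat Tools",
    description="Enter the URL of a web page. The tool will extract and display the structured content of the page, including text, links, and images. This tool is designed for use with Hugging Face Chat Tools.",
    concurrency_limit=None,
    api_name="main",  # assumed spelling; the commit writes api_mane
)

iface.launch()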
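
Because the interface is registered with api_name="main", the processing function is reachable as a named endpoint (typically /main) once the Space is running, which is how Hugging Face Chat Tools or any gradio_client consumer would call it. A hypothetical invocation, with the Space id left as a placeholder, might look like:

from gradio_client import Client

client = Client("KSh100/<space-id>")  # placeholder Space id, not taken from the commit
result = client.predict("https://example.com", api_name="/main")
print(result)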