Spaces:

KSh100
/

websearch

Sleeping

App Files Files Community

KSh100 committed on Mar 26

Commit

4d9e1a7

verified ·

1 Parent(s): 2a808f8

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -21

app.py CHANGED Viewed

@@ -3,8 +3,7 @@ import urllib3
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
-# --- Custom HTTP Session and Response Classes ---
 class CustomSession:
     def __init__(self):
         self.pool_manager = urllib3.PoolManager()
@@ -22,19 +21,11 @@ class CustomResponse:
     def soup(self):
         return BeautifulSoup(self.content, 'lxml')
-    def clean_text(self):
-        soup = self.soup()
-        cleaned_text = soup.get_text().replace('\n', ' ').replace('\r', ' ').replace('  ', ' ')
-        while '  ' in cleaned_text:
-            cleaned_text = cleaned_text.replace('  ', ' ')
-        return cleaned_text.strip()
 def get(url):
     session = CustomSession()
     return session.get(url)
-# --- Utility Functions ---
 def extract_texts(soup):
     """Extracts all text content from the soup."""
     return [text for text in soup.stripped_strings]
@@ -75,8 +66,7 @@ def format_detailed_output(structured_data):
         result += "No images found.\n"
     return result
-# --- Web Page Processing Function ---
 def download_and_process_web_page(url):
     """Downloads a web page and returns the structured content."""
     if not url.startswith("http://") and not url.startswith("https://"):
@@ -97,19 +87,18 @@ def download_and_process_web_page(url):
     except Exception as e:
         return f"Error processing web page: {e}"
-# --- Gradio Interface ---
 iface = gr.Interface(
     fn=download_and_process_web_page,
-    inputs=[
-        gr.Textbox(lines=1, placeholder="Enter URL of the web page"),
-    ],
-    outputs=[
-        gr.Markdown(label="Web Page Content"),
-    ],
     title="Web Page Processor for Hugging Face Chat Tools",
     description="Enter the URL of a web page. The tool will extract and display the structured content of the page, including text, links, and images. This tool is designed for use with Hugging Face Chat Tools.",
     concurrency_limit=None,
     api_name="main"
 )

 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
+# Custom HTTP Session and Response Classes
 class CustomSession:
     def __init__(self):
         self.pool_manager = urllib3.PoolManager()
     def soup(self):
         return BeautifulSoup(self.content, 'lxml')
 def get(url):
     session = CustomSession()
     return session.get(url)
+# Utility Functions
 def extract_texts(soup):
     """Extracts all text content from the soup."""
     return [text for text in soup.stripped_strings]
         result += "No images found.\n"
     return result
+# Web Page Processing Function
 def download_and_process_web_page(url):
     """Downloads a web page and returns the structured content."""
     if not url.startswith("http://") and not url.startswith("https://"):
     except Exception as e:
         return f"Error processing web page: {e}"
+# Gradio Interface
 iface = gr.Interface(
     fn=download_and_process_web_page,
+    inputs=gr.Textbox(lines=1, placeholder="Enter URL of the web page"),
+    outputs=gr.Markdown(label="Web Page Content"),
     title="Web Page Processor for Hugging Face Chat Tools",
     description="Enter the URL of a web page. The tool will extract and display the structured content of the page, including text, links, and images. This tool is designed for use with Hugging Face Chat Tools.",
     concurrency_limit=None,
+    api_name="main"
+)
+iface.launch()
     api_name="main"
 )