Update app.py
app.py CHANGED
@@ -1,8 +1,7 @@
-import
+import gradio as gr
 import urllib3
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
-import gradio as gr
 
 # --- Custom HTTP Session and Response Classes ---
 
@@ -63,42 +62,28 @@ def extract_images(soup, base_url):
 def format_detailed_output(structured_data):
     """Formats the structured data into a Markdown string."""
     result = "### Structured Page Content\n\n"
-
-    # Texts
-    result += "**Texts:**\n"
-    if structured_data["Texts"]:
-        result += " ".join(structured_data["Texts"]) + "\n\n"
-    else:
-        result += "No textual content found.\n\n"
-
-    # Links
+    result += "**Texts:**\n" + (" ".join(structured_data["Texts"]) if structured_data["Texts"] else "No textual content found.") + "\n\n"
     result += "**Links:**\n"
     if structured_data["Links"]:
-        result += "\n".join(f"[{link['Text']}]({link['URL']})" for link in structured_data["Links"]) + "\n\n"
+        result += "\n".join(f"[{link['Text']}]({link['URL']})" for link in structured_data["Links"]) + "\n"
     else:
-        result += "No links found.\n\n"
-
-    # Images
+        result += "No links found.\n"
     result += "**Images:**\n"
     if structured_data["Images"]:
-        result += "\n".join(f"![{img['Alt Text']}]({img['Image URL']})" for img in structured_data["Images"]) + "\n\n"
+        result += "\n".join(f"![{img['Alt Text']}]({img['Image URL']})" for img in structured_data["Images"]) + "\n"
     else:
-        result += "No images found.\n\n"
-
+        result += "No images found.\n"
     return result
 
-# --- Web Page Processing
+# --- Web Page Processing Function ---
 
 def download_and_process_web_page(url):
-    """Downloads a web page
+    """Downloads a web page and returns the structured content."""
     if not url.startswith("http://") and not url.startswith("https://"):
         url = "http://" + url  # Prepend "http://" if not present
 
     try:
         response = get(url)
-        if response.status_code != 200:
-            return f"Error: Received status code {response.status_code}"
-
         soup = response.soup()
         structured_data = {
             "Texts": extract_texts(soup),
@@ -122,8 +107,8 @@ iface = gr.Interface(
     outputs=[
         gr.Markdown(label="Web Page Content"),
     ],
-    title="
-    description="Enter the URL of a web page. The tool will extract and
+    title="Web Page Processor for Hugging Face Chat Tools",
+    description="Enter the URL of a web page. The tool will extract and display the structured content of the page, including text, links, and images. This tool is designed for use with Hugging Face Chat Tools.",
     concurrency_limit=None,
     api_name="main"
 )
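
The changed code leans on two helpers from the unshown "# --- Custom HTTP Session and Response Classes ---" section: get(url) and response.soup(). A minimal sketch of what that section might look like, assuming it builds on the urllib3 and BeautifulSoup imports at the top of app.py (the names http and Response, and everything inside them, are guesses for illustration, not code from this commit):

import urllib3
from bs4 import BeautifulSoup

# Hypothetical reconstruction; the real classes are outside this diff.
http = urllib3.PoolManager()  # shared connection pool for all fetches

class Response:
    """Wraps an urllib3 response so callers can read the status and parse the HTML."""
    def __init__(self, raw):
        self.status_code = raw.status  # HTTP status code of the fetch
        self._body = raw.data          # raw response bytes

    def soup(self):
        """Parse the body with BeautifulSoup, as download_and_process_web_page expects."""
        return BeautifulSoup(self._body, "html.parser")

def get(url):
    """Fetch a URL through the shared pool and wrap the result in a Response."""
    return Response(http.request("GET", url))

Routing every fetch through one PoolManager would reuse connections across requests, which fits the "custom session" framing of the comment.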
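
To see what the collapsed Texts branch produces, here is the refactored format_detailed_output applied to a small hand-made structured_data dict (all values invented for illustration):

structured_data = {
    "Texts": ["Hello", "world"],
    "Links": [{"Text": "Docs", "URL": "https://example.com/docs"}],
    "Images": [],
}
print(format_detailed_output(structured_data))
# ### Structured Page Content
#
# **Texts:**
# Hello world
#
# **Links:**
# [Docs](https://example.com/docs)
# **Images:**
# No images found.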