KSh100 committed
Commit 2a808f8 · verified · 1 Parent(s): 09c9f25

Update app.py

Files changed (1)
  1. app.py +10 -25
app.py CHANGED
@@ -1,8 +1,7 @@
-import re
+import gradio as gr
 import urllib3
 from bs4 import BeautifulSoup
 from urllib.parse import urljoin
-import gradio as gr
 
 # --- Custom HTTP Session and Response Classes ---
 
@@ -63,42 +62,28 @@ def extract_images(soup, base_url):
 def format_detailed_output(structured_data):
     """Formats the structured data into a Markdown string."""
     result = "### Structured Page Content\n\n"
-
-    # Texts
-    result += "**Texts:**\n"
-    if structured_data["Texts"]:
-        result += " ".join(structured_data["Texts"]) + "\n\n"
-    else:
-        result += "No textual content found.\n\n"
-
-    # Links
+    result += "**Texts:**\n" + (" ".join(structured_data["Texts"]) if structured_data["Texts"] else "No textual content found.") + "\n\n"
     result += "**Links:**\n"
     if structured_data["Links"]:
-        result += "\n".join(f"[{link['Text']}]({link['URL']})" for link in structured_data["Links"]) + "\n\n"
+        result += "\n".join(f"[{link['Text']}]({link['URL']})" for link in structured_data["Links"]) + "\n"
     else:
-        result += "No links found.\n\n"
-
-    # Images
+        result += "No links found.\n"
     result += "**Images:**\n"
     if structured_data["Images"]:
-        result += "\n".join(f"![{img['Alt Text']}]({img['Image URL']})" for img in structured_data["Images"]) + "\n\n"
+        result += "\n".join(f"![{img['Alt Text']}]({img['Image URL']})" for img in structured_data["Images"]) + "\n"
     else:
-        result += "No images found.\n\n"
-
+        result += "No images found.\n"
     return result
 
-# --- Web Page Processing Functions ---
+# --- Web Page Processing Function ---
 
 def download_and_process_web_page(url):
-    """Downloads a web page from a URL and processes its content."""
+    """Downloads a web page and returns the structured content."""
     if not url.startswith("http://") and not url.startswith("https://"):
         url = "http://" + url  # Prepend "http://" if not present
 
     try:
         response = get(url)
-        if response.status_code != 200:
-            return f"Error: Received status code {response.status_code}"
-
         soup = response.soup()
         structured_data = {
             "Texts": extract_texts(soup),
@@ -122,8 +107,8 @@ iface = gr.Interface(
     outputs=[
         gr.Markdown(label="Web Page Content"),
     ],
-    title="Enhanced Web Page Processor for Hugging Face Chat Tools",
-    description="Enter the URL of a web page. The tool will extract and format its content, including text, links, and images. This tool is designed for use with Hugging Face Chat Tools. \n [https://hf.co/chat/tools/66f1a8159d41ad4398ebb711](https://hf.co/chat/tools/66f1a8159d41ad4398ebb711)",
+    title="Web Page Processor for Hugging Face Chat Tools",
+    description="Enter the URL of a web page. The tool will extract and display the structured content of the page, including text, links, and images. This tool is designed for use with Hugging Face Chat Tools.",
     concurrency_limit=None,
     api_name="main"
 )
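
download_and_process_web_page calls a get() helper and a response.soup() method defined in the unchanged "# --- Custom HTTP Session and Response Classes ---" section, which this diff does not show. A minimal sketch of what such helpers could look like, assuming urllib3 plus BeautifulSoup and hypothetical names rather than the actual code in app.py:

import urllib3
from bs4 import BeautifulSoup

# Hypothetical stand-in for the custom session/response classes; names and
# details are assumptions, not the implementation hidden from this diff.
_http = urllib3.PoolManager()

class Response:
    def __init__(self, raw):
        self.status_code = raw.status                      # HTTP status from urllib3
        self.text = raw.data.decode("utf-8", errors="replace")

    def soup(self):
        # Parse the fetched HTML so the extract_* helpers can walk it.
        return BeautifulSoup(self.text, "html.parser")

def get(url):
    # Fetch the page and wrap the raw urllib3 response in the helper class.
    return Response(_http.request("GET", url))

The status_code check removed in this commit would have read the status_code attribute of a wrapper like this.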
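
As a quick check of the refactored format_detailed_output, a hypothetical call with made-up data, run in the same module as the function; the dict keys mirror the f-strings in the diff, and the expected Markdown appears in the trailing comments. Note the refactor also drops the blank line that previously followed the link and image lists, since their trailing "\n\n" became "\n".

# Hypothetical sample data; the layout mirrors what the extract_* helpers return.
sample = {
    "Texts": ["Hello", "world"],
    "Links": [{"Text": "Docs", "URL": "https://example.com/docs"}],
    "Images": [{"Alt Text": "Logo", "Image URL": "https://example.com/logo.png"}],
}

print(format_detailed_output(sample))
# ### Structured Page Content
#
# **Texts:**
# Hello world
#
# **Links:**
# [Docs](https://example.com/docs)
# **Images:**
# ![Logo](https://example.com/logo.png)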