CoderBurntt committed on
Commit
fea1a71
·
verified ·
1 Parent(s): d26a26e

Update tools/visit_webpage.py

Browse files
Files changed (1) hide show
  1. tools/visit_webpage.py +11 -9
tools/visit_webpage.py CHANGED
@@ -3,7 +3,7 @@ from smolagents.tools import Tool
3
  import requests
4
  import markdownify
5
  import smolagents
6
- import re # Import the re module for regular expressions
7
 
8
  class VisitWebpageTool(Tool):
9
  name = "visit_webpage"
@@ -13,7 +13,7 @@ class VisitWebpageTool(Tool):
13
 
14
  def __init__(self, *args, **kwargs):
15
  super().__init__(*args, **kwargs)
16
- self.is_initialized = True # Example initialization logic
17
 
18
  def forward(self, url: str) -> str:
19
  try:
@@ -27,17 +27,19 @@ class VisitWebpageTool(Tool):
27
  "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
28
  ) from e
29
  try:
30
- # Send a GET request to the URL with a 20-second timeout
31
- response = requests.get(url, timeout=20)
32
- response.raise_for_status() # Raise an exception for bad status codes
 
33
 
34
- # Convert the HTML content to Markdown
35
  markdown_content = markdownify(response.text).strip()
36
-
37
- # Remove multiple line breaks
38
  markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
39
 
40
  return truncate_content(markdown_content, 10000)
41
 
42
  except requests.exceptions.Timeout:
43
- return
 
 
 
 
 
3
  import requests
4
  import markdownify
5
  import smolagents
6
+ import re
7
 
8
  class VisitWebpageTool(Tool):
9
  name = "visit_webpage"
 
13
 
14
  def __init__(self, *args, **kwargs):
15
  super().__init__(*args, **kwargs)
16
+ self.is_initialized = True
17
 
18
  def forward(self, url: str) -> str:
19
  try:
 
27
  "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
28
  ) from e
29
  try:
30
+ # Add User-Agent header to mimic a browser
31
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
32
+ response = requests.get(url, headers=headers, timeout=20)
33
+ response.raise_for_status()
34
 
 
35
  markdown_content = markdownify(response.text).strip()
 
 
36
  markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
37
 
38
  return truncate_content(markdown_content, 10000)
39
 
40
  except requests.exceptions.Timeout:
41
+ return "The request timed out. Please try again later or check the URL."
42
+ except RequestException as e:
43
+ return f"Error fetching the webpage: {str(e)}"
44
+ except Exception as e:
45
+ return f"An unexpected error occurred: {str(e)}"