Spaces:
Build error
Build error
Refactor agent structure by modularizing agent implementations into separate directories for web, data analysis, and media agents. Remove legacy code from agents.py, prompts.py, and tools.py, enhancing maintainability. Update main_v2.py to reflect new import paths and agent initialization. Add new tools for enhanced functionality, including web searching and data extraction. Update requirements.txt to include necessary dependencies for new tools.
837e221
unverified
from typing import Dict, Any | |
import requests | |
from bs4 import BeautifulSoup | |
from smolagents import tool | |
def browse_webpage(url: str) -> Dict[str, Any]:
    """
    Browse a webpage and extract its title, paragraph text, and absolute links.

    Args:
        url: URL of the webpage to browse

    Returns:
        On success, a dictionary with the keys:
            "title": text of the <title> element, or "No title found" when
                the element is missing or has no text.
            "content": stripped text of every <p> element, joined with
                newlines.
            "links": list of {"text": ..., "href": ...} dictionaries, one per
                <a> element whose href starts with "http".
        On any failure (invalid URL, network error, timeout, HTTP error
        status, parsing problem), a dictionary with the single key "error"
        holding the exception message. The function does not raise.
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # requests has no default timeout; without one an unresponsive server
        # would block the caller indefinitely.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Extract title. `.string` is None for an empty <title> or one that
        # contains nested markup, so read the text and fall back explicitly.
        title_tag = soup.title
        title = title_tag.get_text(strip=True) if title_tag is not None else ""
        if not title:
            title = "No title found"

        # Extract main text content
        text_content = "\n".join(
            p.get_text().strip() for p in soup.find_all("p")
        )

        # Extract links; only absolute http(s) hrefs are kept, so relative
        # links, anchors, mailto: and javascript: targets are skipped.
        links = [
            {"text": link.get_text().strip(), "href": link["href"]}
            for link in soup.find_all("a", href=True)
            if link["href"].startswith("http")
        ]

        return {"title": title, "content": text_content, "links": links}
    except Exception as e:
        # Deliberate best-effort boundary: callers receive an error payload
        # instead of an exception.
        return {"error": str(e)}