mjschock's picture
Refactor agent structure by modularizing agent implementations into separate directories for web, data analysis, and media agents. Remove legacy code from agents.py, prompts.py, and tools.py, enhancing maintainability. Update main_v2.py to reflect new import paths and agent initialization. Add new tools for enhanced functionality, including web searching and data extraction. Update requirements.txt to include necessary dependencies for new tools.
837e221 unverified
raw
history blame
1.35 kB
from typing import Dict, Any
import requests
from bs4 import BeautifulSoup
from smolagents import tool
@tool
def browse_webpage(url: str, timeout: float = 10.0) -> Dict[str, Any]:
    """
    Browse a webpage and extract its content.

    Fetches the page over HTTP, then pulls out the title, the text of all
    <p> paragraphs, and every absolute (http/https) link. Errors are
    reported in the return value rather than raised, so the calling agent
    always receives a dictionary.

    Args:
        url: URL of the webpage to browse
        timeout: Maximum number of seconds to wait for the HTTP response

    Returns:
        Dictionary containing title, text content, and links from the webpage,
        or a dictionary with a single "error" key describing the failure.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # A timeout is essential here: requests waits indefinitely by default,
        # which would hang the agent on a stalled or unreachable server.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        # Page title (absent on malformed or titleless pages).
        title = soup.title.string if soup.title else "No title found"

        # Main text content: the concatenated text of all <p> elements.
        text_content = "\n".join(p.get_text().strip() for p in soup.find_all("p"))

        # Absolute links only; relative hrefs are deliberately skipped.
        links = [
            {"text": a.get_text().strip(), "href": a["href"]}
            for a in soup.find_all("a", href=True)
            if a["href"].startswith("http")
        ]

        return {"title": title, "content": text_content, "links": links}
    except requests.RequestException as e:
        # Network failures, timeouts, and non-2xx statuses from raise_for_status.
        return {"error": str(e)}
    except Exception as e:
        # Last-resort guard (e.g. unexpected parsing issues) so the tool
        # never raises into the agent loop.
        return {"error": str(e)}