"""Tools for the agent system.

Introduces tools for web searching, webpage browsing, image analysis, PDF
reading, CSV parsing, and date extraction, used by the web-browsing,
data-analysis, and media-handling agents defined in agents.py.
"""

import io
import re
from typing import Any, Dict, List

import pandas as pd
import requests
from bs4 import BeautifulSoup
from PIL import Image
from pypdf import PdfReader  # PDF text extraction; install with `pip install pypdf`
from smolagents import tool
from smolagents.default_tools import DuckDuckGoSearchTool


@tool
def web_search(query: str) -> str:
    """
    Search the web for information.

    Args:
        query: Search query to find information

    Returns:
        Search results as text
    """
    # Delegate to the built-in DuckDuckGo search tool from smolagents,
    # capped at three results to keep responses short.
    search_tool = DuckDuckGoSearchTool(max_results=3)
    # smolagents tools are callable; __call__ dispatches to the tool's forward().
    return search_tool(query)


@tool
def browse_webpage(url: str) -> Dict[str, Any]:
    """
    Browse a webpage and extract its content.

    Args:
        url: URL of the webpage to browse

    Returns:
        Dictionary containing title, text content, and links from the webpage
    """
    try:
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            )
        }
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Extract title
        title = soup.title.string if soup.title else "No title found"

        # Extract main text content from paragraph tags
        paragraphs = soup.find_all("p")
        text_content = "\n".join(p.get_text().strip() for p in paragraphs)

        # Extract absolute links only
        links = []
        for link in soup.find_all("a", href=True):
            href = link["href"]
            text = link.get_text().strip()
            if href.startswith("http"):
                links.append({"text": text, "href": href})

        return {"title": title, "content": text_content, "links": links}
    except Exception as e:
        return {"error": str(e)}


@tool
def analyze_image(image_url: str) -> Dict[str, Any]:
    """
    Analyze an image and extract information from it.

    Args:
        image_url: URL of the image to analyze

    Returns:
        Dictionary containing information about the image
    """
    try:
        # Download the image
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()

        # Open the image from the in-memory bytes
        img = Image.open(io.BytesIO(response.content))

        # Extract basic image information
        width, height = img.size
        return {
            "width": width,
            "height": height,
            "format": img.format,
            "mode": img.mode,
            "aspect_ratio": width / height,
        }
    except Exception as e:
        return {"error": str(e)}


@tool
def read_pdf(pdf_url: str) -> str:
    """
    Extract text content from a PDF document.

    Args:
        pdf_url: URL of the PDF to read

    Returns:
        Text content extracted from the PDF
    """
    try:
        # Download the PDF
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()

        # Parse the PDF with pypdf; extract_text() can return None for pages
        # without a text layer (e.g. scanned images), hence the `or ""`.
        reader = PdfReader(io.BytesIO(response.content))
        return "\n".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        return f"Error: {str(e)}"


@tool
def parse_csv(csv_url: str) -> Dict[str, Any]:
    """
    Parse a CSV file and return its content as structured data.

    Args:
        csv_url: URL of the CSV file to parse

    Returns:
        Dictionary containing parsed CSV data
    """
    try:
        # Download the CSV
        response = requests.get(csv_url, timeout=30)
        response.raise_for_status()

        # Parse the CSV
        df = pd.read_csv(io.StringIO(response.text))

        # Convert to a list of row dictionaries
        columns = df.columns.tolist()
        data = df.to_dict(orient="records")

        # Return basic statistics and a preview of the first rows
        return {
            "columns": columns,
            "row_count": len(data),
            "preview": data[:5],
            "column_dtypes": {col: str(df[col].dtype) for col in columns},
        }
    except Exception as e:
        return {"error": str(e)}


@tool
def find_in_page(page_content: Dict[str, Any], query: str) -> List[str]:
    """
    Find occurrences of a query string in page content.

    Args:
        page_content: Page content returned by browse_webpage
        query: String to search for in the page

    Returns:
        List of sentences or sections containing the query
    """
    results = []
    if "content" in page_content:
        content = page_content["content"]
        # Split content into sentences on terminal punctuation
        sentences = re.split(r"(?<=[.!?])\s+", content)
        # Collect sentences containing the query (case-insensitive)
        for sentence in sentences:
            if query.lower() in sentence.lower():
                results.append(sentence)
    return results


@tool
def extract_dates(text: str) -> List[str]:
    """
    Extract dates from text content.

    Args:
        text: Text content to extract dates from

    Returns:
        List of date strings found in the text
    """
    # Simple regex patterns for date extraction;
    # these patterns can be expanded for better coverage.
    date_patterns = [
        r"\d{1,2}/\d{1,2}/\d{2,4}",  # MM/DD/YYYY or DD/MM/YYYY
        r"\d{1,2}-\d{1,2}-\d{2,4}",  # MM-DD-YYYY or DD-MM-YYYY
        r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}\b",  # Month DD, YYYY
        r"\b\d{1,2} (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{4}\b",  # DD Month YYYY
    ]
    results = []
    for pattern in date_patterns:
        results.extend(re.findall(pattern, text, re.IGNORECASE))
    return results
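
# Illustrative example: extract_dates("Launched 03/14/2023, updated March 15, 2023")
# returns ["03/14/2023", "March 15, 2023"]; the slash pattern matches first,
# then the Month DD, YYYY pattern.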


@tool
def perform_calculation(expression: str) -> Dict[str, Any]:
    """
    Safely evaluate a mathematical expression.

    Args:
        expression: Mathematical expression to evaluate

    Returns:
        Dictionary containing the result or error message
    """
    try:
        import math

        # Restrict evaluation to an explicit allowlist of names. This is
        # safer than a bare eval(), but still not a hardened sandbox; a real
        # implementation should use a proper math expression parser.
        allowed_names = {
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "sum": sum,
            "len": len,
            "pow": pow,
            "math": math,
        }

        cleaned_expr = expression.strip()
        result = eval(cleaned_expr, {"__builtins__": {}}, allowed_names)
        return {"result": result}
    except Exception as e:
        return {"error": str(e)}