import io
import re
from typing import Any, Dict, List

import pandas as pd
import requests
from bs4 import BeautifulSoup
from PIL import Image
from smolagents import tool
from smolagents.default_tools import DuckDuckGoSearchTool, VisitWebpageTool

# Shared HTTP settings: a browser-like User-Agent (some sites block the
# default python-requests UA) and a timeout so a dead or slow host cannot
# hang the agent indefinitely.
_REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    )
}
_REQUEST_TIMEOUT = 30  # seconds


@tool
def web_search(query: str) -> str:
    """
    Search the web for information.

    Args:
        query: Search query to find information

    Returns:
        Search results as text
    """
    search_tool = DuckDuckGoSearchTool(max_results=3)
    # smolagents tools are invoked by calling them (Tool.__call__ dispatches
    # to forward()); there is no `.execute()` method on the Tool API.
    return search_tool(query)


@tool
def browse_webpage(url: str) -> Dict[str, Any]:
    """
    Browse a webpage and extract its content.

    Args:
        url: URL of the webpage to browse

    Returns:
        Dictionary containing title, text content, and links from the
        webpage, or a dictionary with an "error" key on failure.
    """
    try:
        response = requests.get(
            url, headers=_REQUEST_HEADERS, timeout=_REQUEST_TIMEOUT
        )
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Extract title
        title = soup.title.string if soup.title else "No title found"

        # Extract main text content from paragraph tags only; this skips
        # navigation/boilerplate but will miss text in divs/spans.
        paragraphs = soup.find_all("p")
        text_content = "\n".join(p.get_text().strip() for p in paragraphs)

        # Extract absolute links only (relative hrefs are dropped).
        links = [
            {"text": link.get_text().strip(), "href": link["href"]}
            for link in soup.find_all("a", href=True)
            if link["href"].startswith("http")
        ]

        return {"title": title, "content": text_content, "links": links}
    except Exception as e:
        # Tool-style error contract: report failures as data, never raise.
        return {"error": str(e)}


@tool
def analyze_image(image_url: str) -> Dict[str, Any]:
    """
    Analyze an image and extract information from it.

    Args:
        image_url: URL of the image to analyze

    Returns:
        Dictionary with width, height, format, mode and aspect ratio,
        or a dictionary with an "error" key on failure.
    """
    try:
        # Download the image
        response = requests.get(image_url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()

        # Open the image from the in-memory bytes (no temp file needed).
        img = Image.open(io.BytesIO(response.content))

        # Extract basic image information
        width, height = img.size
        return {
            "width": width,
            "height": height,
            "format": img.format,
            "mode": img.mode,
            "aspect_ratio": width / height,
        }
    except Exception as e:
        return {"error": str(e)}


@tool
def read_pdf(pdf_url: str) -> str:
    """
    Extract text content from a PDF document.

    Args:
        pdf_url: URL of the PDF to read

    Returns:
        Text content extracted from the PDF, or an error message string.
    """
    try:
        # Download the PDF (content is fetched but not yet parsed below).
        response = requests.get(pdf_url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()

        # This is a placeholder - in a real implementation, you would use a
        # PDF parsing library such as PyPDF2, pdfplumber, or pdf2text.
        return "PDF content extraction would happen here in a real implementation"
    except Exception as e:
        return f"Error: {str(e)}"


@tool
def parse_csv(csv_url: str) -> Dict[str, Any]:
    """
    Parse a CSV file and return its content as structured data.

    Args:
        csv_url: URL of the CSV file to parse

    Returns:
        Dictionary with column names, row count, a preview of up to five
        rows, and per-column dtypes, or a dictionary with an "error" key
        on failure.
    """
    try:
        # Download the CSV
        response = requests.get(csv_url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()

        # Parse the CSV from the response text.
        df = pd.read_csv(io.StringIO(response.text))

        columns = df.columns.tolist()
        data = df.to_dict(orient="records")

        # Return basic statistics and a preview (slicing handles short
        # lists, so no length check is needed).
        return {
            "columns": columns,
            "row_count": len(data),
            "preview": data[:5],
            "column_dtypes": {col: str(df[col].dtype) for col in columns},
        }
    except Exception as e:
        return {"error": str(e)}


@tool
def find_in_page(page_content: Dict[str, Any], query: str) -> List[str]:
    """
    Find occurrences of a query string in page content.

    Args:
        page_content: Page content returned by browse_webpage
        query: String to search for in the page

    Returns:
        List of sentences or sections containing the query
        (case-insensitive match). Empty if the page has no "content"
        key (e.g. browse_webpage returned an error).
    """
    results: List[str] = []
    if "content" in page_content:
        content = page_content["content"]
        # Split content into sentences at terminal punctuation.
        sentences = re.split(r"(?<=[.!?])\s+", content)
        query_lower = query.lower()
        results = [s for s in sentences if query_lower in s.lower()]
    return results


@tool
def extract_dates(text: str) -> List[str]:
    """
    Extract dates from text content.

    Args:
        text: Text content to extract dates from

    Returns:
        List of date strings found in the text. A date may appear more
        than once if it matches multiple patterns.
    """
    # Simple regex patterns for date extraction.
    # These patterns can be expanded for better coverage.
    date_patterns = [
        r"\d{1,2}/\d{1,2}/\d{2,4}",  # MM/DD/YYYY or DD/MM/YYYY
        r"\d{1,2}-\d{1,2}-\d{2,4}",  # MM-DD-YYYY or DD-MM-YYYY
        r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}\b",  # Month DD, YYYY
        r"\b\d{1,2} (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{4}\b",  # DD Month YYYY
    ]

    results: List[str] = []
    for pattern in date_patterns:
        results.extend(re.findall(pattern, text, re.IGNORECASE))
    return results


@tool
def perform_calculation(expression: str) -> Dict[str, Any]:
    """
    Safely evaluate a mathematical expression.

    Args:
        expression: Mathematical expression to evaluate

    Returns:
        Dictionary containing the result under "result", or an error
        message under "error".
    """
    try:
        import math

        # Names the expression is allowed to reference.
        allowed_names = {
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "sum": sum,
            "len": len,
            "pow": pow,
            "math": math,
        }

        cleaned_expr = expression.strip()

        # Reject anything outside a conservative math-expression alphabet
        # and any dunder access (e.g. "().__class__") before evaluating.
        if "__" in cleaned_expr or not re.fullmatch(
            r"[0-9A-Za-z_+\-*/%().,\s]*", cleaned_expr
        ):
            return {"error": "Expression contains disallowed characters"}

        # SECURITY NOTE: eval() with stripped builtins plus the allowlist
        # above narrows the attack surface but is still not a sandbox; a
        # real implementation should use a proper expression parser
        # (e.g. ast.literal_eval for literals, or a small Pratt parser).
        result = eval(cleaned_expr, {"__builtins__": {}}, allowed_names)
        return {"result": result}
    except Exception as e:
        return {"error": str(e)}