mjschock committed
Commit 55ef143 (unverified) · 1 Parent(s): 3a4521b

Add agents for web browsing, data analysis, and media handling in agents.py. Introduce tools in tools.py for web search, webpage browsing, in-page search, image analysis, PDF reading, CSV parsing, date extraction, and calculation, extending the overall capabilities of the agent system.

Files changed (2)
  1. agents.py +78 -0
  2. tools.py +254 -0
agents.py ADDED
@@ -0,0 +1,78 @@
+from smolagents import CodeAgent
+
+from tools import (
+    analyze_image,
+    browse_webpage,
+    extract_dates,
+    find_in_page,
+    parse_csv,
+    perform_calculation,
+    read_pdf,
+    web_search,
+)
+
+
+def create_web_agent(model):
+    """
+    Create a specialized agent for web browsing tasks.
+
+    Args:
+        model: The model to use for the agent
+
+    Returns:
+        Configured CodeAgent for web browsing
+    """
+    web_agent = CodeAgent(
+        tools=[web_search, browse_webpage, find_in_page, extract_dates],
+        model=model,
+        name="web_agent",
+        description="Specialized agent for web browsing and searching. Use this agent to find information online, browse websites, and extract information from web pages.",
+        add_base_tools=True,
+        additional_authorized_imports=["requests", "bs4", "re", "json"],
+    )
+
+    return web_agent
+
+
+def create_data_analysis_agent(model):
+    """
+    Create a specialized agent for data analysis tasks.
+
+    Args:
+        model: The model to use for the agent
+
+    Returns:
+        Configured CodeAgent for data analysis
+    """
+    data_agent = CodeAgent(
+        tools=[parse_csv, perform_calculation],
+        model=model,
+        name="data_agent",
+        description="Specialized agent for data analysis. Use this agent to analyze data, perform calculations, and extract insights from structured data.",
+        add_base_tools=True,
+        additional_authorized_imports=["pandas", "numpy", "math", "csv", "io"],
+    )
+
+    return data_agent
+
+
+def create_media_agent(model):
+    """
+    Create a specialized agent for handling media (images, PDFs).
+
+    Args:
+        model: The model to use for the agent
+
+    Returns:
+        Configured CodeAgent for media handling
+    """
+    media_agent = CodeAgent(
+        tools=[analyze_image, read_pdf],
+        model=model,
+        name="media_agent",
+        description="Specialized agent for handling media files like images and PDFs. Use this agent to analyze images and extract text from PDF documents.",
+        add_base_tools=True,
+        additional_authorized_imports=["PIL", "io", "requests"],
+    )
+
+    return media_agent
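
A minimal usage sketch (not part of this commit): the three factory functions are meant to be composed under a single manager agent, which smolagents supports via the managed_agents parameter of CodeAgent. The model class and model_id below are illustrative placeholders, not choices made by this commit.

# Sketch: composing the specialized agents under a manager CodeAgent.
from smolagents import CodeAgent, HfApiModel

from agents import create_data_analysis_agent, create_media_agent, create_web_agent

model = HfApiModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")  # illustrative model choice

web_agent = create_web_agent(model)
data_agent = create_data_analysis_agent(model)
media_agent = create_media_agent(model)

# The manager delegates sub-tasks to the named agents defined in agents.py.
manager = CodeAgent(
    tools=[],
    model=model,
    managed_agents=[web_agent, data_agent, media_agent],
)

print(manager.run("Find the release year of the first public Python version."))
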
tools.py ADDED
@@ -0,0 +1,254 @@
+import io
+import re
+from typing import Any, Dict, List
+
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+from PIL import Image
+from smolagents import tool
+from smolagents.default_tools import DuckDuckGoSearchTool, VisitWebpageTool
+
+
+@tool
+def web_search(query: str) -> str:
+    """
+    Search the web for information.
+
+    Args:
+        query: Search query to find information
+
+    Returns:
+        Search results as text
+    """
+    # Using the built-in DuckDuckGo search tool from smolagents
+    # search_tool = DuckDuckGoSearchTool()
+    search_tool = DuckDuckGoSearchTool(max_results=3)
+    results = search_tool(query)
+    return results
+
+
+@tool
+def browse_webpage(url: str) -> Dict[str, Any]:
+    """
+    Browse a webpage and extract its content.
+
+    Args:
+        url: URL of the webpage to browse
+
+    Returns:
+        Dictionary containing title, text content, and links from the webpage
+    """
+    try:
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()
+
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        # Extract title
+        title = soup.title.string if soup.title else "No title found"
+
+        # Extract main text content
+        paragraphs = soup.find_all("p")
+        text_content = "\n".join([p.get_text().strip() for p in paragraphs])
+
+        # Extract links
+        links = []
+        for link in soup.find_all("a", href=True):
+            href = link["href"]
+            text = link.get_text().strip()
+            if href.startswith("http"):
+                links.append({"text": text, "href": href})
+
+        return {"title": title, "content": text_content, "links": links}
+    except Exception as e:
+        return {"error": str(e)}
+
+
+@tool
+def analyze_image(image_url: str) -> Dict[str, Any]:
+    """
+    Analyze an image and extract information from it.
+
+    Args:
+        image_url: URL of the image to analyze
+
+    Returns:
+        Dictionary containing information about the image
+    """
+    try:
+        # Download the image
+        response = requests.get(image_url)
+        response.raise_for_status()
+
+        # Open the image
+        img = Image.open(io.BytesIO(response.content))
+
+        # Extract basic image information
+        width, height = img.size
+        format_type = img.format
+        mode = img.mode
+
+        return {
+            "width": width,
+            "height": height,
+            "format": format_type,
+            "mode": mode,
+            "aspect_ratio": width / height,
+        }
+    except Exception as e:
+        return {"error": str(e)}
+
+
+@tool
+def read_pdf(pdf_url: str) -> str:
+    """
+    Extract text content from a PDF document.
+
+    Args:
+        pdf_url: URL of the PDF to read
+
+    Returns:
+        Text content extracted from the PDF
+    """
+    try:
+        # Download the PDF
+        response = requests.get(pdf_url)
+        response.raise_for_status()
+
+        # This is a placeholder - in a real implementation, you would use a PDF parsing library
+        # such as PyPDF2, pdfplumber, or pdf2text
+        return "PDF content extraction would happen here in a real implementation"
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+
+@tool
+def parse_csv(csv_url: str) -> Dict[str, Any]:
+    """
+    Parse a CSV file and return its content as structured data.
+
+    Args:
+        csv_url: URL of the CSV file to parse
+
+    Returns:
+        Dictionary containing parsed CSV data
+    """
+    try:
+        # Download the CSV
+        response = requests.get(csv_url)
+        response.raise_for_status()
+
+        # Parse the CSV
+        df = pd.read_csv(io.StringIO(response.text))
+
+        # Convert to dictionary format
+        columns = df.columns.tolist()
+        data = df.to_dict(orient="records")
+
+        # Return basic statistics and preview
+        return {
+            "columns": columns,
+            "row_count": len(data),
+            "preview": data[:5] if len(data) > 5 else data,
+            "column_dtypes": {col: str(df[col].dtype) for col in columns},
+        }
+    except Exception as e:
+        return {"error": str(e)}
+
+
+@tool
+def find_in_page(page_content: Dict[str, Any], query: str) -> List[str]:
+    """
+    Find occurrences of a query string in page content.
+
+    Args:
+        page_content: Page content returned by browse_webpage
+        query: String to search for in the page
+
+    Returns:
+        List of sentences or sections containing the query
+    """
+    results = []
+    if "content" in page_content:
+        content = page_content["content"]
+        # Split content into sentences
+        sentences = re.split(r"(?<=[.!?])\s+", content)
+
+        # Find sentences containing the query
+        for sentence in sentences:
+            if query.lower() in sentence.lower():
+                results.append(sentence)
+
+    return results
+
+
+@tool
+def extract_dates(text: str) -> List[str]:
+    """
+    Extract dates from text content.
+
+    Args:
+        text: Text content to extract dates from
+
+    Returns:
+        List of date strings found in the text
+    """
+    # Simple regex patterns for date extraction
+    # These patterns can be expanded for better coverage
+    date_patterns = [
+        r"\d{1,2}/\d{1,2}/\d{2,4}",  # MM/DD/YYYY or DD/MM/YYYY
+        r"\d{1,2}-\d{1,2}-\d{2,4}",  # MM-DD-YYYY or DD-MM-YYYY
+        r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}\b",  # Month DD, YYYY
+        r"\b\d{1,2} (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{4}\b",  # DD Month YYYY
+    ]
+
+    results = []
+    for pattern in date_patterns:
+        matches = re.findall(pattern, text, re.IGNORECASE)
+        results.extend(matches)
+
+    return results
+
+
+@tool
+def perform_calculation(expression: str) -> Dict[str, Any]:
+    """
+    Safely evaluate a mathematical expression.
+
+    Args:
+        expression: Mathematical expression to evaluate
+
+    Returns:
+        Dictionary containing the result or error message
+    """
+    try:
+        # Restrict eval() to a small whitelist of names with builtins disabled
+        # This is very limited but safer than an unrestricted eval()
+        import math
+
+        # Define allowed names
+        allowed_names = {
+            "abs": abs,
+            "round": round,
+            "min": min,
+            "max": max,
+            "sum": sum,
+            "len": len,
+            "pow": pow,
+            "math": math,
+        }
+
+        # Clean the expression
+        cleaned_expr = expression.strip()
+
+        # Evaluate with builtins disabled (this is still a simplified example)
+        # In a real implementation, use a proper math expression parser
+        result = eval(cleaned_expr, {"__builtins__": {}}, allowed_names)
+
+        return {"result": result}
+    except Exception as e:
+        return {"error": str(e)}
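
Two of the tools above point at fuller implementations they do not ship. The read_pdf placeholder names PyPDF2, pdfplumber, and pdf2text as candidate libraries; a sketch of the pdfplumber route (assuming that dependency is installed, and using an illustrative helper name) could look like this:

# Sketch only, not part of this commit: extracting PDF text with pdfplumber.
import io

import pdfplumber
import requests


def extract_pdf_text(pdf_url: str) -> str:
    """Download a PDF and return the concatenated text of all pages."""
    response = requests.get(pdf_url)
    response.raise_for_status()

    pages = []
    with pdfplumber.open(io.BytesIO(response.content)) as pdf:
        for page in pdf.pages:
            # extract_text() may return None for pages without a text layer
            pages.append(page.extract_text() or "")
    return "\n".join(pages)

Similarly, perform_calculation's closing comment asks for a proper math expression parser instead of a restricted eval(). A minimal AST-based evaluator along those lines, with illustrative names and support for basic arithmetic only, might look like the following:

# Sketch only, not part of this commit: a whitelist-based arithmetic
# evaluator built on Python's ast module.
import ast
import operator

_OPS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.Pow: operator.pow,
    ast.Mod: operator.mod,
    ast.USub: operator.neg,
}


def safe_eval(expression: str) -> float:
    """Evaluate a purely arithmetic expression, rejecting anything else."""

    def _eval(node):
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](_eval(node.operand))
        raise ValueError("Unsupported expression element")

    return _eval(ast.parse(expression, mode="eval"))


# Example: safe_eval("2 * (3 + 4) ** 2") returns 98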