File size: 6,911 Bytes
55ef143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
import io
import re
from typing import Any, Dict, List

import pandas as pd
import requests
from bs4 import BeautifulSoup
from PIL import Image
from smolagents import tool
from smolagents.default_tools import DuckDuckGoSearchTool, VisitWebpageTool


@tool
def web_search(query: str) -> str:
    """
    Search the web for information.

    Args:
        query: Search query to find information

    Returns:
        Search results as text
    """
    # smolagents tools are invoked by calling the instance (Tool.__call__ ->
    # forward); there is no `execute` method, so the previous
    # `search_tool.execute(query)` raised AttributeError at runtime.
    search_tool = DuckDuckGoSearchTool(max_results=3)
    return search_tool(query)


@tool
def browse_webpage(url: str) -> Dict[str, Any]:
    """
    Browse a webpage and extract its content.

    Args:
        url: URL of the webpage to browse

    Returns:
        Dictionary with keys "title", "content" (newline-joined <p> text),
        and "links" (list of {"text", "href"} dicts for absolute links);
        on any failure, a dictionary with a single "error" key instead.
    """
    try:
        # Browser-like UA: some sites reject the default python-requests agent.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # timeout prevents the agent from hanging forever on a stalled server.
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Extract title (soup.title is None when the page has no <title> tag)
        title = soup.title.string if soup.title else "No title found"

        # Extract main text content from paragraph tags only
        paragraphs = soup.find_all("p")
        text_content = "\n".join([p.get_text().strip() for p in paragraphs])

        # Extract absolute links; relative hrefs are deliberately skipped
        links = []
        for link in soup.find_all("a", href=True):
            href = link["href"]
            text = link.get_text().strip()
            if href.startswith("http"):
                links.append({"text": text, "href": href})

        return {"title": title, "content": text_content, "links": links}
    except Exception as e:
        # Return the error as data so the calling agent can react to it
        return {"error": str(e)}


@tool
def analyze_image(image_url: str) -> Dict[str, Any]:
    """
    Analyze an image and extract basic metadata from it.

    Args:
        image_url: URL of the image to analyze

    Returns:
        Dictionary with "width", "height", "format", "mode", and
        "aspect_ratio" (width / height); on any failure, a dictionary
        with a single "error" key instead.
    """
    try:
        # Download the image; timeout prevents indefinite hangs.
        response = requests.get(image_url, timeout=15)
        response.raise_for_status()

        # Decode the image from the in-memory bytes
        img = Image.open(io.BytesIO(response.content))

        # Extract basic image information
        width, height = img.size
        format_type = img.format  # e.g. "JPEG", "PNG"; may be None for raw buffers
        mode = img.mode  # e.g. "RGB", "RGBA", "L"

        # A zero height would raise ZeroDivisionError here; it is caught
        # below and reported as an error, which is acceptable for a tool.
        return {
            "width": width,
            "height": height,
            "format": format_type,
            "mode": mode,
            "aspect_ratio": width / height,
        }
    except Exception as e:
        return {"error": str(e)}


@tool
def read_pdf(pdf_url: str) -> str:
    """
    Extract text content from a PDF document.

    NOTE: this is currently a stub — it verifies the PDF can be downloaded
    but does not parse it. Wire in a parser (e.g. pypdf / pdfplumber) to
    make it functional.

    Args:
        pdf_url: URL of the PDF to read

    Returns:
        Text content extracted from the PDF, or an "Error: ..." string
        on failure.
    """
    try:
        # Download the PDF; timeout prevents indefinite hangs.
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()

        # This is a placeholder - in a real implementation, you would use a PDF parsing library
        # such as PyPDF2, pdfplumber, or pdf2text
        return "PDF content extraction would happen here in a real implementation"
    except Exception as e:
        return f"Error: {str(e)}"


@tool
def parse_csv(csv_url: str) -> Dict[str, Any]:
    """
    Parse a CSV file and return its content as structured data.

    Args:
        csv_url: URL of the CSV file to parse

    Returns:
        Dictionary with "columns" (list of column names), "row_count",
        "preview" (up to the first 5 rows as dicts), and "column_dtypes"
        (column name -> pandas dtype string); on any failure, a
        dictionary with a single "error" key instead.
    """
    try:
        # Download the CSV; timeout prevents indefinite hangs.
        response = requests.get(csv_url, timeout=30)
        response.raise_for_status()

        # Parse the CSV from the in-memory text
        df = pd.read_csv(io.StringIO(response.text))

        # Convert to dictionary format
        columns = df.columns.tolist()
        data = df.to_dict(orient="records")

        # Return basic statistics and a preview; slicing already clamps to
        # the list length, so no explicit length check is needed.
        return {
            "columns": columns,
            "row_count": len(data),
            "preview": data[:5],
            "column_dtypes": {col: str(df[col].dtype) for col in columns},
        }
    except Exception as e:
        return {"error": str(e)}


@tool
def find_in_page(page_content: Dict[str, Any], query: str) -> List[str]:
    """
    Find occurrences of a query string in page content.

    Args:
        page_content: Page content returned by browse_webpage
        query: String to search for in the page

    Returns:
        List of sentences or sections containing the query
    """
    # Nothing to search when the page has no extracted text (e.g. an
    # error dict from browse_webpage).
    if "content" not in page_content:
        return []

    text = page_content["content"]
    needle = query.lower()

    # Split on sentence-ending punctuation followed by whitespace, then
    # keep the chunks that contain the query (case-insensitive).
    chunks = re.split(r"(?<=[.!?])\s+", text)
    return [chunk for chunk in chunks if needle in chunk.lower()]


@tool
def extract_dates(text: str) -> List[str]:
    """
    Extract dates from text content.

    Args:
        text: Text content to extract dates from

    Returns:
        List of date strings found in the text
    """
    # Abbreviated-or-full month name ("Jan", "January", ...)
    month = r"(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*"

    # Simple patterns; intentionally loose rather than exhaustive.
    patterns = (
        r"\d{1,2}/\d{1,2}/\d{2,4}",        # MM/DD/YYYY or DD/MM/YYYY
        r"\d{1,2}-\d{1,2}-\d{2,4}",        # MM-DD-YYYY or DD-MM-YYYY
        rf"\b{month} \d{{1,2}},? \d{{4}}\b",  # Month DD, YYYY
        rf"\b\d{{1,2}} {month} \d{{4}}\b",    # DD Month YYYY
    )

    found: List[str] = []
    for pattern in patterns:
        found += re.findall(pattern, text, re.IGNORECASE)
    return found


@tool
def perform_calculation(expression: str) -> Dict[str, Any]:
    """
    Safely evaluate a mathematical expression.

    Uses an AST whitelist instead of eval(): even with empty
    __builtins__, eval can be escaped via attribute chains such as
    ``().__class__.__mro__...``, so arbitrary strings must never reach it.

    Supported: int/float literals, + - * / // % ** and unary +/-,
    list/tuple literals, calls to abs/round/min/max/sum/len/pow, and
    math.<name> constants/functions.

    Args:
        expression: Mathematical expression to evaluate

    Returns:
        Dictionary with "result" on success, or "error" with a message.
    """
    import ast
    import math
    import operator

    bin_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    allowed_names = {
        "abs": abs,
        "round": round,
        "min": min,
        "max": max,
        "sum": sum,
        "len": len,
        "pow": pow,
        "math": math,
    }

    def _eval(node: Any) -> Any:
        # Recursively evaluate only explicitly whitelisted node types.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant):
            if isinstance(node.value, (int, float, complex)):
                return node.value
            raise ValueError(f"Unsupported constant: {node.value!r}")
        if isinstance(node, ast.BinOp) and type(node.op) in bin_ops:
            return bin_ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_eval(node.operand))
        if isinstance(node, ast.Name) and node.id in allowed_names:
            return allowed_names[node.id]
        if isinstance(node, ast.Attribute):
            # Only public attributes of the math module are reachable,
            # which blocks dunder-based sandbox escapes.
            if (
                isinstance(node.value, ast.Name)
                and node.value.id == "math"
                and not node.attr.startswith("_")
            ):
                return getattr(math, node.attr)
            raise ValueError("Only math.<name> attribute access is allowed")
        if isinstance(node, (ast.List, ast.Tuple)):
            return [_eval(elt) for elt in node.elts]
        if isinstance(node, ast.Call):
            if node.keywords:
                raise ValueError("Keyword arguments are not allowed")
            func = _eval(node.func)
            return func(*[_eval(arg) for arg in node.args])
        raise ValueError(f"Disallowed expression element: {type(node).__name__}")

    try:
        tree = ast.parse(expression.strip(), mode="eval")
        return {"result": _eval(tree)}
    except Exception as e:
        return {"error": str(e)}