"""Tools for the agents defined in agents.py: web searching, webpage browsing,
image analysis, PDF reading, CSV parsing, date extraction, and calculation."""

import io
import re
from typing import Any, Dict, List
import pandas as pd
import requests
from bs4 import BeautifulSoup
from PIL import Image
from smolagents import tool
from smolagents.default_tools import DuckDuckGoSearchTool


@tool
def web_search(query: str) -> str:
"""
Search the web for information.
Args:
query: Search query to find information
Returns:
Search results as text
"""
    # Use the built-in DuckDuckGo search tool from smolagents, capped at a
    # few results to keep the response concise.
    search_tool = DuckDuckGoSearchTool(max_results=3)
    # smolagents tools are invoked by calling the instance directly.
    return search_tool(query)


@tool
def browse_webpage(url: str) -> Dict[str, Any]:
"""
Browse a webpage and extract its content.
Args:
url: URL of the webpage to browse
Returns:
Dictionary containing title, text content, and links from the webpage
"""
try:
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
        response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
# Extract title
title = soup.title.string if soup.title else "No title found"
# Extract main text content
paragraphs = soup.find_all("p")
text_content = "\n".join([p.get_text().strip() for p in paragraphs])
# Extract links
links = []
for link in soup.find_all("a", href=True):
href = link["href"]
text = link.get_text().strip()
if href.startswith("http"):
links.append({"text": text, "href": href})
return {"title": title, "content": text_content, "links": links}
except Exception as e:
return {"error": str(e)}
@tool
def analyze_image(image_url: str) -> Dict[str, Any]:
"""
Analyze an image and extract information from it.
Args:
image_url: URL of the image to analyze
Returns:
Dictionary containing information about the image
"""
try:
# Download the image
        response = requests.get(image_url, timeout=30)
response.raise_for_status()
# Open the image
img = Image.open(io.BytesIO(response.content))
# Extract basic image information
width, height = img.size
format_type = img.format
mode = img.mode
return {
"width": width,
"height": height,
"format": format_type,
"mode": mode,
"aspect_ratio": width / height,
}
except Exception as e:
return {"error": str(e)}
@tool
def read_pdf(pdf_url: str) -> str:
"""
Extract text content from a PDF document.
Args:
pdf_url: URL of the PDF to read
Returns:
Text content extracted from the PDF
"""
    try:
        # Download the PDF
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()
        # Parse the PDF with pypdf (assumed to be available in the
        # environment; pdfplumber or PyPDF2 would work equally well).
        from pypdf import PdfReader

        reader = PdfReader(io.BytesIO(response.content))
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
        return text if text.strip() else "No extractable text found in the PDF."
except Exception as e:
return f"Error: {str(e)}"


@tool
def parse_csv(csv_url: str) -> Dict[str, Any]:
"""
Parse a CSV file and return its content as structured data.
Args:
csv_url: URL of the CSV file to parse
Returns:
Dictionary containing parsed CSV data
"""
try:
# Download the CSV
        response = requests.get(csv_url, timeout=30)
response.raise_for_status()
# Parse the CSV
df = pd.read_csv(io.StringIO(response.text))
# Convert to dictionary format
columns = df.columns.tolist()
data = df.to_dict(orient="records")
# Return basic statistics and preview
return {
"columns": columns,
"row_count": len(data),
"preview": data[:5] if len(data) > 5 else data,
"column_dtypes": {col: str(df[col].dtype) for col in columns},
}
except Exception as e:
return {"error": str(e)}
@tool
def find_in_page(page_content: Dict[str, Any], query: str) -> List[str]:
"""
Find occurrences of a query string in page content.
Args:
page_content: Page content returned by browse_webpage
query: String to search for in the page
Returns:
List of sentences or sections containing the query
"""
results = []
if "content" in page_content:
content = page_content["content"]
# Split content into sentences
sentences = re.split(r"(?<=[.!?])\s+", content)
# Find sentences containing the query
for sentence in sentences:
if query.lower() in sentence.lower():
results.append(sentence)
return results
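
# Typical chained usage (URL illustrative):
#     page = browse_webpage("https://example.com")
#     mentions = find_in_page(page, "domain")
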
@tool
def extract_dates(text: str) -> List[str]:
"""
Extract dates from text content.
Args:
text: Text content to extract dates from
Returns:
List of date strings found in the text
"""
# Simple regex patterns for date extraction
# These patterns can be expanded for better coverage
date_patterns = [
r"\d{1,2}/\d{1,2}/\d{2,4}", # MM/DD/YYYY or DD/MM/YYYY
r"\d{1,2}-\d{1,2}-\d{2,4}", # MM-DD-YYYY or DD-MM-YYYY
r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{1,2},? \d{4}\b", # Month DD, YYYY
r"\b\d{1,2} (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]* \d{4}\b", # DD Month YYYY
]
results = []
for pattern in date_patterns:
matches = re.findall(pattern, text, re.IGNORECASE)
results.extend(matches)
return results
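
# Example, hand-checked against the patterns above:
#     extract_dates("Shipped 12/05/2024; announced Jan 3, 2025")
#     returns ["12/05/2024", "Jan 3, 2025"]
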
@tool
def perform_calculation(expression: str) -> Dict[str, Any]:
"""
Safely evaluate a mathematical expression.
Args:
expression: Mathematical expression to evaluate
Returns:
Dictionary containing the result or error message
"""
    try:
        import math

        # Whitelist the names the expression may reference. Restricting
        # eval() this way blocks most accidents, but it is not a hardened
        # sandbox; a production system should use a dedicated expression
        # parser (e.g. an ast-based evaluator) instead.
        allowed_names = {
            "abs": abs,
            "round": round,
            "min": min,
            "max": max,
            "sum": sum,
            "len": len,
            "pow": pow,
            "math": math,
        }
        cleaned_expr = expression.strip()
        # Evaluate with builtins disabled and only the whitelist in scope.
        result = eval(cleaned_expr, {"__builtins__": {}}, allowed_names)
return {"result": result}
except Exception as e:
return {"error": str(e)}