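"""Custom LangChain tools: Wikipedia search, DuckDuckGo web search, and
web page content retrieval."""
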
from typing import Optional, Type

import requests
import wikipedia
from bs4 import BeautifulSoup
from langchain.tools import BaseTool
from langchain_community.tools import DuckDuckGoSearchRun


class WikipediaSearchTool(BaseTool):
    name: str = "wikipedia_search"
    description: str = "Search for information on Wikipedia using a given term or subject"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Synchronous Wikipedia search"""
        try:
            wikipedia.set_lang("en")
            summary = wikipedia.summary(query, sentences=3)
            return summary
        except wikipedia.exceptions.DisambiguationError as e:
            return f"Ambiguity: multiple possible results for '{query}': {e.options[:5]}"
        except wikipedia.exceptions.PageError:
            return f"No page found for '{query}'."
        except Exception as e:
            return f"Error during Wikipedia search: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Asynchronous Wikipedia search (fallback to sync)"""
        return self._run(query)


class WebSearchTool(BaseTool):
    name: str = "web_search"
    description: str = "Search for information on the web using a search term"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Execute a web search and return relevant results"""
        try:
            search_tool = DuckDuckGoSearchRun()
            return search_tool.run(query)
        except Exception as e:
            return f"Error during web search: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Asynchronous version of the tool"""
        return self._run(query)


class WebContentTool(BaseTool):
    name: str = "fetch_web_content"
    description: str = "Retrieve the content of a web page from a URL"
    args_schema: Optional[Type] = None

    def _run(self, url: str) -> str:
        """Retrieve and clean web page content"""
        try:
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=10)
            if response.status_code != 200:
                return f"Error retrieving content: {response.status_code}"

            # Extract content with BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove scripts, styles and other irrelevant elements
            for element in soup(['script', 'style', 'header', 'footer', 'nav']):
                element.decompose()

            # Extract main text
            text = soup.get_text(separator='\n')

            # Clean text (multiple spaces, empty lines)
            lines = [line.strip() for line in text.split('\n') if line.strip()]
            cleaned_text = '\n'.join(lines)

            # Limit text length
            max_length = 5000
            if len(cleaned_text) > max_length:
                cleaned_text = cleaned_text[:max_length] + "... (content truncated)"

            return cleaned_text
        except Exception as e:
            return f"Error retrieving web content: {str(e)}"

    async def _arun(self, url: str) -> str:
        """Asynchronous version of the tool"""
        return self._run(url)
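

# Usage sketch (illustrative, not part of the tool definitions): each class is
# a standard LangChain BaseTool, so instances can be passed to an agent or
# invoked directly via `.run()`. The query and URL below are arbitrary examples.
if __name__ == "__main__":
    print(WikipediaSearchTool().run("Alan Turing"))
    print(WebSearchTool().run("latest Python release"))
    print(WebContentTool().run("https://example.com"))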