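"""Custom LangChain tools: Wikipedia search, DuckDuckGo web search, and
web page content retrieval."""
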
from typing import Optional, Type

import requests
import wikipedia
from bs4 import BeautifulSoup
from langchain.tools import BaseTool
from langchain_community.tools import DuckDuckGoSearchRun


class WikipediaSearchTool(BaseTool):
    name: str = "wikipedia_search"
    description: str = "Search for information on Wikipedia using a given term or subject"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Synchronous Wikipedia search"""
        try:
            wikipedia.set_lang("en")
            summary = wikipedia.summary(query, sentences=3)
            return summary
        except wikipedia.exceptions.DisambiguationError as e:
            return f"Ambiguity: multiple possible results for '{query}': {e.options[:5]}"
        except wikipedia.exceptions.PageError:
            return f"No page found for '{query}'."
        except Exception as e:
            return f"Error during Wikipedia search: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Asynchronous Wikipedia search (fallback to sync)"""
        return self._run(query)


class WebSearchTool(BaseTool):
    name: str = "web_search"
    description: str = "Search for information on the web using a search term"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Execute a web search and return relevant results"""
        try:
            search_tool = DuckDuckGoSearchRun()
            return search_tool.run(query)
        except Exception as e:
            return f"Error during web search: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Asynchronous version of the tool"""
        return self._run(query)


class WebContentTool(BaseTool):
    name: str = "fetch_web_content"
    description: str = "Retrieve the content of a web page from a URL"
    args_schema: Optional[Type] = None

    def _run(self, url: str) -> str:
        """Retrieve and clean web page content"""
        try:
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=10)
            if response.status_code != 200:
                return f"Error retrieving content: {response.status_code}"

            # Extract content with BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove scripts, styles and other irrelevant elements
            for element in soup(['script', 'style', 'header', 'footer', 'nav']):
                element.decompose()

            # Extract main text
            text = soup.get_text(separator='\n')

            # Clean text (multiple spaces, empty lines)
            lines = [line.strip() for line in text.split('\n') if line.strip()]
            cleaned_text = '\n'.join(lines)

            # Limit text length
            max_length = 5000
            if len(cleaned_text) > max_length:
                cleaned_text = cleaned_text[:max_length] + "... (content truncated)"

            return cleaned_text
        except Exception as e:
            return f"Error retrieving web content: {str(e)}"

    async def _arun(self, url: str) -> str:
        """Asynchronous version of the tool"""
        return self._run(url)
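

# Usage sketch (illustrative, not part of the tool definitions): each class is
# a standard LangChain BaseTool, so instances can be passed to an agent or
# invoked directly via `.run()`. The query and URL below are arbitrary examples.
if __name__ == "__main__":
    print(WikipediaSearchTool().run("Alan Turing"))
    print(WebSearchTool().run("latest Python release"))
    print(WebContentTool().run("https://example.com"))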