from typing import Optional, Type

import requests
import wikipedia
from bs4 import BeautifulSoup
from langchain.tools import BaseTool
from langchain_community.tools import DuckDuckGoSearchRun


class WikipediaSearchTool(BaseTool):
    name: str = "wikipedia_search"
    description: str = "Search for information on Wikipedia using a given term or subject"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Synchronous Wikipedia search."""
        try:
            wikipedia.set_lang("en")
            summary = wikipedia.summary(query, sentences=3)
            return summary
        except wikipedia.exceptions.DisambiguationError as e:
            return f"Ambiguity: multiple possible results for '{query}': {e.options[:5]}"
        except wikipedia.exceptions.PageError:
            return f"No page found for '{query}'."
        except Exception as e:
            return f"Error during Wikipedia search: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Asynchronous Wikipedia search (falls back to the sync implementation)."""
        return self._run(query)


class WebSearchTool(BaseTool):
    name: str = "web_search"
    description: str = "Search for information on the web using a search term"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Execute a web search and return relevant results."""
        try:
            search_tool = DuckDuckGoSearchRun()
            return search_tool.run(query)
        except Exception as e:
            return f"Error during web search: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Asynchronous version of the tool (falls back to the sync implementation)."""
        return self._run(query)


class WebContentTool(BaseTool):
    name: str = "fetch_web_content"
    description: str = "Retrieve the content of a web page from a URL"
    args_schema: Optional[Type] = None

    def _run(self, url: str) -> str:
        """Retrieve and clean web page content."""
        try:
            headers = {
                "User-Agent": (
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/91.0.4472.124 Safari/537.36"
                )
            }
            response = requests.get(url, headers=headers, timeout=10)
            if response.status_code != 200:
                return f"Error retrieving content: {response.status_code}"

            # Parse the HTML with BeautifulSoup
            soup = BeautifulSoup(response.text, "html.parser")

            # Remove scripts, styles, and other non-content elements
            for element in soup(["script", "style", "header", "footer", "nav"]):
                element.decompose()

            # Extract the main text
            text = soup.get_text(separator="\n")

            # Clean the text (strip whitespace, drop empty lines)
            lines = [line.strip() for line in text.split("\n") if line.strip()]
            cleaned_text = "\n".join(lines)

            # Limit the text length
            max_length = 5000
            if len(cleaned_text) > max_length:
                cleaned_text = cleaned_text[:max_length] + "... (content truncated)"

            return cleaned_text
        except Exception as e:
            return f"Error retrieving web content: {str(e)}"

    async def _arun(self, url: str) -> str:
        """Asynchronous version of the tool (falls back to the sync implementation)."""
        return self._run(url)
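
# --- Example usage (illustrative sketch, not part of the original module) ---
# Shows how the three tools can be instantiated and exercised directly via
# BaseTool.run(), which validates the input and dispatches to _run().
# Assumes the `wikipedia`, `duckduckgo-search`, `requests`, `beautifulsoup4`,
# and `langchain`/`langchain-community` packages are installed; the query
# strings and URL below are placeholders.
if __name__ == "__main__":
    tools = [WikipediaSearchTool(), WebSearchTool(), WebContentTool()]

    print(tools[0].run("Alan Turing"))           # Wikipedia summary
    print(tools[1].run("LangChain custom tools"))  # DuckDuckGo results
    print(tools[2].run("https://example.com"))   # cleaned page text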