# Final_Assignment_Template

# Sleeping

# File size: 3,474 Bytes

import asyncio
from typing import Optional, Type

import requests
import wikipedia
from bs4 import BeautifulSoup
from langchain.tools import BaseTool
from langchain_community.tools import DuckDuckGoSearchRun


class WikipediaSearchTool(BaseTool):
    """Tool that returns a short English Wikipedia summary for a query.

    Failures are reported as plain strings instead of being raised, so the
    caller always receives text.
    """

    name: str = "wikipedia_search"
    description: str = "Search for information on Wikipedia using a given term or subject"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Return a three-sentence Wikipedia summary for ``query``.

        Args:
            query: Term or subject to look up.

        Returns:
            The summary text, or a human-readable message when the query is
            empty, ambiguous, has no matching page, or the lookup fails.
        """
        # Reject blank input up front instead of sending a pointless request.
        if not query or not query.strip():
            return "Error during Wikipedia search: empty query."

        try:
            wikipedia.set_lang("en")
            return wikipedia.summary(query, sentences=3)
        except wikipedia.exceptions.DisambiguationError as e:
            # Show only the first few candidates to keep the message short.
            return f"Ambiguity: multiple possible results for '{query}': {e.options[:5]}"
        except wikipedia.exceptions.PageError:
            return f"No page found for '{query}'."
        except Exception as e:
            # Tool boundary: report any other failure as text.
            return f"Error during Wikipedia search: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Asynchronous Wikipedia search.

        The lookup itself is blocking, so it is executed in the default
        executor to keep the event loop free while it runs.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._run, query)


class WebSearchTool(BaseTool):
    """Tool that runs a DuckDuckGo web search and returns the result text.

    Failures are reported as plain strings instead of being raised, so the
    caller always receives text.
    """

    name: str = "web_search"
    description: str = "Search for information on the web using a search term"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Execute a web search and return the results.

        Args:
            query: Search term to submit.

        Returns:
            Whatever ``DuckDuckGoSearchRun.run`` returns for the query, or an
            error message when the query is empty or the search fails.
        """
        # Reject blank input up front instead of sending a pointless request.
        if not query or not query.strip():
            return "Error during web search: empty query."

        try:
            search_tool = DuckDuckGoSearchRun()
            return search_tool.run(query)
        except Exception as e:
            # Tool boundary: report any failure as text.
            return f"Error during web search: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Asynchronous web search.

        The search itself is blocking, so it is executed in the default
        executor to keep the event loop free while it runs.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._run, query)

class WebContentTool(BaseTool):
    """Tool that downloads a web page and returns its visible text.

    Failures are reported as plain strings instead of being raised, so the
    caller always receives text.
    """

    name: str = "fetch_web_content"
    description: str = "Retrieve the content of a web page from a URL"
    args_schema: Optional[Type] = None
    # Seconds passed to requests.get as the timeout.
    request_timeout: float = 10
    # Maximum number of characters returned before the text is truncated.
    max_length: int = 5000

    def _run(self, url: str) -> str:
        """Retrieve a web page and return its cleaned text content.

        Args:
            url: Address of the page to fetch.

        Returns:
            The page text with script/style/header/footer/nav elements and
            blank lines removed, truncated to ``max_length`` characters; or an
            error message when the URL is empty, the response status is not
            200, or the request/parsing fails.
        """
        # Reject blank input up front instead of letting requests fail on it.
        if not url or not url.strip():
            return "Error retrieving web content: empty URL."

        try:
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=self.request_timeout)

            if response.status_code != 200:
                return f"Error retrieving content: {response.status_code}"

            # When the server declares no charset, requests decodes text/*
            # bodies as ISO-8859-1, which garbles UTF-8 pages. Hand the raw
            # bytes to BeautifulSoup in that case so it can detect the
            # encoding itself (including from <meta> tags).
            content_type = response.headers.get("Content-Type", "")
            if "charset" in content_type.lower():
                markup = response.text
            else:
                markup = response.content
            soup = BeautifulSoup(markup, 'html.parser')

            # Remove scripts, styles and other irrelevant elements
            for element in soup(['script', 'style', 'header', 'footer', 'nav']):
                element.decompose()

            # Extract main text
            text = soup.get_text(separator='\n')

            # Clean text: strip each line and drop the empty ones
            stripped = (line.strip() for line in text.split('\n'))
            cleaned_text = '\n'.join(line for line in stripped if line)

            # Limit text length
            if len(cleaned_text) > self.max_length:
                cleaned_text = cleaned_text[:self.max_length] + "... (content truncated)"

            return cleaned_text

        except Exception as e:
            # Tool boundary: report any failure as text.
            return f"Error retrieving web content: {str(e)}"

    async def _arun(self, url: str) -> str:
        """Asynchronous page retrieval.

        The download itself is blocking, so it is executed in the default
        executor to keep the event loop free while it runs.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._run, url)