File size: 3,474 Bytes
9333f04
 
 
062a4b0
 
4e13619
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
062a4b0
 
9333f04
4e13619
9333f04
062a4b0
 
4e13619
062a4b0
9333f04
 
062a4b0
4e13619
062a4b0
 
4e13619
062a4b0
 
 
9333f04
4e13619
9333f04
062a4b0
 
4e13619
062a4b0
 
 
 
9333f04
062a4b0
 
4e13619
062a4b0
4e13619
062a4b0
 
4e13619
062a4b0
 
 
4e13619
062a4b0
 
4e13619
062a4b0
 
 
4e13619
062a4b0
 
4e13619
062a4b0
 
 
 
4e13619
062a4b0
 
4e13619
062a4b0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import asyncio
from typing import Optional, Type

import requests
import wikipedia
from bs4 import BeautifulSoup
from langchain.tools import BaseTool
from langchain_community.tools import DuckDuckGoSearchRun


class WikipediaSearchTool(BaseTool):
    """Tool that returns a short English Wikipedia summary for a query.

    Every failure is reported as a plain string instead of being raised, so
    the caller always gets text back.
    """

    name: str = "wikipedia_search"
    description: str = "Search for information on Wikipedia using a given term or subject"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Return a three-sentence Wikipedia summary for ``query``.

        Args:
            query: Term or subject to look up.

        Returns:
            The summary text. On failure, a message describing the problem:
            the first five candidate titles when the query is ambiguous, a
            "no page found" notice, or the text of any other exception.
        """
        try:
            wikipedia.set_lang("en")
            return wikipedia.summary(query, sentences=3)
        except wikipedia.exceptions.DisambiguationError as e:
            # Only the first five options are shown to keep the reply short.
            return f"Ambiguity: multiple possible results for '{query}': {e.options[:5]}"
        except wikipedia.exceptions.PageError:
            return f"No page found for '{query}'."
        except Exception as e:
            return f"Error during Wikipedia search: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Asynchronous Wikipedia search.

        The lookup itself is blocking network I/O, so it is executed in the
        event loop's default executor instead of being called inline; calling
        ``_run`` directly from the coroutine would stall every other task on
        the loop until the request finished.

        Args:
            query: Term or subject to look up.

        Returns:
            The same string ``_run`` would return for ``query``.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._run, query)


class WebSearchTool(BaseTool):
    """Tool that runs a DuckDuckGo web search and returns the result text.

    Failures are reported as a plain string instead of being raised.
    """

    name: str = "web_search"
    description: str = "Search for information on the web using a search term"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Execute a web search and return the results.

        Args:
            query: Search term passed to DuckDuckGo.

        Returns:
            Whatever ``DuckDuckGoSearchRun.run`` returns for ``query``, or an
            error message containing the exception text if anything fails.
        """
        try:
            # Built inside the try so that a construction failure is also
            # reported as an error string instead of propagating.
            search_tool = DuckDuckGoSearchRun()
            return search_tool.run(query)
        except Exception as e:
            return f"Error during web search: {str(e)}"

    async def _arun(self, query: str) -> str:
        """Asynchronous version of the tool.

        The search is blocking, so it is executed in the event loop's default
        executor; calling ``_run`` inline would block the loop for the whole
        duration of the request.

        Args:
            query: Search term passed to DuckDuckGo.

        Returns:
            The same string ``_run`` would return for ``query``.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._run, query)

class WebContentTool(BaseTool):
    """Tool that downloads a web page and returns its visible text.

    Failures (non-200 status, network errors, parse errors) are reported as a
    plain string instead of being raised.
    """

    name: str = "fetch_web_content"
    description: str = "Retrieve the content of a web page from a URL"
    args_schema: Optional[Type] = None

    def _run(self, url: str) -> str:
        """Retrieve and clean web page content.

        Args:
            url: Address of the page to download.

        Returns:
            The page text with script/style/header/footer/nav elements
            removed, one non-empty stripped line per row, truncated to 5000
            characters (with a truncation marker appended). On failure, an
            error message with the HTTP status code or the exception text.
        """
        try:
            # Browser-like User-Agent sent with the request.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=10)

            if response.status_code != 200:
                return f"Error retrieving content: {response.status_code}"

            # Parse the raw bytes rather than response.text: when the
            # Content-Type header carries no charset, requests decodes text/*
            # bodies as ISO-8859-1, which garbles UTF-8 pages. Handing bytes
            # to BeautifulSoup lets it detect the encoding from the document
            # itself; a charset explicitly declared by the server is still
            # honoured via from_encoding.
            content_type = response.headers.get("Content-Type", "")
            declared_encoding = (
                response.encoding if "charset" in content_type.lower() else None
            )
            soup = BeautifulSoup(
                response.content, 'html.parser', from_encoding=declared_encoding
            )

            # Remove scripts, styles and other irrelevant elements
            for element in soup(['script', 'style', 'header', 'footer', 'nav']):
                element.decompose()

            # Extract main text
            text = soup.get_text(separator='\n')

            # Clean text: strip each line and drop the ones left empty
            lines = [stripped for line in text.split('\n') if (stripped := line.strip())]
            cleaned_text = '\n'.join(lines)

            # Limit text length
            max_length = 5000
            if len(cleaned_text) > max_length:
                cleaned_text = cleaned_text[:max_length] + "... (content truncated)"

            return cleaned_text

        except Exception as e:
            return f"Error retrieving web content: {str(e)}"

    async def _arun(self, url: str) -> str:
        """Asynchronous version of the tool.

        Download and parsing are blocking, so they are executed in the event
        loop's default executor; calling ``_run`` inline would block the loop
        for up to the full request timeout.

        Args:
            url: Address of the page to download.

        Returns:
            The same string ``_run`` would return for ``url``.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._run, url)