Spaces:
Sleeping
Sleeping
File size: 3,474 Bytes
9333f04 062a4b0 4e13619 062a4b0 9333f04 4e13619 9333f04 062a4b0 4e13619 062a4b0 9333f04 062a4b0 4e13619 062a4b0 4e13619 062a4b0 9333f04 4e13619 9333f04 062a4b0 4e13619 062a4b0 9333f04 062a4b0 4e13619 062a4b0 4e13619 062a4b0 4e13619 062a4b0 4e13619 062a4b0 4e13619 062a4b0 4e13619 062a4b0 4e13619 062a4b0 4e13619 062a4b0 4e13619 062a4b0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
from langchain_community.tools import DuckDuckGoSearchRun
from langchain.tools import BaseTool
from typing import Optional, Type
import requests
from bs4 import BeautifulSoup
import wikipedia
class WikipediaSearchTool(BaseTool):
    """Tool that looks up a term on English Wikipedia and returns a short summary."""

    name: str = "wikipedia_search"
    description: str = "Search for information on Wikipedia using a given term or subject"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Return a three-sentence Wikipedia summary for *query*, or an error string."""
        try:
            # Language is set on every call; the wikipedia module keeps it as module state.
            wikipedia.set_lang("en")
            return wikipedia.summary(query, sentences=3)
        except wikipedia.exceptions.DisambiguationError as exc:
            # The term matches several pages; surface the first few candidates.
            return f"Ambiguity: multiple possible results for '{query}': {exc.options[:5]}"
        except wikipedia.exceptions.PageError:
            return f"No page found for '{query}'."
        except Exception as exc:
            # Best-effort tool: report the failure as text instead of raising.
            return f"Error during Wikipedia search: {str(exc)}"

    async def _arun(self, query: str) -> str:
        """Async entry point; delegates to the synchronous implementation."""
        return self._run(query)
class WebSearchTool(BaseTool):
    """Tool that runs a DuckDuckGo web search for a query string."""

    name: str = "web_search"
    description: str = "Search for information on the web using a search term"
    args_schema: Optional[Type] = None

    def _run(self, query: str) -> str:
        """Run a DuckDuckGo search and return the raw result text, or an error string."""
        try:
            # A fresh runner per call keeps the tool stateless.
            return DuckDuckGoSearchRun().run(query)
        except Exception as exc:
            # Best-effort tool: report the failure as text instead of raising.
            return f"Error during web search: {str(exc)}"

    async def _arun(self, query: str) -> str:
        """Async entry point; delegates to the synchronous implementation."""
        return self._run(query)
class WebContentTool(BaseTool):
    """Tool that downloads a web page and returns its cleaned, length-limited text."""

    name: str = "fetch_web_content"
    description: str = "Retrieve the content of a web page from a URL"
    args_schema: Optional[Type] = None

    def _run(self, url: str) -> str:
        """Fetch *url*, strip boilerplate markup, and return up to 5000 chars of text."""
        try:
            # Browser-like User-Agent so sites that block default clients still answer.
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
            response = requests.get(url, headers=headers, timeout=10)
            if response.status_code != 200:
                return f"Error retrieving content: {response.status_code}"

            soup = BeautifulSoup(response.text, 'html.parser')
            # Drop non-content markup before extracting visible text.
            for tag in soup(['script', 'style', 'header', 'footer', 'nav']):
                tag.decompose()

            raw_text = soup.get_text(separator='\n')
            # Strip each line and discard the empty ones.
            stripped_lines = (line.strip() for line in raw_text.split('\n'))
            cleaned_text = '\n'.join(line for line in stripped_lines if line)

            # Cap the payload so the tool output stays LLM-friendly.
            max_length = 5000
            if len(cleaned_text) > max_length:
                return cleaned_text[:max_length] + "... (content truncated)"
            return cleaned_text
        except Exception as exc:
            # Best-effort tool: report the failure as text instead of raising.
            return f"Error retrieving web content: {str(exc)}"

    async def _arun(self, url: str) -> str:
        """Async entry point; delegates to the synchronous implementation."""
        return self._run(url)