Update tools/arxiv_tool.py
tools/arxiv_tool.py  +62 -38  CHANGED
@@ -13,10 +13,10 @@ class ArxivSearchTool(Tool):
         'query': {'type': 'string', 'description': 'The search query for papers'},
         'max_results': {'type': 'integer', 'description': 'Maximum number of results to return'}
     }
-    output_type =
+    output_type = "string"
 
     def forward(self, query: str = "artificial intelligence",
-                max_results: int = 50) ->
+                max_results: int = 50) -> str:
         try:
             # Construct the API URL
             base_url = 'http://export.arxiv.org/api/query?'
@@ -40,48 +40,72 @@ class ArxivSearchTool(Tool):
             ns = {'atom': 'http://www.w3.org/2005/Atom',
                   'arxiv': 'http://arxiv.org/schemas/atom'}
 
-            results
+            # Format results as a string
+            formatted_results = "## ArXiv Search Results\n\n"
+
             for entry in root.findall('atom:entry', ns):
+                title = entry.find('atom:title', ns).text.strip()
+                authors = [author.find('atom:name', ns).text
+                           for author in entry.findall('atom:author', ns)]
+                summary = entry.find('atom:summary', ns).text.strip() if entry.find('atom:summary', ns) is not None else ''
+                published = entry.find('atom:published', ns).text.strip()
+                paper_id = entry.find('atom:id', ns).text.strip()
+                pdf_url = next((link.get('href') for link in entry.findall('atom:link', ns)
+                                if link.get('type') == 'application/pdf'), None)
+                categories = [cat.get('term') for cat in entry.findall('atom:category', ns)]
+
+                formatted_results += f"### {title}\n"
+                formatted_results += f"- Authors: {', '.join(authors)}\n"
+                formatted_results += f"- Published: {published}\n"
+                formatted_results += f"- Categories: {', '.join(categories)}\n"
+                formatted_results += f"- PDF: {pdf_url}\n"
+                formatted_results += f"- Summary: {summary}\n\n"
 
-            return results
+            return formatted_results
 
         except Exception as e:
-            return
+            return f"Error searching ArXiv: {str(e)}"
 
 class LatestPapersTool(Tool):
     name = "get_latest_papers"
     description = "Get papers from the last N days from saved results"
     inputs = {'days_back': {'type': 'integer', 'description': 'Number of days to look back'}}
-    output_type =
+    output_type = "string"
 
-    def forward(self, days_back: int = 1) ->
+    def forward(self, days_back: int = 1) -> str:
+        try:
+            papers = []
+            base_dir = "daily_papers"
+
+            # Get dates to check
+            dates = [
+                (datetime.now() - timedelta(days=i)).strftime("%Y-%m-%d")
+                for i in range(days_back)
+            ]
+
+            # Load papers for each date
+            for date in dates:
+                file_path = os.path.join(base_dir, f"ai_papers_{date}.json")
+                if os.path.exists(file_path):
+                    with open(file_path, 'r', encoding='utf-8') as f:
+                        day_papers = json.load(f)
+                        papers.extend(day_papers)
+
+            # Format results as a string
+            formatted_results = f"## Latest Papers (Past {days_back} days)\n\n"
+
+            for paper in papers:
+                formatted_results += f"### {paper.get('title', 'Untitled')}\n"
+                formatted_results += f"- Authors: {', '.join(paper.get('authors', ['Unknown']))}\n"
+                formatted_results += f"- Published: {paper.get('published', 'Unknown')}\n"
+                formatted_results += f"- Categories: {', '.join(paper.get('categories', []))}\n"
+                if paper.get('pdf_url'):
+                    formatted_results += f"- PDF: {paper['pdf_url']}\n"
+                if paper.get('summary'):
+                    formatted_results += f"- Summary: {paper['summary']}\n"
+                formatted_results += "\n"
+
+            return formatted_results if papers else f"No papers found in the last {days_back} days."
+
+        except Exception as e:
+            return f"Error getting latest papers: {str(e)}"
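Since both tools now declare `output_type = "string"` and return plain strings, the change can be smoke-tested outside an agent run. A minimal sketch, assuming the module is importable as `tools.arxiv_tool` and that calling `forward` directly on the Tool subclasses is acceptable for the Tool base class in use; the query and day count are example values, not part of the commit:

    from tools.arxiv_tool import ArxivSearchTool, LatestPapersTool

    # Search arXiv and print the Markdown-formatted string the tool now returns
    # ("## ArXiv Search Results" followed by one "### <title>" block per paper).
    search_tool = ArxivSearchTool()
    print(search_tool.forward(query="multimodal agents", max_results=5))

    # Read daily_papers/ai_papers_<YYYY-MM-DD>.json for each of the last N days
    # and print the digest, or the "No papers found..." fallback message.
    latest_tool = LatestPapersTool()
    print(latest_tool.forward(days_back=3))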