# First_agent_template/tools/arxiv_tool.py
# Provenance (removed Hugging Face page chrome): uploaded by Ferocious0xide,
# commit d4c1ac1 ("Create arxiv_tool.py"), file size 3.14 kB.
import arxiv
from datetime import datetime, timedelta
import json
import os
from typing import List, Dict
from smolagents import Tool
class ArxivSearchTool(Tool):
    """Search ArXiv for recent papers matching a query string."""

    name = "search_arxiv"
    description = "Search ArXiv for papers matching the query"
    # NOTE(review): recent smolagents versions expect an `inputs` dict of
    # {"type", "description"} entries, a string `output_type`, and a
    # `forward` method — confirm these attributes against the installed
    # smolagents release.
    input_types = {"query": str, "max_results": int}
    output_type = List[Dict]

    def __call__(self, query: str = "artificial intelligence",
                 max_results: int = 50) -> List[Dict]:
        """Run the search and return one metadata dict per paper.

        Results are sorted by submission date (newest first). On any
        failure a single-element list holding an ``error`` dict is
        returned instead of raising.
        """
        try:
            search_client = arxiv.Client()
            request = arxiv.Search(
                query=query,
                max_results=max_results,
                sort_by=arxiv.SortCriterion.SubmittedDate,
            )
            # One flat, JSON-serializable dict per result.
            return [
                {
                    'title': entry.title,
                    'authors': [str(author) for author in entry.authors],
                    'summary': entry.summary,
                    'published': entry.published.strftime("%Y-%m-%d"),
                    'pdf_url': entry.pdf_url,
                    'entry_id': entry.entry_id,
                    'primary_category': entry.primary_category,
                    'categories': entry.categories,
                }
                for entry in search_client.results(request)
            ]
        except Exception as exc:
            # Best-effort tool: surface the failure as data, not an exception.
            return [{"error": f"Error searching ArXiv: {str(exc)}"}]
class LatestPapersTool(Tool):
    """Load previously saved ArXiv papers for the last N days."""

    name = "get_latest_papers"
    description = "Get papers from the last N days from saved results"
    input_types = {"days_back": int}
    output_type = List[Dict]

    def __call__(self, days_back: int = 1) -> List[Dict]:
        """Collect papers saved under ``daily_papers/ai_papers_<date>.json``.

        Checks today and the preceding ``days_back - 1`` days; dates with
        no saved file are skipped silently. Returns the concatenation of
        each day's list, newest date first.
        """
        base_dir = "daily_papers"
        collected: List[Dict] = []
        for offset in range(days_back):
            stamp = (datetime.now() - timedelta(days=offset)).strftime("%Y-%m-%d")
            file_path = os.path.join(base_dir, f"ai_papers_{stamp}.json")
            if not os.path.exists(file_path):
                continue
            with open(file_path, 'r', encoding='utf-8') as fh:
                collected.extend(json.load(fh))
        return collected
def save_daily_papers(output_dir: str = "daily_papers") -> List[Dict]:
    """Fetch today's AI papers from ArXiv and persist them as JSON.

    Helper function — not exposed as a smolagents tool. Searches ArXiv for
    recent AI/ML/NLP papers, keeps only those whose submission date matches
    today's local date, and writes them to
    ``<output_dir>/ai_papers_<YYYY-MM-DD>.json``.

    Args:
        output_dir: Directory for the JSON file; created if missing.

    Returns:
        The list of paper dicts published today (possibly empty). Error
        payloads from ``ArxivSearchTool`` lack a ``'published'`` key and are
        therefore silently filtered out here.
    """
    os.makedirs(output_dir, exist_ok=True)
    today = datetime.now().strftime("%Y-%m-%d")

    arxiv_tool = ArxivSearchTool()
    papers = arxiv_tool(
        query='cat:cs.AI OR cat:cs.LG OR cat:cs.CL OR "artificial intelligence"',
        max_results=100
    )

    # Keep only papers submitted today (local date).
    today_papers = [
        paper for paper in papers
        if paper.get('published') == today
    ]

    output_file = os.path.join(output_dir, f"ai_papers_{today}.json")
    with open(output_file, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII author names / abstracts readable
        # in the saved file instead of \uXXXX escapes.
        json.dump(today_papers, f, indent=2, ensure_ascii=False)
    return today_papers