import json
import logging
import os
from datetime import datetime, timedelta, timezone
from typing import Dict, List

import arxiv
from smolagents import Tool

logger = logging.getLogger(__name__)

# Date format used both for the 'published' field of a paper record and for
# the date embedded in the daily JSON file names.
_DATE_FORMAT = "%Y-%m-%d"


class ArxivSearchTool(Tool):
    """Tool that queries ArXiv and returns the newest matching papers.

    Each paper is returned as a plain dict with the keys ``title``,
    ``authors``, ``summary``, ``published`` (``YYYY-MM-DD``), ``pdf_url``,
    ``entry_id``, ``primary_category`` and ``categories``.
    """

    name = "search_arxiv"
    description = "Search ArXiv for papers matching the query"
    # NOTE(review): smolagents' documented Tool interface appears to expect an
    # `inputs` dict, a string `output_type` and a `forward` method -- confirm
    # these attributes against the installed smolagents version.
    input_types = {"query": str, "max_results": int}
    output_type = List[Dict]

    def __call__(
        self,
        query: str = "artificial intelligence",
        max_results: int = 50,
    ) -> List[Dict]:
        """Search ArXiv, newest submissions first.

        Args:
            query: ArXiv query string.
            max_results: Maximum number of papers to request.

        Returns:
            A list of paper dicts. If anything goes wrong, a single-element
            list ``[{"error": "..."}]`` is returned instead of raising, so an
            agent calling the tool always receives a value.
        """
        try:
            client = arxiv.Client()
            search = arxiv.Search(
                query=query,
                max_results=max_results,
                sort_by=arxiv.SortCriterion.SubmittedDate,
            )
            return [
                {
                    'title': paper.title,
                    'authors': [str(author) for author in paper.authors],
                    'summary': paper.summary,
                    # NOTE(review): the arxiv package is understood to report
                    # `published` in UTC, so this is the UTC calendar date.
                    'published': paper.published.strftime(_DATE_FORMAT),
                    'pdf_url': paper.pdf_url,
                    'entry_id': paper.entry_id,
                    'primary_category': paper.primary_category,
                    'categories': paper.categories,
                }
                for paper in client.results(search)
            ]
        except Exception as e:
            # Deliberate best-effort: report the failure as data, not a crash.
            return [{"error": f"Error searching ArXiv: {str(e)}"}]


class LatestPapersTool(Tool):
    """Tool that loads previously saved daily paper files from disk."""

    name = "get_latest_papers"
    description = "Get papers from the last N days from saved results"
    # NOTE(review): see the interface note on ArxivSearchTool's attributes --
    # confirm `input_types` against the installed smolagents version.
    input_types = {"days_back": int}
    output_type = List[Dict]

    def __call__(self, days_back: int = 1) -> List[Dict]:
        """Return the papers saved for the last ``days_back`` days.

        Files are looked up as ``daily_papers/ai_papers_<YYYY-MM-DD>.json``,
        starting with today's UTC date and walking backwards one day at a
        time. Days without a file are skipped.

        Args:
            days_back: Number of days to include, counting today. Values
                less than 1 yield an empty list.

        Returns:
            The concatenated contents of every file found, newest day first.
        """
        papers: List[Dict] = []
        base_dir = "daily_papers"

        # Take one UTC timestamp up front: the files are named by UTC date,
        # and a single reference time keeps the date list consistent even if
        # the call straddles midnight.
        now = datetime.now(timezone.utc)
        dates = [
            (now - timedelta(days=offset)).strftime(_DATE_FORMAT)
            for offset in range(days_back)
        ]

        for date in dates:
            file_path = os.path.join(base_dir, f"ai_papers_{date}.json")
            if os.path.exists(file_path):
                with open(file_path, 'r', encoding='utf-8') as f:
                    papers.extend(json.load(f))

        return papers


def save_daily_papers(output_dir: str = "daily_papers") -> List[Dict]:
    """Helper function to save daily papers - not exposed as a tool.

    Fetches the newest AI-related papers from ArXiv, keeps those whose
    ``published`` date equals today's UTC date, and writes them to
    ``<output_dir>/ai_papers_<YYYY-MM-DD>.json``.

    Args:
        output_dir: Directory for the daily JSON files; created if missing.

    Returns:
        The papers published today. If the ArXiv search failed, an empty
        list is returned and any existing file for today is left untouched.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Compare against the UTC date: the 'published' strings come from the
    # arxiv package's timestamps, so a local-time "today" would silently
    # drop papers whenever the local and UTC dates differ.
    today = datetime.now(timezone.utc).strftime(_DATE_FORMAT)

    arxiv_tool = ArxivSearchTool()
    papers = arxiv_tool(
        query='cat:cs.AI OR cat:cs.LG OR cat:cs.CL OR "artificial intelligence"',
        max_results=100,
    )

    # The search tool reports failures as {"error": ...} entries. Do not let
    # a transient failure overwrite an earlier, good file with an empty list.
    errors = [paper["error"] for paper in papers if "error" in paper]
    if errors:
        logger.warning(
            "ArXiv search failed; not writing papers for %s: %s",
            today,
            errors[0],
        )
        return []

    today_papers = [
        paper for paper in papers
        if paper.get('published') == today
    ]

    output_file = os.path.join(output_dir, f"ai_papers_{today}.json")
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(today_papers, f, indent=2)

    return today_papers