File size: 3,141 Bytes
d4c1ac1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import arxiv
from datetime import datetime, timedelta
import json
import os
from typing import List, Dict
from smolagents import Tool

class ArxivSearchTool(Tool):
    """Tool that queries the ArXiv API and returns paper metadata dicts.

    NOTE(review): the stock smolagents ``Tool`` API usually expects an
    ``inputs`` mapping with per-argument descriptions, a string
    ``output_type``, and a ``forward`` method — confirm this
    ``input_types``/``__call__`` shape matches the installed smolagents
    version.
    """
    name = "search_arxiv"
    description = "Search ArXiv for papers matching the query"
    input_types = {"query": str, "max_results": int}
    output_type = List[Dict]

    def __call__(self, query: str = "artificial intelligence",
                max_results: int = 50) -> List[Dict]:
        """Run the ArXiv search and return one metadata dict per paper.

        Results are sorted by submission date (newest first). On any
        failure, a single-element list containing an ``{"error": ...}``
        dict is returned instead of raising.
        """
        try:
            api_client = arxiv.Client()
            request = arxiv.Search(
                query=query,
                max_results=max_results,
                sort_by=arxiv.SortCriterion.SubmittedDate
            )
            # Flatten each result object into a plain JSON-serializable dict.
            return [
                {
                    'title': entry.title,
                    'authors': [str(person) for person in entry.authors],
                    'summary': entry.summary,
                    'published': entry.published.strftime("%Y-%m-%d"),
                    'pdf_url': entry.pdf_url,
                    'entry_id': entry.entry_id,
                    'primary_category': entry.primary_category,
                    'categories': entry.categories
                }
                for entry in api_client.results(request)
            ]
        except Exception as e:
            # Best-effort contract: surface the failure as data, not an exception.
            return [{"error": f"Error searching ArXiv: {str(e)}"}]

class LatestPapersTool(Tool):
    """Tool that loads previously saved daily paper dumps from disk."""
    name = "get_latest_papers"
    description = "Get papers from the last N days from saved results"
    input_types = {"days_back": int}
    output_type = List[Dict]

    def __call__(self, days_back: int = 1) -> List[Dict]:
        """Collect papers saved during the last ``days_back`` days.

        Today counts as day zero; dates with no saved file are skipped
        silently. Returns the concatenated paper dicts from every file
        found (assumes each file holds a JSON list — TODO confirm the
        writer always produces one).
        """
        base_dir = "daily_papers"
        collected: List[Dict] = []
        now = datetime.now()

        for offset in range(days_back):
            stamp = (now - timedelta(days=offset)).strftime("%Y-%m-%d")
            path = os.path.join(base_dir, f"ai_papers_{stamp}.json")
            if not os.path.exists(path):
                continue
            with open(path, 'r', encoding='utf-8') as fh:
                collected.extend(json.load(fh))

        return collected

def save_daily_papers(output_dir: str = "daily_papers") -> List[Dict]:
    """Fetch today's AI papers from ArXiv and persist them as a JSON dump.

    Helper function, not exposed as a tool.

    Args:
        output_dir: Directory the daily JSON file is written into
            (created if missing).

    Returns:
        The list of paper dicts whose ``published`` date equals today's
        date. Empty when the search failed or nothing was published today.
    """
    os.makedirs(output_dir, exist_ok=True)
    # NOTE(review): datetime.now() is local time while ArXiv publish dates
    # are UTC — papers near the date boundary may be missed; confirm whether
    # datetime.now(timezone.utc) is intended here.
    today = datetime.now().strftime("%Y-%m-%d")

    arxiv_tool = ArxivSearchTool()
    papers = arxiv_tool(
        query='cat:cs.AI OR cat:cs.LG OR cat:cs.CL OR "artificial intelligence"',
        max_results=100
    )

    # Explicitly drop the {"error": ...} marker the search tool returns on
    # failure so a failed fetch is never persisted as if it were a paper.
    today_papers = [
        paper for paper in papers
        if 'error' not in paper and paper.get('published') == today
    ]

    output_file = os.path.join(output_dir, f"ai_papers_{today}.json")
    # ensure_ascii=False keeps non-ASCII author names readable in the dump
    # instead of \uXXXX escapes.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(today_papers, f, indent=2, ensure_ascii=False)

    return today_papers