File size: 2,923 Bytes
3fe1351
 
 
 
 
 
 
 
a0f1951
3fe1351
 
 
 
 
 
 
 
 
a0f1951
3fe1351
 
 
 
 
 
 
 
a0f1951
3fe1351
 
e2de670
eebc6a3
e2de670
 
 
 
 
 
135ada9
e2de670
 
 
 
 
 
 
135ada9
e2de670
 
 
 
 
 
 
 
 
 
 
 
 
 
3fe1351
e2de670
 
 
 
 
 
 
 
 
 
 
3fe1351
a0f1951
f6dd71f
 
3fe1351
f6dd71f
3fe1351
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
import numpy as np
import argparse
import os
from datasets import load_dataset

# Defaults come from the environment; command-line flags override them.
# NOTE(review): the host default reads the API_URL env var, which looks
# misnamed for a bind address — confirm against deployment config.
HOST = os.environ.get("API_URL", "0.0.0.0")
PORT = os.environ.get("PORT", 7860)

parser = argparse.ArgumentParser()
parser.add_argument("--host", default=HOST)
parser.add_argument("--port", type=int, default=PORT)
# NOTE(review): default=True makes this flag a no-op — reload is always on,
# whether or not --reload is passed. Confirm this is intentional.
parser.add_argument("--reload", action="store_true", default=True)
parser.add_argument("--ssl_certfile")
parser.add_argument("--ssl_keyfile")
args = parser.parse_args()

# Application instance. CORS is wide open so the frontend can call the API
# from any origin.
app = FastAPI()
cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **cors_options)

@app.get("/api/results")
async def get_results():
    """Return benchmark results grouped by model and agent action type.

    Loads the smolagents results dataset, averages the ``acc`` column for
    each (model_id, agent_action_type, source) combination, and returns a
    list of ``{model_id, agent_action_type, scores}`` dicts where ``scores``
    maps each benchmark name (plus ``'Average'``) to a percentage.
    """
    # NOTE(review): this re-downloads/loads the dataset on every request —
    # consider caching the DataFrame at startup if latency matters.
    dataset = load_dataset("smolagents/results", "2024-12-26")
    df = dataset["train"].to_pandas()

    # Log some info to help debug
    print("Dataset loaded, shape:", df.shape)
    print("Columns:", df.columns)

    # Warn (but don't fail) if the dataset schema drifted from expectations.
    expected_columns = ['model_id', 'agent_action_type', 'source', 'acc']
    for col in expected_columns:
        if col not in df.columns:
            print(f"Warning: Column {col} not found in dataset")

    # Benchmarks the frontend knows how to display; loop-invariant, so
    # defined once outside the groupby loop.
    benchmarks = ['GAIA', 'MATH', 'SimpleQA']

    # One result entry per (model, action type) pair.
    result = []
    for (model_id, agent_action_type), group in df.groupby(['model_id', 'agent_action_type']):
        benchmark_scores = {}
        for benchmark in benchmarks:
            benchmark_group = group[group['source'] == benchmark]
            if not benchmark_group.empty:
                # Mean accuracy, converted to a percentage.
                benchmark_scores[benchmark] = benchmark_group['acc'].mean() * 100

        # Unweighted average over whichever benchmarks are present.
        if benchmark_scores:
            benchmark_scores['Average'] = sum(benchmark_scores.values()) / len(benchmark_scores)

        result.append({
            'model_id': model_id,
            'agent_action_type': agent_action_type,
            'scores': benchmark_scores,
        })

    print(f"Processed {len(result)} entries for the frontend")
    # FastAPI serializes the list to JSON automatically.
    # (Fixed: removed an unreachable `return data` after this line — `data`
    # was never defined and the statement was dead code.)
    return result



# Serve the static frontend from ./static at the root path (registered after
# the API routes so /api/* is handled by the handlers above).
app.mount("/", StaticFiles(directory="static", html=True), name="static")

if __name__ == "__main__":
    # Imported lazily so merely importing this module (e.g. via `app:app`)
    # does not require uvicorn at import time.
    import uvicorn

    print(args)
    # Launch the server with the parsed CLI/environment settings; SSL paths
    # default to None, which disables TLS.
    server_options = dict(
        host=args.host,
        port=args.port,
        reload=args.reload,
        ssl_certfile=args.ssl_certfile,
        ssl_keyfile=args.ssl_keyfile,
    )
    uvicorn.run("app:app", **server_options)