File size: 2,923 Bytes
3fe1351
 
 
 
 
 
 
 
a0f1951
3fe1351
 
 
 
 
 
 
 
 
a0f1951
3fe1351
 
 
 
 
 
 
 
a0f1951
3fe1351
 
e2de670
eebc6a3
e2de670
 
 
 
 
 
135ada9
e2de670
 
 
 
 
 
 
135ada9
e2de670
 
 
 
 
 
 
 
 
 
 
 
 
 
3fe1351
e2de670
 
 
 
 
 
 
 
 
 
 
3fe1351
a0f1951
f6dd71f
 
3fe1351
f6dd71f
3fe1351
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
import numpy as np
import argparse
import os
from datasets import load_dataset

# Defaults come from the environment; command-line flags override them.
# NOTE(review): the host default reads the API_URL env var, which looks
# misnamed for a bind address — confirm against deployment config.
HOST = os.environ.get("API_URL", "0.0.0.0")
PORT = os.environ.get("PORT", 7860)

parser = argparse.ArgumentParser()
parser.add_argument("--host", default=HOST)
parser.add_argument("--port", type=int, default=PORT)
# NOTE(review): default=True makes this flag a no-op — reload is always on,
# whether or not --reload is passed. Confirm this is intentional.
parser.add_argument("--reload", action="store_true", default=True)
parser.add_argument("--ssl_certfile")
parser.add_argument("--ssl_keyfile")
args = parser.parse_args()

# Application instance. CORS is wide open so the frontend can call the API
# from any origin.
app = FastAPI()
cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **cors_options)

@app.get("/api/results")
async def get_results():
    """Return benchmark results grouped by model and agent action type.

    Loads the smolagents results dataset, averages the ``acc`` column for
    each (model_id, agent_action_type, source) combination, and returns a
    list of ``{model_id, agent_action_type, scores}`` dicts where ``scores``
    maps each benchmark name (plus ``'Average'``) to a percentage.
    """
    # NOTE(review): this re-downloads/loads the dataset on every request —
    # consider caching the DataFrame at startup if latency matters.
    dataset = load_dataset("smolagents/results", "2024-12-26")
    df = dataset["train"].to_pandas()

    # Log some info to help debug
    print("Dataset loaded, shape:", df.shape)
    print("Columns:", df.columns)

    # Warn (but don't fail) if the dataset schema drifted from expectations.
    expected_columns = ['model_id', 'agent_action_type', 'source', 'acc']
    for col in expected_columns:
        if col not in df.columns:
            print(f"Warning: Column {col} not found in dataset")

    # Benchmarks the frontend knows how to display; loop-invariant, so
    # defined once outside the groupby loop.
    benchmarks = ['GAIA', 'MATH', 'SimpleQA']

    # One result entry per (model, action type) pair.
    result = []
    for (model_id, agent_action_type), group in df.groupby(['model_id', 'agent_action_type']):
        benchmark_scores = {}
        for benchmark in benchmarks:
            benchmark_group = group[group['source'] == benchmark]
            if not benchmark_group.empty:
                # Mean accuracy, converted to a percentage.
                benchmark_scores[benchmark] = benchmark_group['acc'].mean() * 100

        # Unweighted average over whichever benchmarks are present.
        if benchmark_scores:
            benchmark_scores['Average'] = sum(benchmark_scores.values()) / len(benchmark_scores)

        result.append({
            'model_id': model_id,
            'agent_action_type': agent_action_type,
            'scores': benchmark_scores,
        })

    print(f"Processed {len(result)} entries for the frontend")
    # FastAPI serializes the list to JSON automatically.
    # (Fixed: removed an unreachable `return data` after this line — `data`
    # was never defined and the statement was dead code.)
    return result



# Serve the static frontend from ./static at the root path (registered after
# the API routes so /api/* is handled by the handlers above).
app.mount("/", StaticFiles(directory="static", html=True), name="static")

if __name__ == "__main__":
    # Imported lazily so merely importing this module (e.g. via `app:app`)
    # does not require uvicorn at import time.
    import uvicorn

    print(args)
    # Launch the server with the parsed CLI/environment settings; SSL paths
    # default to None, which disables TLS.
    server_options = dict(
        host=args.host,
        port=args.port,
        reload=args.reload,
        ssl_certfile=args.ssl_certfile,
        ssl_keyfile=args.ssl_keyfile,
    )
    uvicorn.run("app:app", **server_options)