from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
import numpy as np
import argparse
import os
from datasets import load_dataset
# Server configuration: environment variables supply defaults, CLI flags override.
# NOTE(review): the host default is read from an env var named API_URL, which
# looks misnamed for a bind address — confirm whether HOST was intended.
HOST = os.environ.get("API_URL", "0.0.0.0")
# Coerce to int up front so PORT is an int whether it came from the env (str)
# or from the literal default.
PORT = int(os.environ.get("PORT", 7860))

parser = argparse.ArgumentParser()
parser.add_argument("--host", default=HOST)
parser.add_argument("--port", type=int, default=PORT)
# Bug fix: the original used action="store_true" with default=True, which made
# --reload a no-op (it could never be turned off). Keep the same default
# (reload enabled) but add --no-reload so it can actually be disabled.
parser.add_argument("--reload", dest="reload", action="store_true", default=True)
parser.add_argument("--no-reload", dest="reload", action="store_false")
parser.add_argument("--ssl_certfile")
parser.add_argument("--ssl_keyfile")
args = parser.parse_args()
app = FastAPI()
# Allow cross-origin requests so a separately hosted frontend can call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is a
# maximally permissive CORS policy — confirm credentialed requests are actually
# needed before keeping both.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/api/results")
async def get_results():
    """Return benchmark results formatted for the frontend.

    Loads the ``smolagents/results`` dataset (config "2024-12-26"), converts
    the train split to a DataFrame, and aggregates it into one entry per
    (model_id, agent_action_type) with per-benchmark percentage scores.

    Returns:
        list[dict]: see :func:`_format_results` for the exact shape.
    """
    # NOTE(review): the dataset is loaded on every request — consider caching
    # if this endpoint is hit frequently.
    dataset = load_dataset("smolagents/results", "2024-12-26")
    df = dataset["train"].to_pandas()

    # Debug logging to verify the schema that actually arrived.
    print("Dataset loaded, shape:", df.shape)
    print("Columns:", df.columns)

    # Bug fix: the original ended with an unreachable `return data` after
    # `return result` (`data` was never defined — a NameError if ever reached);
    # that dead line has been removed.
    return _format_results(df)


def _format_results(df):
    """Aggregate a results DataFrame into the structure the frontend expects.

    Expects columns ``model_id``, ``agent_action_type``, ``source`` (benchmark
    name) and ``acc`` (accuracy in [0, 1]).

    Returns:
        list[dict]: one entry per (model_id, agent_action_type) group, each
        ``{"model_id": ..., "agent_action_type": ..., "scores": {...}}`` where
        ``scores`` maps benchmark name -> mean accuracy as a percentage, plus
        an ``"Average"`` over the benchmarks that had data.
    """
    # Warn (but continue) if the schema differs from what we expect.
    expected_columns = ['model_id', 'agent_action_type', 'source', 'acc']
    for col in expected_columns:
        if col not in df.columns:
            print(f"Warning: Column {col} not found in dataset")

    benchmarks = ['GAIA', 'MATH', 'SimpleQA']  # hoisted out of the loop
    result = []
    for (model_id, agent_action_type), group in df.groupby(['model_id', 'agent_action_type']):
        # Mean accuracy per benchmark, as a percentage. float() converts the
        # numpy scalar so the payload is plainly JSON-serializable.
        benchmark_scores = {}
        for benchmark in benchmarks:
            benchmark_group = group[group['source'] == benchmark]
            if not benchmark_group.empty:
                benchmark_scores[benchmark] = float(benchmark_group['acc'].mean()) * 100

        # "Average" is the mean over only the benchmarks that have data.
        if benchmark_scores:
            benchmark_scores['Average'] = sum(benchmark_scores.values()) / len(benchmark_scores)

        result.append({
            'model_id': model_id,
            'agent_action_type': agent_action_type,
            'scores': benchmark_scores,
        })

    print(f"Processed {len(result)} entries for the frontend")
    return result
app.mount("/", StaticFiles(directory="static", html=True), name="static")
if __name__ == "__main__":
    # Run the development server directly (imported lazily so merely importing
    # this module doesn't require uvicorn).
    import uvicorn

    print(args)
    # Collect the CLI/env-derived settings and hand them to uvicorn.
    server_options = {
        "host": args.host,
        "port": args.port,
        "reload": args.reload,
        "ssl_certfile": args.ssl_certfile,
        "ssl_keyfile": args.ssl_keyfile,
    }
    uvicorn.run("app:app", **server_options)