Spaces:

smolagents
/

smolagents-leaderboard

Running

App Files Files Community

smolagents-leaderboard / app.py

m-ric HF Staff

Update app.py

eebc6a3 verified 21 days ago

raw

history blame contribute delete

2.92 kB

	from fastapi import FastAPI
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse
	from fastapi.staticfiles import StaticFiles
	import numpy as np
	import argparse
	import os
	from datasets import load_dataset

	# Bind address and port defaults come from the environment so the hosting
	# platform can override them without command-line flags.
	HOST = os.environ.get("API_URL", "0.0.0.0")
	# May be a string when taken from the environment; argparse applies
	# ``type=int`` to string defaults, so ``args.port`` ends up an int either way.
	PORT = os.environ.get("PORT", 7860)

	parser = argparse.ArgumentParser()
	parser.add_argument("--host", default=HOST)
	parser.add_argument("--port", type=int, default=PORT)
	# Reload stays enabled by default, exactly as before. ``--no-reload`` is the
	# opt-out that a lone ``store_true`` flag defaulting to True could not offer.
	parser.add_argument("--reload", dest="reload", action="store_true", default=True)
	parser.add_argument("--no-reload", dest="reload", action="store_false")
	parser.add_argument("--ssl_certfile")
	parser.add_argument("--ssl_keyfile")

	if __name__ == "__main__":
	    # Run as a script: the command line belongs to us, so reject unknown flags.
	    args = parser.parse_args()
	else:
	    # Imported by another launcher: sys.argv belongs to that program, so
	    # tolerate arguments we do not know instead of aborting the import.
	    args, _unknown_args = parser.parse_known_args()

	# Application instance that the route and static mount in this module attach to.
	app = FastAPI()

	# CORS policy: every origin, method and header is accepted, with credentials.
	# NOTE(review): wildcard origins combined with allow_credentials=True is
	# worth confirming against the FastAPI CORS guidance.
	cors_options = dict(
	    allow_origins=["*"],
	    allow_credentials=True,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)
	app.add_middleware(CORSMiddleware, **cors_options)

	# Benchmarks reported to the frontend, matched against the "source" column.
	_BENCHMARKS = ("GAIA", "MATH", "SimpleQA")
	# Columns the aggregation reads from the results table.
	_EXPECTED_COLUMNS = ("model_id", "agent_action_type", "source", "acc")


	def _benchmark_scores(group):
	    """Return per-benchmark mean accuracy, in percent, for one result group.

	    Benchmarks with no rows in *group*, or whose mean accuracy is NaN, are
	    left out. An "Average" entry (mean of the included benchmark scores) is
	    added when at least one benchmark has a score.
	    """
	    scores = {}
	    for benchmark in _BENCHMARKS:
	        rows = group[group["source"] == benchmark]
	        if rows.empty:
	            continue
	        mean_acc = rows["acc"].mean()
	        # A column holding only missing values averages to NaN, which is not
	        # valid JSON; treat it like a benchmark without results.
	        if np.isnan(mean_acc):
	            continue
	        scores[benchmark] = float(mean_acc) * 100  # Convert to percentage
	    if scores:
	        scores["Average"] = sum(scores.values()) / len(scores)
	    return scores


	@app.get("/api/results")
	async def get_results():
	    """Return aggregated benchmark scores for the frontend.

	    Loads the "2024-12-26" configuration of the "smolagents/results" dataset,
	    groups its "train" split by model id and agent action type, and returns
	    a list with one entry per group::

	        {"model_id": ..., "agent_action_type": ..., "scores": {...}}

	    where "scores" maps each available benchmark (plus "Average") to a
	    percentage.
	    """
	    # NOTE(review): this is a synchronous load inside an async handler and it
	    # runs on every request.
	    dataset = load_dataset("smolagents/results", "2024-12-26")
	    df = dataset["train"].to_pandas()

	    # Log some info to help debug
	    print("Dataset loaded, shape:", df.shape)
	    print("Columns:", df.columns)

	    # Warn about missing columns; the aggregation below will still fail on
	    # them, but the log then says which column was the cause.
	    for col in _EXPECTED_COLUMNS:
	        if col not in df.columns:
	            print(f"Warning: Column {col} not found in dataset")

	    # One entry per (model_id, agent_action_type) pair, as the frontend expects.
	    grouped = df.groupby(["model_id", "agent_action_type"])
	    result = [
	        {
	            "model_id": model_id,
	            "agent_action_type": agent_action_type,
	            "scores": _benchmark_scores(group),
	        }
	        for (model_id, agent_action_type), group in grouped
	    ]

	    print(f"Processed {len(result)} entries for the frontend")
	    return result




	# Serve the "static" directory at the site root. html=True turns on
	# StaticFiles' HTML mode (see the Starlette StaticFiles documentation).
	static_files = StaticFiles(directory="static", html=True)
	app.mount("/", static_files, name="static")

	if __name__ == "__main__":
	    # Script entry point: start uvicorn with the parsed command-line options.
	    import uvicorn

	    print(args)
	    server_options = {
	        "host": args.host,
	        "port": args.port,
	        "reload": args.reload,
	        "ssl_certfile": args.ssl_certfile,
	        "ssl_keyfile": args.ssl_keyfile,
	    }
	    # The app is passed as an import string rather than as the object itself.
	    uvicorn.run("app:app", **server_options)