# Page-capture residue, not part of the program: "Spaces: Sleeping".
import glob
import json
import os
import sqlite3
from datetime import datetime, timezone
from typing import Dict, List, Optional, Tuple

import gradio as gr
import pandas as pd
# Directory that holds the per-agent SQLite result databases (one .db per agent).
db_dir = "results/"
def find_or_download_db(directory: Optional[str] = None) -> List[str]:
    """Return the paths of all SQLite ``.db`` result files in a directory.

    Despite the name, nothing is downloaded: the directory is created when
    it is missing and then scanned for ``*.db`` files.

    Args:
        directory: Folder to scan. Defaults to the module-level ``db_dir``.

    Returns:
        The matching paths, sorted so callers see a stable order.

    Raises:
        FileNotFoundError: If no file named ``random_None.db`` is found.
    """
    if directory is None:
        directory = db_dir
    # exist_ok closes the gap between an existence check and the creation.
    os.makedirs(directory, exist_ok=True)
    # Escape the directory so glob metacharacters in it are taken literally.
    pattern = os.path.join(glob.escape(directory), "*.db")
    db_files = sorted(glob.glob(pattern))
    # Compare base names rather than one hard-coded relative path, so the
    # check works for any directory and any path separator.
    has_random_db = any(
        os.path.basename(path) == "random_None.db" for path in db_files
    )
    if not has_random_db:
        raise FileNotFoundError("Please upload results for the random agent in a file named 'random_None.db'.")
    return db_files
def extract_agent_info(filename: str) -> Tuple[str, str]:
    """Split a results filename into ``(agent_type, model_name)``.

    Directories and one trailing ``.db`` suffix are dropped, then the
    remaining name is split at its first underscore.

    Args:
        filename: Path or bare name of a results database.

    Returns:
        ``(agent_type, model_name)``; ``model_name`` is ``"Unknown"`` when
        the name contains no underscore.
    """
    base_name = os.path.basename(filename)
    # Strip only the trailing extension: str.replace would also delete any
    # ".db" that happens to appear inside the model name.
    if base_name.endswith(".db"):
        base_name = base_name[: -len(".db")]
    agent_type, separator, model_name = base_name.partition("_")
    if not separator:
        model_name = "Unknown"
    return agent_type, model_name
def get_available_games() -> List[str]:
    """List the game names found across all result databases.

    Each database returned by ``find_or_download_db`` is asked for the
    distinct ``game_name`` values of its ``moves`` table.

    Returns:
        ``"Total Performance"`` followed by the sorted game names, or by
        ``"No Games Found"`` when no database yields a name.
    """
    game_names = set()
    for db_file in find_or_download_db():
        conn = sqlite3.connect(db_file)
        try:
            rows = conn.execute("SELECT DISTINCT game_name FROM moves").fetchall()
        except sqlite3.Error:
            # Best effort: a database without a readable ``moves`` table
            # contributes no games. Other failures are not swallowed.
            continue
        finally:
            conn.close()
        # NULL names are skipped; mixing None with str would break sorting.
        game_names.update(name for (name,) in rows if name is not None)

    game_list = sorted(game_names) if game_names else ["No Games Found"]
    # 'Total Performance' is always the first entry.
    return ["Total Performance"] + game_list
def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
    """Collect per-agent statistics from every result database.

    Args:
        game_name: A game to filter on, or ``"Total Performance"`` to get
            one row per game for every agent.

    Returns:
        The concatenated query results, with ``agent_name`` and
        ``agent_type`` columns taken from each database's filename. When
        the result is empty, an empty frame with the leaderboard display
        columns is returned instead.
    """
    db_files = find_or_download_db()

    # The query depends only on game_name, so build it once up front.
    if game_name == "Total Performance":
        query = (
            "SELECT game_name, COUNT(DISTINCT episode) AS games_played, "
            "AVG(generation_time) AS avg_gen_time, SUM(reward) AS total_rewards "
            "FROM game_results GROUP BY game_name"
        )
        params = None
    else:
        query = (
            "SELECT COUNT(DISTINCT episode) AS games_played, "
            "AVG(generation_time) AS avg_gen_time, SUM(reward) AS total_rewards "
            "FROM game_results WHERE game_name = ?"
        )
        params = (game_name,)

    all_stats = []
    for db_file in db_files:
        agent_type, model_name = extract_agent_info(db_file)
        conn = sqlite3.connect(db_file)
        try:
            df = pd.read_sql_query(query, conn, params=params)
        finally:
            # Close the connection even when the query raises.
            conn.close()
        df["agent_name"] = model_name
        df["agent_type"] = agent_type
        all_stats.append(df)

    leaderboard_df = pd.concat(all_stats, ignore_index=True) if all_stats else pd.DataFrame()
    if leaderboard_df.empty:
        leaderboard_df = pd.DataFrame(columns=["LLM Model", "# games", "moves/game", "illegal-moves", "win-rate", "vs Random"])
    return leaderboard_df
def generate_leaderboard_json() -> str:
    """Write the 'Total Performance' leaderboard to a JSON file.

    The file holds a UTC ``timestamp`` (ISO 8601, with an explicit
    ``+00:00`` offset) and the ``leaderboard`` rows as a list of records.

    Returns:
        Path of the written file, ``leaderboard_stats.json`` inside
        ``db_dir``.
    """
    leaderboard = extract_leaderboard_stats("Total Performance").to_dict(orient="records")
    # Derive the location from db_dir rather than repeating the literal.
    json_file = os.path.join(db_dir, "leaderboard_stats.json")
    payload = {
        # Timezone-aware replacement for the deprecated datetime.utcnow().
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "leaderboard": leaderboard,
    }
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=4)
    return json_file
# Build the Gradio UI: a single "Leaderboard" tab with a game selector, the
# statistics table, and buttons to refresh the table or export it as JSON.
with gr.Blocks() as interface:
    with gr.Tab("Leaderboard"):
        gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")

        # The game list is read once, when the UI is built.
        available_games = get_available_games()
        leaderboard_game_dropdown = gr.Dropdown(available_games, label="Select Game", value="Total Performance")
        leaderboard_table = gr.Dataframe(headers=["LLM Model", "# games", "moves/game", "illegal-moves", "win-rate", "vs Random"])
        generate_button = gr.Button("Generate Leaderboard JSON")
        download_component = gr.File(label="Download Leaderboard JSON")
        refresh_button = gr.Button("Refresh Leaderboard")

        # Changing the selection and pressing refresh both re-query the
        # databases for the currently selected game.
        leaderboard_game_dropdown.change(extract_leaderboard_stats, inputs=[leaderboard_game_dropdown], outputs=[leaderboard_table])
        refresh_button.click(extract_leaderboard_stats, inputs=[leaderboard_game_dropdown], outputs=[leaderboard_table])
        # The written JSON file path is handed to the download component.
        generate_button.click(generate_leaderboard_json, outputs=[download_component])

interface.launch()