Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -80,15 +80,53 @@ def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
|
|
80 |
|
81 |
# Calculate win rate against random bot
|
82 |
vs_random_query = """
|
83 |
-
SELECT COUNT(*) FROM game_results
|
84 |
-
|
85 |
-
WHERE gr.game_name = ? AND m.opponent = 'random_None' AND gr.reward > 0
|
86 |
"""
|
87 |
total_vs_random_query = """
|
88 |
-
SELECT COUNT(*) FROM game_results
|
89 |
-
|
90 |
-
WHERE gr.game_name = ? AND m.opponent = 'random_None'
|
91 |
"""
|
92 |
wins_vs_random = conn.execute(vs_random_query, (game_name,)).fetchone()[0] or 0
|
93 |
total_vs_random = conn.execute(total_vs_random_query, (game_name,)).fetchone()[0] or 0
|
94 |
-
vs_random_rate = (wins_vs_random / total_vs_random
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
# Calculate win rate against random bot
|
82 |
vs_random_query = """
|
83 |
+
SELECT COUNT(*) FROM game_results
|
84 |
+
WHERE game_name = ? AND opponent = 'random_None' AND reward > 0
|
|
|
85 |
"""
|
86 |
total_vs_random_query = """
|
87 |
+
SELECT COUNT(*) FROM game_results
|
88 |
+
WHERE game_name = ? AND opponent = 'random_None'
|
|
|
89 |
"""
|
90 |
wins_vs_random = conn.execute(vs_random_query, (game_name,)).fetchone()[0] or 0
|
91 |
total_vs_random = conn.execute(total_vs_random_query, (game_name,)).fetchone()[0] or 0
|
92 |
+
vs_random_rate = (wins_vs_random / total_vs_random * 100) if total_vs_random > 0 else 0
|
93 |
+
|
94 |
+
df["agent_name"] = model_name
|
95 |
+
df["agent_type"] = agent_type
|
96 |
+
df["avg_generation_time"] = round(avg_gen_time, 2)
|
97 |
+
df["vs_random"] = round(vs_random_rate, 2)
|
98 |
+
|
99 |
+
all_stats.append(df)
|
100 |
+
conn.close()
|
101 |
+
|
102 |
+
leaderboard_df = pd.concat(all_stats, ignore_index=True) if all_stats else pd.DataFrame()
|
103 |
+
|
104 |
+
if leaderboard_df.empty:
|
105 |
+
leaderboard_df = pd.DataFrame(columns=["LLM Model", "# games", "total rewards", "avg gen time", "win-rate", "vs Random"])
|
106 |
+
|
107 |
+
return leaderboard_df
|
108 |
+
|
109 |
+
def generate_leaderboard_json():
    """Write the "Total Performance" leaderboard to a JSON file.

    The file contains two keys: ``timestamp`` (the ISO-8601 string produced
    by ``datetime.utcnow().isoformat()``) and ``leaderboard`` (the rows of
    ``extract_leaderboard_stats("Total Performance")`` as a list of
    per-row dicts).

    Returns:
        str: Path of the written file, ``"results/leaderboard_stats.json"``.
    """
    # Function-scope import: the file's top-level import block is not
    # visible from this section, so the dependency is brought in locally.
    import os

    leaderboard = extract_leaderboard_stats("Total Performance").to_dict(orient="records")
    json_file = "results/leaderboard_stats.json"

    # Create the output directory on demand; without this, open() raises
    # FileNotFoundError when "results/" has not been created yet.
    os.makedirs(os.path.dirname(json_file), exist_ok=True)

    with open(json_file, "w", encoding="utf-8") as f:
        json.dump({"timestamp": datetime.utcnow().isoformat(), "leaderboard": leaderboard}, f, indent=4)
    return json_file
|
117 |
+
|
118 |
+
# Build the Gradio UI: a single "Leaderboard" tab with a game selector,
# the stats table, a JSON export button and a manual refresh button.
with gr.Blocks() as interface:
    with gr.Tab("Leaderboard"):
        gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")

        available_games = get_available_games()
        leaderboard_game_dropdown = gr.Dropdown(
            available_games,
            label="Select Game",
            value="Total Performance",
        )
        leaderboard_table = gr.Dataframe(
            headers=[
                "LLM Model",
                "# games",
                "total rewards",
                "avg gen time",
                "win-rate",
                "vs Random",
            ],
        )
        generate_button = gr.Button("Generate Leaderboard JSON")
        download_component = gr.File(label="Download Leaderboard JSON")
        refresh_button = gr.Button("Refresh Leaderboard")

        # Selecting a different game and pressing "Refresh" both re-run the
        # same stats query for the currently selected game.
        for _bind in (leaderboard_game_dropdown.change, refresh_button.click):
            _bind(
                extract_leaderboard_stats,
                inputs=[leaderboard_game_dropdown],
                outputs=[leaderboard_table],
            )

        # The export handler returns a file path, which feeds the File
        # component so the user can download the result.
        generate_button.click(generate_leaderboard_json, outputs=[download_component])

interface.launch()
|