lcipolina committed on
Commit
3d99af2
·
verified ·
1 Parent(s): bd5bff8

Update app_old.py

Browse files
Files changed (1) hide show
  1. app_old.py +97 -153
app_old.py CHANGED
@@ -1,166 +1,110 @@
1
  import os
2
  import json
 
 
3
  import pandas as pd
4
  import gradio as gr
5
- from agents.llm_registry import LLM_REGISTRY # Dynamically fetch LLM models
6
- from simulators.tic_tac_toe_simulator import TicTacToeSimulator
7
- from simulators.prisoners_dilemma_simulator import PrisonersDilemmaSimulator
8
- from simulators.rock_paper_scissors_simulator import RockPaperScissorsSimulator
9
- from games_registry import GAMES_REGISTRY
10
- from simulators.base_simulator import PlayerType
11
- from typing import Dict
12
-
13
- # Extract available LLM models from the registry
14
- llm_models = list(LLM_REGISTRY.keys())
15
-
16
- # List of available games (manually defined for now)
17
- games_list = [
18
- "rock_paper_scissors",
19
- "prisoners_dilemma",
20
- "tic_tac_toe",
21
- "connect_four",
22
- "matching_pennies",
23
- "kuhn_poker",
24
- ]
25
-
26
- # Special leaderboard option for aggregating stats across all games
27
- games_list.insert(0, "Total Performance")
28
-
29
- # File to persist game results
30
- RESULTS_TRACKER_FILE = "results_tracker.json"
31
-
32
- # Load or initialize the results tracker
33
- if os.path.exists(RESULTS_TRACKER_FILE):
34
- with open(RESULTS_TRACKER_FILE, "r") as f:
35
- results_tracker = json.load(f)
36
- else:
37
- # Initialize tracking for all LLMs and games
38
- results_tracker = {
39
- llm: {game: {"games": 0, "moves/game": 0, "illegal-moves": 0,
40
- "win-rate": 0, "vs Random": 0} for game in games_list[1:]}
41
- for llm in llm_models
42
- }
43
-
44
- def save_results_tracker():
45
- """Save the results tracker to a JSON file."""
46
- with open(RESULTS_TRACKER_FILE, "w") as f:
47
- json.dump(results_tracker, f, indent=4)
48
-
49
- def generate_stats_file(model_name: str) -> str:
50
- """Generate a JSON file with detailed statistics for the selected LLM model."""
51
- file_path = f"{model_name}_stats.json"
52
- with open(file_path, "w") as f:
53
- json.dump(results_tracker.get(model_name, {}), f, indent=4)
54
- return file_path
55
-
56
- def provide_download_file(model_name):
57
- """Creates a downloadable JSON file with stats for the selected model."""
58
- return generate_stats_file(model_name)
59
-
60
- def calculate_leaderboard(selected_game: str) -> pd.DataFrame:
61
- """
62
- Generate a structured leaderboard table.
63
- - If a specific game is selected, returns performance stats per LLM for that game.
64
- - If 'Total Performance' is selected, aggregates stats across all games.
65
- """
66
- leaderboard_df = pd.DataFrame(
67
- index=llm_models,
68
- columns=["# games", "moves/game", "illegal-moves", "win-rate", "vs Random"]
69
- )
70
-
71
- for llm in llm_models:
72
- if selected_game == "Total Performance":
73
- # Aggregate stats across all games
74
- total_games = sum(results_tracker[llm][game]["games"] for game in games_list[1:])
75
- total_moves = sum(results_tracker[llm][game]["moves/game"] * results_tracker[llm][game]["games"]
76
- for game in games_list[1:])
77
- total_illegal_moves = sum(results_tracker[llm][game]["illegal-moves"] for game in games_list[1:])
78
- avg_win_rate = sum(results_tracker[llm][game]["win-rate"] * results_tracker[llm][game]["games"]
79
- for game in games_list[1:]) / total_games if total_games > 0 else 0
80
- avg_vs_random = sum(results_tracker[llm][game]["vs Random"] * results_tracker[llm][game]["games"]
81
- for game in games_list[1:]) / total_games if total_games > 0 else 0
82
-
83
- leaderboard_df.loc[llm] = [
84
- total_games,
85
- f"{(total_moves / total_games) if total_games > 0 else 0:.1f}",
86
- total_illegal_moves,
87
- f"{avg_win_rate:.1f}%",
88
- f"{avg_vs_random:.1f}%"
89
- ]
90
  else:
91
- # Retrieve stats for the selected game
92
- game_stats = results_tracker[llm].get(selected_game, {})
93
- leaderboard_df.loc[llm] = [
94
- game_stats.get("games", 0),
95
- game_stats.get("moves/game", 0),
96
- game_stats.get("illegal-moves", 0),
97
- f"{game_stats.get('win-rate', 0):.1f}%",
98
- f"{game_stats.get('vs Random', 0):.1f}%"
99
- ]
100
-
101
- leaderboard_df = leaderboard_df.reset_index()
102
- leaderboard_df.rename(columns={"index": "LLM Model"}, inplace=True)
 
 
 
103
  return leaderboard_df
104
 
105
- def play_game(game_name, player1_type, player2_type, player1_model, player2_model, rounds):
106
- """Simulates a game session with the chosen players and logs results."""
107
- llms = {}
108
- if player1_type == "llm":
109
- llms["Player 1"] = player1_model
110
- if player2_type == "llm":
111
- llms["Player 2"] = player2_model
112
-
113
- simulator_class = GAMES_REGISTRY[game_name]
114
- simulator = simulator_class(game_name, llms=llms)
115
- game_states = []
116
-
117
- def log_fn(state):
118
- """Logs the current game state and available moves."""
119
- current_player = state.current_player()
120
- legal_moves = state.legal_actions(current_player)
121
- board = str(state)
122
- game_states.append(f"Current Player: {current_player}\nBoard:\n{board}\nLegal Moves: {legal_moves}")
123
 
124
- results = simulator.simulate(rounds=int(rounds), log_fn=log_fn)
125
- return "\n".join(game_states) + f"\nGame Result: {results}"
126
-
127
- # Gradio Interface
128
  with gr.Blocks() as interface:
129
- # Game Arena Tab
130
- with gr.Tab("Game Arena"):
131
- gr.Markdown("# LLM Game Arena\nSelect a game and players to play against LLMs.")
132
-
133
- game_dropdown = gr.Dropdown(choices=games_list[1:], label="Select a Game", value=games_list[1])
134
- player1_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 1 Type", value="llm")
135
- player2_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 2 Type", value="random_bot")
136
- player1_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 1 Model", visible=False)
137
- player2_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 2 Model", visible=False)
138
- rounds_slider = gr.Slider(1, 10, step=1, label="Rounds")
139
- result_output = gr.Textbox(label="Game Result")
140
-
141
- play_button = gr.Button("Play Game")
142
- play_button.click(
143
- play_game,
144
- inputs=[game_dropdown, player1_dropdown, player2_dropdown, player1_model_dropdown, player2_model_dropdown, rounds_slider],
145
- outputs=result_output,
146
- )
147
-
148
- # Leaderboard Tab
149
  with gr.Tab("Leaderboard"):
150
  gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
151
-
152
- game_dropdown = gr.Dropdown(choices=games_list, label="Select Game", value="Total Performance")
153
- leaderboard_table = gr.Dataframe(value=calculate_leaderboard("Total Performance"), label="Leaderboard")
154
- model_dropdown = gr.Dropdown(choices=llm_models, label="Select LLM Model")
155
- download_button = gr.File(label="Download Statistics File")
156
  refresh_button = gr.Button("Refresh Leaderboard")
 
 
 
 
157
 
158
- def update_leaderboard(selected_game):
159
- """Updates the leaderboard based on the selected game."""
160
- return calculate_leaderboard(selected_game)
161
-
162
- model_dropdown.change(fn=provide_download_file, inputs=[model_dropdown], outputs=[download_button])
163
- game_dropdown.change(fn=update_leaderboard, inputs=[game_dropdown], outputs=[leaderboard_table])
164
- refresh_button.click(fn=update_leaderboard, inputs=[game_dropdown], outputs=[leaderboard_table])
165
-
166
- interface.launch()
 
1
  import os
2
  import json
3
+ import sqlite3
4
+ import glob
5
  import pandas as pd
6
  import gradio as gr
7
+ from datetime import datetime
8
+ from typing import Dict, List
9
+
# Directory to store SQLite results
db_dir = "results/"


def find_or_download_db():
    """Return the paths of all SQLite ``.db`` files stored in ``db_dir``.

    The directory is created when it does not exist yet.  Despite the
    function's name, nothing is downloaded: the result databases must already
    be present on disk.

    Returns:
        list[str]: Every path matching ``<db_dir>/*.db``, in glob order.

    Raises:
        FileNotFoundError: If no file named ``random_None.db`` (the results of
            the random agent) is present in ``db_dir``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(db_dir, exist_ok=True)
    db_files = glob.glob(os.path.join(db_dir, "*.db"))

    # Ensure the random bot database exists.  Compare base names instead of a
    # hard-coded "results/..." string: glob builds its results with the
    # platform path separator, and the check must keep working if db_dir
    # is ever changed.
    if not any(os.path.basename(path) == "random_None.db" for path in db_files):
        raise FileNotFoundError("Please upload results for the random agent in a file named 'random_None.db'.")

    return db_files
24
+
def extract_agent_info(filename: str):
    """Split a results-database file name into agent type and model name.

    The base name (directory removed, trailing ``.db`` removed) is split at
    its first underscore: ``"results/llm_gpt-4.db"`` -> ``("llm", "gpt-4")``.

    Args:
        filename: Path or file name of a results database.

    Returns:
        A ``(agent_type, model_name)`` tuple.  ``model_name`` is ``"Unknown"``
        when the base name contains no underscore.
    """
    base_name = os.path.basename(filename)
    # Strip only the trailing extension; str.replace would also delete any
    # ".db" occurring inside the model name itself.
    if base_name.endswith(".db"):
        base_name = base_name[: -len(".db")]

    agent_type, separator, model_name = base_name.partition("_")
    if not separator:
        model_name = "Unknown"
    return agent_type, model_name
34
+
def get_available_games() -> List[str]:
    """List the game names recorded across all result databases.

    Every database returned by ``find_or_download_db()`` is queried for the
    distinct ``game_name`` values of its ``moves`` table.

    Returns:
        ``"Total Performance"`` followed by the sorted unique game names, or
        by the single placeholder ``"No Games Found"`` when no database
        yields a name.

    Raises:
        FileNotFoundError: Propagated from ``find_or_download_db()``.
    """
    query = "SELECT DISTINCT game_name FROM moves"
    game_names = set()

    for db_file in find_or_download_db():
        conn = sqlite3.connect(db_file)
        try:
            rows = conn.execute(query).fetchall()
        except sqlite3.Error:
            # Best effort: a database without a `moves` table (or one SQLite
            # cannot read) simply contributes no games.  Only SQLite errors
            # are tolerated so unrelated bugs are not silently hidden.
            continue
        finally:
            conn.close()
        # NULL names would make sorted() fail when mixed with strings.
        game_names.update(name for (name,) in rows if name is not None)

    game_list = sorted(game_names) if game_names else ["No Games Found"]
    game_list.insert(0, "Total Performance")  # Ensure 'Total Performance' is always first
    return game_list
54
+
def extract_leaderboard_stats(game_name: str) -> pd.DataFrame:
    """Collect per-agent statistics from the ``game_results`` table of every database.

    Args:
        game_name: A game to filter on, or ``"Total Performance"`` to get one
            row per game (grouped by ``game_name``) for each agent.

    Returns:
        The concatenation of one frame per database with the columns
        ``games_played``, ``avg_gen_time``, ``total_rewards``, ``agent_name``
        and ``agent_type`` (plus ``game_name`` in "Total Performance" mode).
        When the concatenated result is empty, an empty frame with the
        leaderboard display columns is returned instead.

    Raises:
        FileNotFoundError: Propagated from ``find_or_download_db()``.
    """
    # The SQL depends only on `game_name`, so build it once, not per file.
    metrics = (
        "COUNT(DISTINCT episode) AS games_played, "
        "AVG(generation_time) AS avg_gen_time, SUM(reward) AS total_rewards "
    )
    if game_name == "Total Performance":
        query = "SELECT game_name, " + metrics + "FROM game_results GROUP BY game_name"
        params = None
    else:
        # The game name comes from the UI, so it is bound as a parameter.
        query = "SELECT " + metrics + "FROM game_results WHERE game_name = ?"
        params = (game_name,)

    all_stats = []
    for db_file in find_or_download_db():
        agent_type, model_name = extract_agent_info(db_file)
        conn = sqlite3.connect(db_file)
        try:
            df = pd.read_sql_query(query, conn, params=params)
        finally:
            # Release the connection even when the query raises.
            conn.close()

        df["agent_name"] = model_name
        df["agent_type"] = agent_type
        all_stats.append(df)

    leaderboard_df = pd.concat(all_stats, ignore_index=True) if all_stats else pd.DataFrame()

    # NOTE(review): this fallback uses the display headers, which differ from
    # the column names produced by the queries above -- confirm which schema
    # the leaderboard table is meant to receive.
    if leaderboard_df.empty:
        leaderboard_df = pd.DataFrame(columns=["LLM Model", "# games", "moves/game", "illegal-moves", "win-rate", "vs Random"])

    return leaderboard_df
86
 
def generate_leaderboard_json():
    """Write the "Total Performance" leaderboard to a JSON file.

    The file holds a UTC ``timestamp`` (ISO 8601, no offset suffix) and the
    ``leaderboard`` rows as a list of records.

    Returns:
        str: Path of the written file, ``leaderboard_stats.json`` inside
        ``db_dir``.

    Raises:
        FileNotFoundError: Propagated from ``extract_leaderboard_stats()``
            when the random-agent database is missing.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    stats = extract_leaderboard_stats("Total Performance")
    # SQL NULL aggregates arrive as NaN, which json.dump would emit as the
    # non-standard token `NaN`; map them to None so they become JSON null.
    stats = stats.astype(object).where(stats.notna(), None)
    leaderboard = stats.to_dict(orient="records")

    # Same naive-UTC text as the deprecated datetime.utcnow().isoformat().
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    json_file = os.path.join(db_dir, "leaderboard_stats.json")
    with open(json_file, "w", encoding="utf-8") as f:
        json.dump({"timestamp": timestamp, "leaderboard": leaderboard}, f, indent=4)
    return json_file
 
 
 
 
 
 
 
 
 
 
95
 
 
 
 
 
# Gradio UI: a single "Leaderboard" tab backed by the SQLite result files.
with gr.Blocks() as interface:
    with gr.Tab("Leaderboard"):
        gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")

        # Widgets, in display order.
        available_games = get_available_games()
        leaderboard_game_dropdown = gr.Dropdown(
            choices=available_games,
            label="Select Game",
            value="Total Performance",
        )
        leaderboard_table = gr.Dataframe(
            headers=[
                "LLM Model",
                "# games",
                "moves/game",
                "illegal-moves",
                "win-rate",
                "vs Random",
            ],
        )
        generate_button = gr.Button("Generate Leaderboard JSON")
        download_component = gr.File(label="Download Leaderboard JSON")
        refresh_button = gr.Button("Refresh Leaderboard")

        # Picking another game and pressing "Refresh" both re-query the
        # databases for the currently selected game.
        leaderboard_game_dropdown.change(
            fn=extract_leaderboard_stats,
            inputs=[leaderboard_game_dropdown],
            outputs=[leaderboard_table],
        )
        refresh_button.click(
            fn=extract_leaderboard_stats,
            inputs=[leaderboard_game_dropdown],
            outputs=[leaderboard_table],
        )
        # Export the leaderboard and hand the written file to the download widget.
        generate_button.click(
            fn=generate_leaderboard_json,
            outputs=[download_component],
        )

interface.launch()