lcipolina committed on
Commit
70c682f
·
verified ·
1 Parent(s): 634c45e

Trying to add a 'Total Performance' (totalling) option to the leaderboard dropdown

Browse files
Files changed (1) hide show
  1. app.py +64 -38
app.py CHANGED
@@ -10,11 +10,10 @@ from games_registry import GAMES_REGISTRY
10
  from simulators.base_simulator import PlayerType
11
  from typing import Dict
12
 
13
- # Extract available LLM models
14
  llm_models = list(LLM_REGISTRY.keys())
15
 
16
- # Define game list manually (for now)
17
- #games_list = list(GAMES_REGISTRY.keys())
18
  games_list = [
19
  "rock_paper_scissors",
20
  "prisoners_dilemma",
@@ -24,32 +23,21 @@ games_list = [
24
  "kuhn_poker",
25
  ]
26
 
27
- # File to persist results
28
- RESULTS_TRACKER_FILE = "results_tracker.json"
29
-
30
- def generate_stats_file(model_name: str):
31
- """Generate a JSON file with detailed statistics for the selected LLM model."""
32
- file_path = f"{model_name}_stats.json"
33
- with open(file_path, "w") as f:
34
- json.dump(results_tracker.get(model_name, {}), f, indent=4)
35
- return file_path
36
-
37
- def provide_download_file(model_name):
38
- """Creates a downloadable JSON file with stats for the selected model."""
39
- return generate_stats_file(model_name)
40
 
41
- def refresh_leaderboard():
42
- """Manually refresh the leaderboard."""
43
- return calculate_leaderboard(game_dropdown.value)
44
 
45
  # Load or initialize the results tracker
46
  if os.path.exists(RESULTS_TRACKER_FILE):
47
  with open(RESULTS_TRACKER_FILE, "r") as f:
48
  results_tracker = json.load(f)
49
  else:
 
50
  results_tracker = {
51
  llm: {game: {"games": 0, "moves/game": 0, "illegal-moves": 0,
52
- "win-rate": 0, "vs Random": 0} for game in games_list}
53
  for llm in llm_models
54
  }
55
 
@@ -58,28 +46,64 @@ def save_results_tracker():
58
  with open(RESULTS_TRACKER_FILE, "w") as f:
59
  json.dump(results_tracker, f, indent=4)
60
 
 
 
 
 
 
 
 
 
 
 
 
61
  def calculate_leaderboard(selected_game: str) -> pd.DataFrame:
62
- """Generate a structured leaderboard table for the selected game."""
63
- leaderboard_df = pd.DataFrame(index=llm_models,
64
- columns=["# games", "moves/game",
65
- "illegal-moves", "win-rate", "vs Random"])
 
 
 
 
 
66
 
67
  for llm in llm_models:
68
- game_stats = results_tracker[llm].get(selected_game, {})
69
- leaderboard_df.loc[llm] = [
70
- game_stats.get("games", 0),
71
- game_stats.get("moves/game", 0),
72
- game_stats.get("illegal-moves", 0),
73
- f"{game_stats.get('win-rate', 0):.1f}%",
74
- f"{game_stats.get('vs Random', 0):.1f}%"
75
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  leaderboard_df = leaderboard_df.reset_index()
78
  leaderboard_df.rename(columns={"index": "LLM Model"}, inplace=True)
79
  return leaderboard_df
80
 
81
  def play_game(game_name, player1_type, player2_type, player1_model, player2_model, rounds):
82
- """Play the selected game with specified players."""
83
  llms = {}
84
  if player1_type == "llm":
85
  llms["Player 1"] = player1_model
@@ -91,7 +115,7 @@ def play_game(game_name, player1_type, player2_type, player1_model, player2_mode
91
  game_states = []
92
 
93
  def log_fn(state):
94
- """Log current state and legal moves."""
95
  current_player = state.current_player()
96
  legal_moves = state.legal_actions(current_player)
97
  board = str(state)
@@ -102,10 +126,11 @@ def play_game(game_name, player1_type, player2_type, player1_model, player2_mode
102
 
103
  # Gradio Interface
104
  with gr.Blocks() as interface:
 
105
  with gr.Tab("Game Arena"):
106
  gr.Markdown("# LLM Game Arena\nSelect a game and players to play against LLMs.")
107
 
108
- game_dropdown = gr.Dropdown(choices=games_list, label="Select a Game", value=games_list[0])
109
  player1_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 1 Type", value="llm")
110
  player2_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 2 Type", value="random_bot")
111
  player1_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 1 Model", visible=False)
@@ -120,17 +145,18 @@ with gr.Blocks() as interface:
120
  outputs=result_output,
121
  )
122
 
 
123
  with gr.Tab("Leaderboard"):
124
  gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
125
 
126
- game_dropdown = gr.Dropdown(choices=games_list, label="Select Game", value=games_list[0])
127
- leaderboard_table = gr.Dataframe(value=calculate_leaderboard(games_list[0]), label="Leaderboard")
128
  model_dropdown = gr.Dropdown(choices=llm_models, label="Select LLM Model")
129
  download_button = gr.File(label="Download Statistics File")
130
  refresh_button = gr.Button("Refresh Leaderboard")
131
 
132
  def update_leaderboard(selected_game):
133
- """Updates the leaderboard table based on the selected game."""
134
  return calculate_leaderboard(selected_game)
135
 
136
  model_dropdown.change(fn=provide_download_file, inputs=[model_dropdown], outputs=[download_button])
 
10
  from simulators.base_simulator import PlayerType
11
  from typing import Dict
12
 
13
+ # Extract available LLM models from the registry
14
  llm_models = list(LLM_REGISTRY.keys())
15
 
16
+ # List of available games (manually defined for now)
 
17
  games_list = [
18
  "rock_paper_scissors",
19
  "prisoners_dilemma",
 
23
  "kuhn_poker",
24
  ]
25
 
26
+ # Special leaderboard option for aggregating stats across all games
27
+ games_list.insert(0, "Total Performance")
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ # File to persist game results
30
+ RESULTS_TRACKER_FILE = "results_tracker.json"
 
31
 
32
  # Load or initialize the results tracker
33
  if os.path.exists(RESULTS_TRACKER_FILE):
34
  with open(RESULTS_TRACKER_FILE, "r") as f:
35
  results_tracker = json.load(f)
36
  else:
37
+ # Initialize tracking for all LLMs and games
38
  results_tracker = {
39
  llm: {game: {"games": 0, "moves/game": 0, "illegal-moves": 0,
40
+ "win-rate": 0, "vs Random": 0} for game in games_list[1:]}
41
  for llm in llm_models
42
  }
43
 
 
46
  with open(RESULTS_TRACKER_FILE, "w") as f:
47
  json.dump(results_tracker, f, indent=4)
48
 
49
def generate_stats_file(model_name: str) -> str:
    """Write the tracked statistics of one LLM model to a JSON file.

    The file is created in the current working directory and is named
    ``<model_name>_stats.json``. Characters that cannot safely appear in a
    file name (path separators such as the ``/`` in ``org/model`` style
    identifiers, and other reserved characters) are replaced with ``_`` so
    the path never points into a missing or unintended directory.

    Args:
        model_name: Key of the model in ``results_tracker``.

    Returns:
        The path of the JSON file that was written. A model with no entry in
        ``results_tracker`` produces a file holding an empty JSON object.
    """
    import re  # local import keeps this block independent of the file header

    # Only the file name is sanitized; the tracker lookup uses the raw key.
    safe_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", str(model_name))
    file_path = f"{safe_name}_stats.json"
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(results_tracker.get(model_name, {}), f, indent=4)
    return file_path
55
+
56
def provide_download_file(model_name):
    """Return the path of a downloadable JSON stats file for a model.

    Args:
        model_name: The model chosen in the dropdown. May be ``None`` or an
            empty string when nothing is selected.

    Returns:
        The path produced by ``generate_stats_file`` for the model, or
        ``None`` when no model is selected, so that no meaningless
        ``None_stats.json`` file is written.
    """
    if not model_name:
        return None
    return generate_stats_file(model_name)
59
+
60
def _combine_game_stats(per_game_stats):
    """Fold a list of per-game stat dicts into one overall summary.

    "moves/game", "win-rate" and "vs Random" are averaged weighted by each
    game's "games" count; "games" and "illegal-moves" are summed. Missing
    keys count as 0.

    Returns:
        Tuple ``(games, moves_per_game, illegal_moves, win_rate, vs_random)``.
    """
    total_games = sum(stats.get("games", 0) for stats in per_game_stats)

    def weighted_mean(key):
        # With no games played there is nothing to average; report 0.
        if total_games <= 0:
            return 0
        weighted_sum = sum(
            stats.get(key, 0) * stats.get("games", 0) for stats in per_game_stats
        )
        return weighted_sum / total_games

    return (
        total_games,
        weighted_mean("moves/game"),
        sum(stats.get("illegal-moves", 0) for stats in per_game_stats),
        weighted_mean("win-rate"),
        weighted_mean("vs Random"),
    )


def calculate_leaderboard(selected_game: str) -> pd.DataFrame:
    """Build the leaderboard table, one row per model in ``llm_models``.

    - For a specific game, each row holds that game's tracked stats.
    - For ``"Total Performance"``, each row aggregates the stats of every
      real game in ``games_list`` (weighted by games played).

    Models, games or stat keys missing from ``results_tracker`` (for example
    when the persisted tracker predates a newly registered model) are treated
    as zeros instead of raising ``KeyError``.

    Args:
        selected_game: A game name, or ``"Total Performance"``.

    Returns:
        DataFrame with columns "LLM Model", "# games", "moves/game",
        "illegal-moves", "win-rate" and "vs Random".
    """
    total_option = "Total Performance"
    # Filter by name rather than relying on the pseudo-entry being first.
    real_games = [game for game in games_list if game != total_option]

    rows = []
    for llm in llm_models:
        model_stats = results_tracker.get(llm, {})
        if selected_game == total_option:
            games, moves, illegal, win_rate, vs_random = _combine_game_stats(
                [model_stats.get(game, {}) for game in real_games]
            )
            rows.append([
                llm,
                games,
                f"{moves:.1f}",
                illegal,
                f"{win_rate:.1f}%",
                f"{vs_random:.1f}%",
            ])
        else:
            game_stats = model_stats.get(selected_game, {})
            rows.append([
                llm,
                game_stats.get("games", 0),
                game_stats.get("moves/game", 0),
                game_stats.get("illegal-moves", 0),
                f"{game_stats.get('win-rate', 0):.1f}%",
                f"{game_stats.get('vs Random', 0):.1f}%",
            ])

    # dtype=object keeps cell values exactly as computed (no int/float coercion).
    return pd.DataFrame(
        rows,
        columns=["LLM Model", "# games", "moves/game",
                 "illegal-moves", "win-rate", "vs Random"],
        dtype=object,
    )
104
 
105
  def play_game(game_name, player1_type, player2_type, player1_model, player2_model, rounds):
106
+ """Simulates a game session with the chosen players and logs results."""
107
  llms = {}
108
  if player1_type == "llm":
109
  llms["Player 1"] = player1_model
 
115
  game_states = []
116
 
117
  def log_fn(state):
118
+ """Logs the current game state and available moves."""
119
  current_player = state.current_player()
120
  legal_moves = state.legal_actions(current_player)
121
  board = str(state)
 
126
 
127
  # Gradio Interface
128
  with gr.Blocks() as interface:
129
+ # Game Arena Tab
130
  with gr.Tab("Game Arena"):
131
  gr.Markdown("# LLM Game Arena\nSelect a game and players to play against LLMs.")
132
 
133
+ game_dropdown = gr.Dropdown(choices=games_list[1:], label="Select a Game", value=games_list[1])
134
  player1_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 1 Type", value="llm")
135
  player2_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 2 Type", value="random_bot")
136
  player1_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 1 Model", visible=False)
 
145
  outputs=result_output,
146
  )
147
 
148
+ # Leaderboard Tab
149
  with gr.Tab("Leaderboard"):
150
  gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")
151
 
152
+ game_dropdown = gr.Dropdown(choices=games_list, label="Select Game", value="Total Performance")
153
+ leaderboard_table = gr.Dataframe(value=calculate_leaderboard("Total Performance"), label="Leaderboard")
154
  model_dropdown = gr.Dropdown(choices=llm_models, label="Select LLM Model")
155
  download_button = gr.File(label="Download Statistics File")
156
  refresh_button = gr.Button("Refresh Leaderboard")
157
 
158
  def update_leaderboard(selected_game):
159
+ """Updates the leaderboard based on the selected game."""
160
  return calculate_leaderboard(selected_game)
161
 
162
  model_dropdown.change(fn=provide_download_file, inputs=[model_dropdown], outputs=[download_button])