lcipolina committed on
Commit
634c45e
·
verified ·
1 Parent(s): 84f0932

Trying to restore to working state

Browse files
Files changed (1) hide show
  1. app.py +69 -44
app.py CHANGED
@@ -14,6 +14,7 @@ from typing import Dict
14
  llm_models = list(LLM_REGISTRY.keys())
15
 
16
  # Define game list manually (for now)
 
17
  games_list = [
18
  "rock_paper_scissors",
19
  "prisoners_dilemma",
@@ -21,12 +22,26 @@ games_list = [
21
  "connect_four",
22
  "matching_pennies",
23
  "kuhn_poker",
24
- "Overall Leaderboard"
25
  ]
26
 
27
  # File to persist results
28
  RESULTS_TRACKER_FILE = "results_tracker.json"
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  # Load or initialize the results tracker
31
  if os.path.exists(RESULTS_TRACKER_FILE):
32
  with open(RESULTS_TRACKER_FILE, "r") as f:
@@ -43,51 +58,22 @@ def save_results_tracker():
43
  with open(RESULTS_TRACKER_FILE, "w") as f:
44
  json.dump(results_tracker, f, indent=4)
45
 
46
- def generate_stats_file(model_name: str):
47
- """Generate a JSON file with detailed statistics for the selected LLM model."""
48
- file_path = f"{model_name}_stats.json"
49
- with open(file_path, "w") as f:
50
- json.dump(results_tracker.get(model_name, {}), f, indent=4)
51
- return file_path
52
-
53
- def provide_download_file(model_name):
54
- """Creates a downloadable JSON file with stats for the selected model."""
55
- return generate_stats_file(model_name)
56
-
57
  def calculate_leaderboard(selected_game: str) -> pd.DataFrame:
58
- """Generate a structured leaderboard table for the selected game or overall."""
59
- leaderboard_df = pd.DataFrame(index=llm_models,
60
- columns=["# games", "moves/game",
61
  "illegal-moves", "win-rate", "vs Random"])
62
-
63
  for llm in llm_models:
64
- if selected_game == "Overall Leaderboard":
65
- total_games = 0
66
- total_moves = 0
67
- total_illegal_moves = 0
68
- total_wins = 0
69
- total_vs_random = 0
70
- for game in games_list[:-1]:
71
- game_stats = results_tracker[llm].get(game, {})
72
- total_games += game_stats.get("games", 0)
73
- total_moves += game_stats.get("moves/game", 0) * game_stats.get("games", 0)
74
- total_illegal_moves += game_stats.get("illegal-moves", 0)
75
- total_wins += (game_stats.get("win-rate", 0) * game_stats.get("games", 0)) / 100
76
- total_vs_random += (game_stats.get("vs Random", 0) * game_stats.get("games", 0)) / 100
77
- avg_moves = total_moves / total_games if total_games > 0 else 0
78
- avg_win_rate = (total_wins / total_games) * 100 if total_games > 0 else 0
79
- avg_vs_random = (total_vs_random / total_games) * 100 if total_games > 0 else 0
80
- leaderboard_df.loc[llm] = [total_games, avg_moves, total_illegal_moves, f"{avg_win_rate:.1f}%", f"{avg_vs_random:.1f}%"]
81
- else:
82
- game_stats = results_tracker[llm].get(selected_game, {})
83
- leaderboard_df.loc[llm] = [
84
- game_stats.get("games", 0),
85
- game_stats.get("moves/game", 0),
86
- game_stats.get("illegal-moves", 0),
87
- f"{game_stats.get('win-rate', 0):.1f}%",
88
- f"{game_stats.get('vs Random', 0):.1f}%"
89
- ]
90
-
91
  leaderboard_df = leaderboard_df.reset_index()
92
  leaderboard_df.rename(columns={"index": "LLM Model"}, inplace=True)
93
  return leaderboard_df
@@ -110,6 +96,45 @@ def play_game(game_name, player1_type, player2_type, player1_model, player2_mode
110
  legal_moves = state.legal_actions(current_player)
111
  board = str(state)
112
  game_states.append(f"Current Player: {current_player}\nBoard:\n{board}\nLegal Moves: {legal_moves}")
113
-
114
  results = simulator.simulate(rounds=int(rounds), log_fn=log_fn)
115
  return "\n".join(game_states) + f"\nGame Result: {results}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  llm_models = list(LLM_REGISTRY.keys())
15
 
16
  # Define game list manually (for now)
17
+ #games_list = list(GAMES_REGISTRY.keys())
18
  games_list = [
19
  "rock_paper_scissors",
20
  "prisoners_dilemma",
 
22
  "connect_four",
23
  "matching_pennies",
24
  "kuhn_poker",
 
25
  ]
26
 
27
  # File to persist results
28
  RESULTS_TRACKER_FILE = "results_tracker.json"
29
 
30
def generate_stats_file(model_name: str):
    """Write the tracked statistics of one LLM model to a JSON file.

    The statistics are read from the module-level ``results_tracker``; a model
    with no entry there produces a file containing an empty JSON object.

    Args:
        model_name: Key of the model in ``results_tracker``.

    Returns:
        The relative path of the JSON file that was written, of the form
        ``<model_name>_stats.json`` with any path separators in the model
        name replaced by underscores.
    """
    # Model identifiers may contain "/" (e.g. "org/model"). Used verbatim they
    # would make the path point into a (usually missing) sub-directory, so the
    # separators are flattened before building the file name. The lookup below
    # still uses the unmodified model name.
    safe_name = str(model_name).replace("/", "_").replace("\\", "_")
    file_path = f"{safe_name}_stats.json"
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(results_tracker.get(model_name, {}), f, indent=4)
    return file_path
36
+
37
def provide_download_file(model_name):
    """Build the statistics file for ``model_name`` and hand back its path.

    Thin adapter around ``generate_stats_file`` so the result can be used
    directly as the output of a UI callback.
    """
    stats_path = generate_stats_file(model_name)
    return stats_path
40
+
41
def refresh_leaderboard(selected_game=None):
    """Recompute the leaderboard table for a game.

    Args:
        selected_game: Name of the game to build the leaderboard for. When
            omitted, the value configured on ``game_dropdown`` is used, which
            keeps the original zero-argument call working.

    Returns:
        The DataFrame produced by ``calculate_leaderboard``.
    """
    if selected_game is None:
        # NOTE(review): a Gradio component's ``.value`` is the default it was
        # constructed with, not what the user currently has selected. Callers
        # wired to a UI event should pass the live selection via ``inputs``.
        selected_game = game_dropdown.value
    return calculate_leaderboard(selected_game)
44
+
45
  # Load or initialize the results tracker
46
  if os.path.exists(RESULTS_TRACKER_FILE):
47
  with open(RESULTS_TRACKER_FILE, "r") as f:
 
58
  with open(RESULTS_TRACKER_FILE, "w") as f:
59
  json.dump(results_tracker, f, indent=4)
60
 
 
 
 
 
 
 
 
 
 
 
 
61
def calculate_leaderboard(selected_game: str) -> pd.DataFrame:
    """Generate a structured leaderboard table for the selected game.

    One row is produced per entry of ``llm_models``, in that order. Statistics
    are read from ``results_tracker[model][selected_game]``; any missing model,
    game or statistic is reported as 0.

    Args:
        selected_game: Name of the game whose statistics should be shown.

    Returns:
        A DataFrame with the columns ``LLM Model``, ``# games``,
        ``moves/game``, ``illegal-moves``, ``win-rate`` and ``vs Random``.
        The last two are formatted as percentage strings with one decimal.
    """
    columns = ["LLM Model", "# games", "moves/game",
               "illegal-moves", "win-rate", "vs Random"]

    rows = []
    for llm in llm_models:
        # .get() on both levels: a tracker restored from disk may predate a
        # model that has since been added to the registry, and indexing it
        # directly would raise KeyError and break the whole table.
        game_stats = results_tracker.get(llm, {}).get(selected_game, {})
        rows.append([
            llm,
            game_stats.get("games", 0),
            game_stats.get("moves/game", 0),
            game_stats.get("illegal-moves", 0),
            f"{game_stats.get('win-rate', 0):.1f}%",
            f"{game_stats.get('vs Random', 0):.1f}%",
        ])

    return pd.DataFrame(rows, columns=columns)
 
96
  legal_moves = state.legal_actions(current_player)
97
  board = str(state)
98
  game_states.append(f"Current Player: {current_player}\nBoard:\n{board}\nLegal Moves: {legal_moves}")
99
+
100
  results = simulator.simulate(rounds=int(rounds), log_fn=log_fn)
101
  return "\n".join(game_states) + f"\nGame Result: {results}"
102
+
103
# Gradio Interface
# Two tabs: "Game Arena" runs a match through play_game, "Leaderboard" shows
# the per-game statistics table and offers a per-model statistics download.
with gr.Blocks() as interface:
    with gr.Tab("Game Arena"):
        gr.Markdown("# LLM Game Arena\nSelect a game and players to play against LLMs.")

        # Named differently from the Leaderboard tab's dropdown so the two
        # components no longer share (and overwrite) one variable name.
        arena_game_dropdown = gr.Dropdown(choices=games_list, label="Select a Game", value=games_list[0])
        player1_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 1 Type", value="llm")
        player2_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 2 Type", value="random_bot")
        player1_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 1 Model", visible=False)
        player2_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 2 Model", visible=False)
        rounds_slider = gr.Slider(1, 10, step=1, label="Rounds")
        result_output = gr.Textbox(label="Game Result")

        play_button = gr.Button("Play Game")
        play_button.click(
            play_game,
            inputs=[
                arena_game_dropdown,
                player1_dropdown,
                player2_dropdown,
                player1_model_dropdown,
                player2_model_dropdown,
                rounds_slider,
            ],
            outputs=result_output,
        )

    with gr.Tab("Leaderboard"):
        gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")

        # Keeps the module-level name ``game_dropdown``: refresh_leaderboard()
        # reads that global, and this was its final binding before the rename
        # of the arena dropdown above.
        game_dropdown = gr.Dropdown(choices=games_list, label="Select Game", value=games_list[0])
        # Initial table is computed once, when the UI is built.
        leaderboard_table = gr.Dataframe(value=calculate_leaderboard(games_list[0]), label="Leaderboard")
        model_dropdown = gr.Dropdown(choices=llm_models, label="Select LLM Model")
        download_button = gr.File(label="Download Statistics File")
        refresh_button = gr.Button("Refresh Leaderboard")

        def update_leaderboard(selected_game):
            """Updates the leaderboard table based on the selected game."""
            return calculate_leaderboard(selected_game)

        # Selecting a model writes its stats file and exposes it for download.
        model_dropdown.change(fn=provide_download_file, inputs=[model_dropdown], outputs=[download_button])
        # Both the dropdown change and the explicit button recompute the table
        # from the currently selected game.
        game_dropdown.change(fn=update_leaderboard, inputs=[game_dropdown], outputs=[leaderboard_table])
        refresh_button.click(fn=update_leaderboard, inputs=[game_dropdown], outputs=[leaderboard_table])

interface.launch()