Spaces:

lcipolina
/

LLM_OpenSpiel_Arena

Sleeping

App Files Files Community

LLM_OpenSpiel_Arena / app.py

lcipolina

Trying to restore to working state

634c45e verified 4 months ago

raw

history blame

5.91 kB

	import os
	import json
	import pandas as pd
	import gradio as gr
	from agents.llm_registry import LLM_REGISTRY # Dynamically fetch LLM models
	from simulators.tic_tac_toe_simulator import TicTacToeSimulator
	from simulators.prisoners_dilemma_simulator import PrisonersDilemmaSimulator
	from simulators.rock_paper_scissors_simulator import RockPaperScissorsSimulator
	from games_registry import GAMES_REGISTRY
	from simulators.base_simulator import PlayerType
	from typing import Dict

	# Extract available LLM models
	llm_models = list(LLM_REGISTRY.keys())

	# Define game list manually (for now)
	#games_list = list(GAMES_REGISTRY.keys())
	games_list = [
	"rock_paper_scissors",
	"prisoners_dilemma",
	"tic_tac_toe",
	"connect_four",
	"matching_pennies",
	"kuhn_poker",
	]

	# File to persist results
	RESULTS_TRACKER_FILE = "results_tracker.json"

	def generate_stats_file(model_name: str):
	"""Generate a JSON file with detailed statistics for the selected LLM model."""
	file_path = f"{model_name}_stats.json"
	with open(file_path, "w") as f:
	json.dump(results_tracker.get(model_name, {}), f, indent=4)
	return file_path

	def provide_download_file(model_name):
	"""Creates a downloadable JSON file with stats for the selected model."""
	return generate_stats_file(model_name)

	def refresh_leaderboard():
	"""Manually refresh the leaderboard."""
	return calculate_leaderboard(game_dropdown.value)

	# Load or initialize the results tracker
	if os.path.exists(RESULTS_TRACKER_FILE):
	with open(RESULTS_TRACKER_FILE, "r") as f:
	results_tracker = json.load(f)
	else:
	results_tracker = {
	llm: {game: {"games": 0, "moves/game": 0, "illegal-moves": 0,
	"win-rate": 0, "vs Random": 0} for game in games_list}
	for llm in llm_models
	}

	def save_results_tracker():
	"""Save the results tracker to a JSON file."""
	with open(RESULTS_TRACKER_FILE, "w") as f:
	json.dump(results_tracker, f, indent=4)

	def calculate_leaderboard(selected_game: str) -> pd.DataFrame:
	"""Generate a structured leaderboard table for the selected game."""
	leaderboard_df = pd.DataFrame(index=llm_models,
	columns=["# games", "moves/game",
	"illegal-moves", "win-rate", "vs Random"])

	for llm in llm_models:
	game_stats = results_tracker[llm].get(selected_game, {})
	leaderboard_df.loc[llm] = [
	game_stats.get("games", 0),
	game_stats.get("moves/game", 0),
	game_stats.get("illegal-moves", 0),
	f"{game_stats.get('win-rate', 0):.1f}%",
	f"{game_stats.get('vs Random', 0):.1f}%"
	]

	leaderboard_df = leaderboard_df.reset_index()
	leaderboard_df.rename(columns={"index": "LLM Model"}, inplace=True)
	return leaderboard_df

	def play_game(game_name, player1_type, player2_type, player1_model, player2_model, rounds):
	"""Play the selected game with specified players."""
	llms = {}
	if player1_type == "llm":
	llms["Player 1"] = player1_model
	if player2_type == "llm":
	llms["Player 2"] = player2_model

	simulator_class = GAMES_REGISTRY[game_name]
	simulator = simulator_class(game_name, llms=llms)
	game_states = []

	def log_fn(state):
	"""Log current state and legal moves."""
	current_player = state.current_player()
	legal_moves = state.legal_actions(current_player)
	board = str(state)
	game_states.append(f"Current Player: {current_player}\nBoard:\n{board}\nLegal Moves: {legal_moves}")

	results = simulator.simulate(rounds=int(rounds), log_fn=log_fn)
	return "\n".join(game_states) + f"\nGame Result: {results}"

	# Gradio Interface
	with gr.Blocks() as interface:
	with gr.Tab("Game Arena"):
	gr.Markdown("# LLM Game Arena\nSelect a game and players to play against LLMs.")

	game_dropdown = gr.Dropdown(choices=games_list, label="Select a Game", value=games_list[0])
	player1_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 1 Type", value="llm")
	player2_dropdown = gr.Dropdown(choices=["human", "random_bot", "llm"], label="Player 2 Type", value="random_bot")
	player1_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 1 Model", visible=False)
	player2_model_dropdown = gr.Dropdown(choices=llm_models, label="Player 2 Model", visible=False)
	rounds_slider = gr.Slider(1, 10, step=1, label="Rounds")
	result_output = gr.Textbox(label="Game Result")

	play_button = gr.Button("Play Game")
	play_button.click(
	play_game,
	inputs=[game_dropdown, player1_dropdown, player2_dropdown, player1_model_dropdown, player2_model_dropdown, rounds_slider],
	outputs=result_output,
	)

	with gr.Tab("Leaderboard"):
	gr.Markdown("# LLM Model Leaderboard\nTrack performance across different games!")

	game_dropdown = gr.Dropdown(choices=games_list, label="Select Game", value=games_list[0])
	leaderboard_table = gr.Dataframe(value=calculate_leaderboard(games_list[0]), label="Leaderboard")
	model_dropdown = gr.Dropdown(choices=llm_models, label="Select LLM Model")
	download_button = gr.File(label="Download Statistics File")
	refresh_button = gr.Button("Refresh Leaderboard")

	def update_leaderboard(selected_game):
	"""Updates the leaderboard table based on the selected game."""
	return calculate_leaderboard(selected_game)

	model_dropdown.change(fn=provide_download_file, inputs=[model_dropdown], outputs=[download_button])
	game_dropdown.change(fn=update_leaderboard, inputs=[game_dropdown], outputs=[leaderboard_table])
	refresh_button.click(fn=update_leaderboard, inputs=[game_dropdown], outputs=[leaderboard_table])

	interface.launch()