3D-animation-arena's picture
Set up arena
6c9ff9d verified
from typing import Tuple
import pandas as pd
import numpy as np
import time
import asyncio
from utils.s3_utils import write_to_s3
from utils.data_utils import generate_leaderboard, generate_data
# Held by submit_rating for its whole load -> update -> write cycle so that
# overlapping submissions on the same event loop cannot interleave.
submit_lock = asyncio.Lock()
def update_ratings(R_win: int, R_lose: int, k: int = 32, scale: float = 480) -> Tuple[int, int]:
    """
    Update the ratings of two players after a match.

    The expected score of each player follows the logistic Elo curve
    ``1 / (1 + 10 ** (rating_difference / scale))``; each rating then moves by
    ``k * (actual_score - expected_score)``.

    Args:
        R_win (int): The rating of the winning player.
        R_lose (int): The rating of the losing player.
        k (int, optional): The k-factor, i.e. the largest possible rating
            change from a single match. Defaults to 32.
        scale (float, optional): Rating gap at which the stronger player's
            odds of winning are 10 to 1. Defaults to 480.

    Returns:
        Tuple[int, int]: The updated ratings of the winning and losing players.
        Both values are truncated toward zero by ``int()``, so the two
        changes do not necessarily cancel out exactly.
    """
    # Expected scores of each side before the match.
    E_win = 1 / (1 + 10 ** ((R_lose - R_win) / scale))
    E_lose = 1 / (1 + 10 ** ((R_win - R_lose) / scale))

    # Actual score is 1 for the winner and 0 for the loser.
    return int(R_win + k * (1 - E_win)), int(R_lose + k * (0 - E_lose))
def generate_matchup(leaderboard: pd.DataFrame, beta: int) -> tuple[str, str]:
    """
    Generate a pseudo-random matchup between two distinct models.

    Models are drawn without replacement with probability proportional to
    ``exp(-Matches / beta)``, so models that have played fewer matches are
    picked more often. When no match has been played yet the draw is uniform.

    Args:
        leaderboard (pd.DataFrame): The leaderboard of models, indexed by
            model and containing a 'Matches' column.
        beta (int): Scale of the match-count damping; larger values flatten
            the preference for rarely-played models.

    Returns:
        model1 (str): The first model.
        model2 (str): The second model.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when two distinct
            models cannot be drawn (e.g. fewer than two models, or fewer than
            two models with a non-zero weight).
    """
    models = leaderboard.index
    matches = leaderboard['Matches'].to_numpy(dtype=float)

    # Nothing played yet: every model is equally likely.
    if matches.sum() == 0:
        model1, model2 = np.random.choice(models, 2, replace=False)
        return model1, model2

    # Shift by the smallest match count before exponentiating. The shift
    # cancels out in the normalization below, but it keeps the weights from
    # all underflowing to 0 (-> NaN probabilities) once match counts are large
    # relative to beta.
    weights = np.exp(-(matches - matches.min()) / beta)
    weights /= weights.sum()  # Normalize weights

    selected = np.random.choice(models, 2, replace=False, p=weights)
    # Weighted sampling without replacement favours the heavier model in the
    # first slot; shuffle so the position carries no information.
    np.random.shuffle(selected)
    model1, model2 = selected
    return model1, model2
async def simulate(iter : int, beta : int, criteria : str) -> pd.DataFrame:
    """
    Play a batch of simulated matches and persist the outcome.

    Every round draws two models with generate_matchup, counts the first one
    as the winner, applies the Elo update and appends one history row per
    participant. The leaderboard and the match history are written to S3 once
    all rounds are done.

    Args:
        iter (int): The number of matches to simulate.
        beta (int): Forwarded to generate_matchup, where it controls how
            strongly frequently-played models are down-weighted.
        criteria (str): The criteria whose leaderboard is simulated.
    Returns:
        leaderboard (pd.DataFrame): Leaderboard after the simulation, sorted
            by descending Elo with 'Model' back as a regular column.
    """
    data = await generate_data()
    board = await generate_leaderboard(criteria)
    board.set_index('Model', inplace=True)

    for _ in range(iter):
        played_at = time.time()
        # The first model of the pair is treated as the winner.
        victor, defeated = generate_matchup(board, beta)

        victor_elo, defeated_elo = update_ratings(
            board.at[victor, 'Elo'],
            board.at[defeated, 'Elo'],
        )
        board.at[victor, 'Elo'] = victor_elo
        board.at[defeated, 'Elo'] = defeated_elo

        # Refresh the per-model counters and the derived win rate.
        board.at[victor, 'Wins'] += 1
        for model in (victor, defeated):
            board.at[model, 'Matches'] += 1
            board.at[model, 'Win Rate'] = np.round(
                board.at[model, 'Wins'] / board.at[model, 'Matches'], 2
            )

        # One history row per participant, winner first.
        for model, opponent, won in ((victor, defeated, True), (defeated, victor, False)):
            data.loc[len(data)] = {
                'Criteria': criteria,
                'Model': model,
                'Opponent': opponent,
                'Won': won,
                'Elo': board.at[model, 'Elo'],
                'Win Rate': board.at[model, 'Win Rate'],
                'Matches': board.at[model, 'Matches'],
                'Timestamp': played_at,
                'UUID': None
            }

    ranked = board.sort_values('Elo', ascending=False).reset_index(drop=False)
    await asyncio.gather(
        write_to_s3(f'leaderboard_{criteria}.csv', ranked),
        write_to_s3('data.csv', data)
    )
    return ranked
async def submit_rating(criteria: str, winner: str, loser: str, uuid: str) -> pd.DataFrame:
    """
    Submit a rating for a match.

    Runs entirely under ``submit_lock`` so that the load -> update -> write
    cycle of one submission cannot interleave with another one.

    Args:
        criteria (str): The criteria for the rating.
        winner (str): The winning model.
        loser (str): The losing model.
        uuid (str): The UUID of the session.

    Returns:
        pd.DataFrame: The leaderboard for ``criteria``, sorted by descending
        Elo with 'Model' as a regular column. When ``winner`` or ``loser`` is
        None nothing is recorded or written and the current leaderboard is
        returned in that same layout.

    Raises:
        KeyError: If ``winner`` or ``loser`` is not a model on the leaderboard.
    """
    async with submit_lock:
        data = await generate_data()
        leaderboard = await generate_leaderboard(criteria)
        leaderboard.set_index('Model', inplace=True)

        if winner is None or loser is None:
            # No match to record. Return the same layout as the normal path
            # (sorted, 'Model' as a column) instead of the internally
            # re-indexed frame.
            return leaderboard.sort_values('Elo', ascending=False).reset_index(drop=False)

        timestamp = time.time()
        R_win, R_lose = leaderboard.at[winner, 'Elo'], leaderboard.at[loser, 'Elo']
        R_win_new, R_lose_new = update_ratings(R_win, R_lose)

        # Update leaderboard
        leaderboard.loc[[winner, loser], 'Elo'] = [R_win_new, R_lose_new]
        leaderboard.at[winner, 'Wins'] += 1
        leaderboard.loc[[winner, loser], 'Matches'] += [1, 1]
        leaderboard.loc[[winner, loser], 'Win Rate'] = (
            leaderboard.loc[[winner, loser], 'Wins'] / leaderboard.loc[[winner, loser], 'Matches']
        ).apply(lambda x: round(x, 2))

        # Save match data: one row per participant, winner first.
        data.loc[len(data)] = {
            'Criteria': criteria,
            'Model': winner,
            'Opponent': loser,
            'Won': True,
            'Elo': leaderboard.at[winner, 'Elo'],
            'Win Rate': leaderboard.at[winner, 'Win Rate'],
            'Matches': leaderboard.at[winner, 'Matches'],
            'Timestamp': timestamp,
            'UUID': uuid
        }
        data.loc[len(data)] = {
            'Criteria': criteria,
            'Model': loser,
            'Opponent': winner,
            'Won': False,
            'Elo': leaderboard.at[loser, 'Elo'],
            'Win Rate': leaderboard.at[loser, 'Win Rate'],
            'Matches': leaderboard.at[loser, 'Matches'],
            'Timestamp': timestamp,
            'UUID': uuid
        }

        leaderboard = leaderboard.sort_values('Elo', ascending=False).reset_index(drop=False)
        # Both uploads are independent, so run them concurrently.
        await asyncio.gather(
            write_to_s3(f'leaderboard_{criteria}.csv', leaderboard),
            write_to_s3('data.csv', data)
        )
        return leaderboard