|
from typing import Tuple |
|
import pandas as pd |
|
import numpy as np |
|
import time |
|
import asyncio |
|
from utils.s3_utils import write_to_s3 |
|
from utils.data_utils import generate_leaderboard, generate_data |
|
|
|
# Module-wide lock: submit_rating holds it for its whole load/update/write
# cycle so that concurrent submissions cannot interleave.
submit_lock: asyncio.Lock = asyncio.Lock()
|
|
|
def update_ratings(R_win: int, R_lose: int, k: int = 32, scale: int = 480) -> Tuple[int, int]:
    """
    Update the Elo ratings of two players after a match.

    The expected score of each player is the logistic function of the rating
    difference divided by ``scale``; each rating then moves by ``k`` times
    the gap between the actual score (1 for the winner, 0 for the loser) and
    that expected score.

    Args:
        R_win (int): The rating of the winning player.
        R_lose (int): The rating of the losing player.
        k (int, optional): The k-factor (maximum rating change). Defaults to 32.
        scale (int, optional): Rating difference that corresponds to a tenfold
            change in the odds. Defaults to 480, the value this module has
            always used.

    Returns:
        Tuple[int, int]: The updated ratings of the winning and losing players.

    Raises:
        ValueError: If ``scale`` is not strictly positive.
    """
    if scale <= 0:
        raise ValueError(f"scale must be positive, got {scale}")

    E_win = 1 / (1 + 10 ** ((R_lose - R_win) / scale))
    E_lose = 1 / (1 + 10 ** ((R_win - R_lose) / scale))

    # int() truncates toward zero; kept as-is so previously stored ratings
    # stay reproducible.
    return int(R_win + k * (1 - E_win)), int(R_lose + k * (0 - E_lose))
|
|
|
def generate_matchup(leaderboard: pd.DataFrame, beta: int) -> tuple[str, str]:
    """
    Generate a pseudo-random matchup between two distinct models.

    Models with fewer recorded matches are favoured: each model is drawn
    with probability proportional to ``exp(-Matches / beta)``. When no
    matches have been recorded at all, the pair is drawn uniformly.

    Args:
        leaderboard (pd.DataFrame): The leaderboard of models, indexed by
            model and carrying a numeric 'Matches' column.
        beta (int): Temperature of the sampling weights; the larger it is,
            the weaker the preference for rarely-played models.

    Returns:
        model1 (str): The first model.
        model2 (str): The second model.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when the leaderboard
            holds fewer than two models.
    """
    models = leaderboard.index

    if leaderboard['Matches'].sum() == 0:
        model1, model2 = np.random.choice(models, 2, replace=False)
        return model1, model2

    matches = leaderboard['Matches'].to_numpy(dtype=float)

    # Work in log space and subtract the maximum before exponentiating: the
    # normalised probabilities are unchanged, but the largest weight is
    # exactly 1, so large match counts can no longer underflow every weight
    # to 0 (which used to yield NaN probabilities).
    log_weights = -matches / beta
    weights = np.exp(log_weights - log_weights.max())

    # Sampling two models without replacement needs at least two non-zero
    # probabilities; keep fully underflowed weights at the smallest positive
    # normal float instead of 0.
    weights = np.maximum(weights, np.finfo(float).tiny)
    weights = weights / weights.sum()

    selected = np.random.choice(models, 2, replace=False, p=weights)
    # Shuffle so the heavier-weighted model is not systematically first.
    np.random.shuffle(selected)
    model1, model2 = selected
    return model1, model2
|
|
|
async def simulate(iter : int, beta : int, criteria : str) -> pd.DataFrame:
    """
    Play a series of simulated matches and persist the outcome.

    Each round draws a pair with generate_matchup and scores it as a win for
    the first model of the pair. The leaderboard is updated in place and two
    history rows (one per participant) are appended to the match data. After
    the last round the sorted leaderboard and the match data are written out
    with write_to_s3.

    Args:
        iter (int): The number of matches to simulate.
        beta (int): Forwarded to generate_matchup when pairing models.
        criteria (str): The criteria for the rating; selects the leaderboard
            and is stored on every history row.

    Returns:
        leaderboard (pd.DataFrame): The leaderboard sorted by descending
            'Elo', with 'Model' restored as a regular column.
    """
    history = await generate_data()

    board = await generate_leaderboard(criteria)
    board.set_index('Model', inplace=True)

    def log_result(model, opponent, won, played_at):
        # Append one participant's view of the match as a new history row.
        history.loc[len(history)] = {
            'Criteria': criteria,
            'Model': model,
            'Opponent': opponent,
            'Won': won,
            'Elo': board.at[model, 'Elo'],
            'Win Rate': board.at[model, 'Win Rate'],
            'Matches': board.at[model, 'Matches'],
            'Timestamp': played_at,
            'UUID': None
        }

    for _ in range(iter):
        played_at = time.time()
        victor, defeated = generate_matchup(board, beta)

        new_ratings = update_ratings(board.at[victor, 'Elo'], board.at[defeated, 'Elo'])
        board.at[victor, 'Elo'] = new_ratings[0]
        board.at[defeated, 'Elo'] = new_ratings[1]

        board.at[victor, 'Wins'] += 1
        board.at[victor, 'Matches'] += 1
        board.at[defeated, 'Matches'] += 1

        # Refresh both win rates from the updated counters.
        for model in (victor, defeated):
            board.at[model, 'Win Rate'] = np.round(
                board.at[model, 'Wins'] / board.at[model, 'Matches'], 2
            )

        log_result(victor, defeated, True, played_at)
        log_result(defeated, victor, False, played_at)

    ranked = board.sort_values('Elo', ascending=False).reset_index(drop=False)

    # Persist the leaderboard and the match history concurrently.
    uploads = (
        write_to_s3(f'leaderboard_{criteria}.csv', ranked),
        write_to_s3('data.csv', history),
    )
    await asyncio.gather(*uploads)

    return ranked
|
|
|
|
|
async def submit_rating(criteria: str, winner: str, loser: str, uuid: str) -> pd.DataFrame:
    """
    Submit a rating for a match.

    Holds submit_lock for the whole load/update/write cycle so concurrent
    submissions are applied one after another. The winner's and loser's Elo,
    match counters and win rates are updated, two history rows (one per
    participant) are appended to the match data, and both tables are written
    out with write_to_s3.

    Args:
        criteria (str): The criteria for the rating.
        winner (str): The winning model. If None, nothing is recorded.
        loser (str): The losing model. If None, nothing is recorded.
        uuid (str): The UUID of the session.

    Returns:
        pd.DataFrame: The leaderboard sorted by descending 'Elo' with 'Model'
            as a regular column. The same layout is returned whether or not
            a result was recorded.

    Raises:
        KeyError: If winner or loser is not a model on the leaderboard.
    """
    async with submit_lock:
        data = await generate_data()

        leaderboard = await generate_leaderboard(criteria)
        leaderboard.set_index('Model', inplace=True)

        if winner is None or loser is None:
            # Nothing to record: hand back the current standings in the same
            # shape as the normal path (previously this returned the frame
            # still indexed by 'Model').
            return leaderboard.sort_values('Elo', ascending=False).reset_index(drop=False)

        timestamp = time.time()
        R_win, R_lose = leaderboard.at[winner, 'Elo'], leaderboard.at[loser, 'Elo']
        R_win_new, R_lose_new = update_ratings(R_win, R_lose)

        leaderboard.loc[[winner, loser], 'Elo'] = [R_win_new, R_lose_new]
        leaderboard.at[winner, 'Wins'] += 1
        leaderboard.loc[[winner, loser], 'Matches'] += [1, 1]
        # Win rates are recomputed from the updated counters.
        leaderboard.loc[[winner, loser], 'Win Rate'] = (
            leaderboard.loc[[winner, loser], 'Wins'] / leaderboard.loc[[winner, loser], 'Matches']
        ).apply(lambda x: round(x, 2))

        # One history row per participant, sharing the same timestamp.
        data.loc[len(data)] = {
            'Criteria': criteria,
            'Model': winner,
            'Opponent': loser,
            'Won': True,
            'Elo': leaderboard.at[winner, 'Elo'],
            'Win Rate': leaderboard.at[winner, 'Win Rate'],
            'Matches': leaderboard.at[winner, 'Matches'],
            'Timestamp': timestamp,
            'UUID': uuid
        }

        data.loc[len(data)] = {
            'Criteria': criteria,
            'Model': loser,
            'Opponent': winner,
            'Won': False,
            'Elo': leaderboard.at[loser, 'Elo'],
            'Win Rate': leaderboard.at[loser, 'Win Rate'],
            'Matches': leaderboard.at[loser, 'Matches'],
            'Timestamp': timestamp,
            'UUID': uuid
        }

        leaderboard = leaderboard.sort_values('Elo', ascending=False).reset_index(drop=False)

        # Persist while still holding the lock so the next submission reads
        # the updated files.
        await asyncio.gather(
            write_to_s3(f'leaderboard_{criteria}.csv', leaderboard),
            write_to_s3('data.csv', data)
        )

        return leaderboard