File size: 6,297 Bytes
6c9ff9d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
from typing import Tuple
import pandas as pd
import numpy as np
import time
import asyncio
from utils.s3_utils import write_to_s3
from utils.data_utils import generate_leaderboard, generate_data
# Module-level lock: submit_rating enters it with `async with` before it
# loads, updates and writes back the match data and leaderboard.
submit_lock = asyncio.Lock()
def update_ratings(R_win : int, R_lose : int, k : int = 32, scale : int = 480) -> Tuple[int, int]:
    """
    Update the ratings of two players after a match.

    The expected score of each player is the logistic curve
    ``1 / (1 + 10 ** (rating_difference / scale))``; the winner is scored 1
    and the loser 0, and each rating moves by ``k * (score - expected)``.

    Args:
        R_win (int): The rating of the winning player.
        R_lose (int): The rating of the losing player.
        k (int, optional): The k-factor. Defaults to 32.
        scale (int, optional): Rating difference that corresponds to a
            tenfold change in the odds. Defaults to 480, the value this
            function previously had hard-coded.

    Returns:
        Tuple[int, int]: The updated ratings of the winning and losing players.
        Both values are passed through ``int()``, which truncates toward zero.

    Raises:
        ValueError: If ``scale`` is not positive.
    """
    if scale <= 0:
        raise ValueError("scale must be positive")

    # Both expectations are computed from the formula (rather than deriving
    # one as 1 - other) so results with the default scale stay bit-identical.
    E_win = 1 / (1 + 10 ** ((R_lose - R_win) / scale))
    E_lose = 1 / (1 + 10 ** ((R_win - R_lose) / scale))
    return int(R_win + k * (1 - E_win)), int(R_lose + k * (0 - E_lose))
def generate_matchup(leaderboard : pd.DataFrame, beta : int) -> tuple[str, str]:
    """
    Generate a pseudo-random matchup between two distinct models.

    While no matches have been recorded, the pair is drawn uniformly.
    Afterwards each model is drawn with probability proportional to
    ``exp(-Matches / beta)``, so models with fewer matches are favoured.

    Args:
        leaderboard (pd.DataFrame): The leaderboard of models, indexed by
            model name, with a numeric 'Matches' column.
        beta (int): Scale of the match-count weighting; a larger value makes
            the draw closer to uniform.

    Returns:
        model1 (str): The first model.
        model2 (str): The second model.

    Raises:
        ValueError: If ``beta`` is zero while matches have been recorded, or
            (from ``np.random.choice``) if the leaderboard has fewer than two
            models.
    """
    models = leaderboard.index

    if leaderboard['Matches'].sum() == 0:
        # Nothing played yet: every pair is equally likely. Unpack so this
        # branch returns a tuple just like the weighted branch below.
        model1, model2 = np.random.choice(models, 2, replace=False)
        return model1, model2

    if beta == 0:
        raise ValueError("beta must be non-zero")

    matches = leaderboard['Matches'].to_numpy(dtype=float)
    exponents = -matches / beta
    # Shifting by the maximum leaves the normalised weights unchanged but
    # keeps exp() from underflowing every entry to 0 (which produced NaN
    # probabilities once match counts grew large relative to beta).
    exponents -= exponents.max()
    weights = np.exp(exponents)
    # Sampling two models without replacement needs at least two non-zero
    # probabilities; a vanishingly small floor guarantees that without
    # measurably changing the distribution.
    weights = np.maximum(weights, np.finfo(float).tiny)
    weights /= weights.sum()

    selected = np.random.choice(models, 2, replace=False, p=weights)
    # Weighted sampling without replacement tends to put the heavier model
    # first; shuffle so the slot order carries no such bias.
    np.random.shuffle(selected)
    model1, model2 = selected
    return model1, model2
async def simulate(iter : int, beta : int, criteria : str) -> pd.DataFrame:
    """
    Play a series of simulated matches and persist the outcome.

    Each round draws a pair with generate_matchup, treats the first model of
    the pair as the winner, updates both models' leaderboard rows and appends
    one history row per participant. After the last round the leaderboard is
    sorted by Elo and both tables are written with write_to_s3.

    Args:
        iter (int): The number of matches to simulate.
        beta (int): Passed through to generate_matchup.
        criteria (str): The criteria the leaderboard and history rows belong to.

    Returns:
        leaderboard (pd.DataFrame): The leaderboard after the simulation,
        sorted by 'Elo' descending with 'Model' restored as a column.
    """
    data = await generate_data()
    leaderboard = await generate_leaderboard(criteria)
    leaderboard.set_index('Model', inplace=True)

    def history_row(model, opponent, won, played_at):
        # Snapshot of `model`'s leaderboard state right after the match.
        return {
            'Criteria': criteria,
            'Model': model,
            'Opponent': opponent,
            'Won': won,
            'Elo': leaderboard.at[model, 'Elo'],
            'Win Rate': leaderboard.at[model, 'Win Rate'],
            'Matches': leaderboard.at[model, 'Matches'],
            'Timestamp': played_at,
            'UUID': None
        }

    for _ in range(iter):
        played_at = time.time()
        victor, defeated = generate_matchup(leaderboard, beta)
        pair = (victor, defeated)

        # The first model of the pair is always recorded as the winner.
        victor_elo, defeated_elo = update_ratings(
            leaderboard.at[victor, 'Elo'],
            leaderboard.at[defeated, 'Elo'],
        )
        leaderboard.at[victor, 'Elo'] = victor_elo
        leaderboard.at[defeated, 'Elo'] = defeated_elo

        leaderboard.at[victor, 'Wins'] += 1
        for model in pair:
            leaderboard.at[model, 'Matches'] += 1
        for model in pair:
            wins = leaderboard.at[model, 'Wins']
            played = leaderboard.at[model, 'Matches']
            leaderboard.at[model, 'Win Rate'] = np.round(wins / played, 2)

        # One history row per participant, winner first.
        data.loc[len(data)] = history_row(victor, defeated, True, played_at)
        data.loc[len(data)] = history_row(defeated, victor, False, played_at)

    ranked = leaderboard.sort_values('Elo', ascending=False).reset_index(drop=False)
    await asyncio.gather(
        write_to_s3(f'leaderboard_{criteria}.csv', ranked),
        write_to_s3('data.csv', data)
    )
    return ranked
async def submit_rating(criteria : str, winner : str, loser : str, uuid : str) -> pd.DataFrame:
    """
    Submit a rating for a match.

    The whole load / update / write cycle runs while holding submit_lock.
    When either model is missing, nothing is recorded or written and the
    current leaderboard is returned in the same layout as after a real
    submission.

    Args:
        criteria (str): The criteria for the rating.
        winner (str): The winning model.
        loser (str): The losing model.
        uuid (str): The UUID of the session.

    Returns:
        leaderboard (pd.DataFrame): The leaderboard for ``criteria``, sorted
        by 'Elo' descending with 'Model' as a regular column.
    """
    async with submit_lock:
        data = await generate_data()
        leaderboard = await generate_leaderboard(criteria)
        leaderboard.set_index('Model', inplace=True)

        if winner is None or loser is None:
            # No match to record: hand back the leaderboard in the same
            # shape the normal path returns (sorted, 'Model' as a column)
            # rather than the Model-indexed working copy.
            return leaderboard.sort_values('Elo', ascending=False).reset_index(drop=False)

        timestamp = time.time()
        R_win, R_lose = leaderboard.at[winner, 'Elo'], leaderboard.at[loser, 'Elo']
        R_win_new, R_lose_new = update_ratings(R_win, R_lose)

        # Update leaderboard
        leaderboard.loc[[winner, loser], 'Elo'] = [R_win_new, R_lose_new]
        leaderboard.at[winner, 'Wins'] += 1
        leaderboard.loc[[winner, loser], 'Matches'] += [1, 1]
        leaderboard.loc[[winner, loser], 'Win Rate'] = (
            leaderboard.loc[[winner, loser], 'Wins'] / leaderboard.loc[[winner, loser], 'Matches']
        ).apply(lambda x: round(x, 2))

        # Save match data: one row per participant, winner first
        data.loc[len(data)] = {
            'Criteria': criteria,
            'Model': winner,
            'Opponent': loser,
            'Won': True,
            'Elo': leaderboard.at[winner, 'Elo'],
            'Win Rate': leaderboard.at[winner, 'Win Rate'],
            'Matches': leaderboard.at[winner, 'Matches'],
            'Timestamp': timestamp,
            'UUID': uuid
        }
        data.loc[len(data)] = {
            'Criteria': criteria,
            'Model': loser,
            'Opponent': winner,
            'Won': False,
            'Elo': leaderboard.at[loser, 'Elo'],
            'Win Rate': leaderboard.at[loser, 'Win Rate'],
            'Matches': leaderboard.at[loser, 'Matches'],
            'Timestamp': timestamp,
            'UUID': uuid
        }

        leaderboard = leaderboard.sort_values('Elo', ascending=False).reset_index(drop=False)
        await asyncio.gather(
            write_to_s3(f'leaderboard_{criteria}.csv', leaderboard),
            write_to_s3('data.csv', data)
        )
        return leaderboard