File size: 6,297 Bytes
6c9ff9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
from typing import Tuple
import pandas as pd
import numpy as np
import time
import asyncio
from utils.s3_utils import write_to_s3
from utils.data_utils import generate_leaderboard, generate_data

# Held by submit_rating() for its whole load/update/write sequence so that
# concurrent submissions cannot interleave and overwrite each other's results.
# NOTE(review): simulate() writes the same files without taking this lock —
# confirm that is intended.
submit_lock = asyncio.Lock()

def update_ratings(R_win : int, R_lose : int, k : int = 32, scale : int = 480) -> Tuple[int, int]:
    """
    Update the ratings of two players after a match.

    Each player's expected score is computed with the logistic Elo curve
    ``1 / (1 + 10 ** (rating_difference / scale))`` and each rating moves by
    ``k * (actual_score - expected_score)``, where the actual score is 1 for
    the winner and 0 for the loser.

    Args:
        R_win (int): The rating of the winning player.
        R_lose (int): The rating of the losing player.
        k (int, optional): The k-factor, i.e. the upper bound on the rating
            change caused by a single match. Defaults to 32.
        scale (int, optional): The rating difference that corresponds to a
            tenfold change in expected odds. Defaults to 480.

    Returns:
        Tuple[int, int]: The updated ratings of the winning and losing players.
            Both values are truncated toward zero by ``int()``.

    Raises:
        ValueError: If ``scale`` is not positive.
    """
    if scale <= 0:
        raise ValueError(f"scale must be positive, got {scale}")

    # Expected scores of each side before the match (they sum to 1).
    E_win = 1 / (1 + 10 ** ((R_lose - R_win) / scale))
    E_lose = 1 / (1 + 10 ** ((R_win - R_lose) / scale))

    # Winner scored 1, loser scored 0.
    return int(R_win + k * (1 - E_win)), int(R_lose + k * (0 - E_lose))

def generate_matchup(leaderboard : pd.DataFrame, beta : int) -> tuple[str, str]:
    """
    Generate a pseudo-random matchup between two distinct models.

    Models that have played fewer matches are more likely to be picked: each
    model is weighted by ``exp(-matches / beta)``. When no matches have been
    played at all, the pair is drawn uniformly.

    Args:
        leaderboard (pd.DataFrame): The leaderboard, indexed by model name,
            with a 'Matches' column holding each model's match count.
        beta (int): Decay scale of the sampling weights; larger values make
            the selection closer to uniform. Expected to be positive.

    Returns:
        model1 (str): The first model.
        model2 (str): The second model.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when the leaderboard
            holds fewer than two models.
    """
    if leaderboard['Matches'].sum() == 0:
        # No history yet: every pair is equally likely. Unpack so that both
        # branches return a tuple rather than a NumPy array.
        model1, model2 = np.random.choice(leaderboard.index, 2, replace=False)
        return model1, model2

    matches = leaderboard['Matches'].to_numpy(dtype=float)
    # Shifting by the smallest count leaves the normalized weights unchanged
    # but keeps exp() from underflowing to all zeros for large match counts.
    weights = np.exp(-(matches - matches.min()) / beta)
    # Keep every weight strictly positive so that sampling two models without
    # replacement is always possible, even if some weights underflowed.
    weights = np.maximum(weights, np.finfo(float).tiny)
    weights = weights / weights.sum()  # Normalize weights

    selected = np.random.choice(leaderboard.index, 2, replace=False, p=weights)
    # Weighted sampling tends to put the heavier model first; shuffle so the
    # position in the returned pair carries no bias.
    np.random.shuffle(selected)
    model1, model2 = selected
    return model1, model2

async def simulate(iter : int, beta : int, criteria : str) -> pd.DataFrame:
    """
    Play a batch of simulated matches and persist the outcome.

    For every simulated match a pair is drawn with generate_matchup(); the
    first model of the pair is recorded as the winner. Ratings, win counts,
    match counts and win rates are updated on the leaderboard, and one row per
    participant is appended to the match data.

    Args:
        iter (int): The number of matches to simulate.
        beta (int): Passed to generate_matchup() to weight the model selection.
        criteria (str): The criteria for the rating.

    Returns:
        leaderboard (pd.DataFrame): The leaderboard after the simulation,
            sorted by 'Elo' in descending order with 'Model' as a column.
    """
    match_log = await generate_data()

    board = await generate_leaderboard(criteria)
    board.set_index('Model', inplace=True)

    def _match_row(model, opponent, won, played_at):
        # One participant's view of a match, read after the board was updated.
        return {
            'Criteria': criteria,
            'Model': model,
            'Opponent': opponent,
            'Won': won,
            'Elo': board.at[model, 'Elo'],
            'Win Rate': board.at[model, 'Win Rate'],
            'Matches': board.at[model, 'Matches'],
            'Timestamp': played_at,
            'UUID': None
        }

    for _ in range(iter):
        played_at = time.time()

        # The first model of the drawn pair is treated as the winner
        winner, loser = generate_matchup(board, beta)
        winner_elo, loser_elo = update_ratings(
            board.at[winner, 'Elo'],
            board.at[loser, 'Elo'],
        )

        # Apply the result to the leaderboard
        board.at[winner, 'Elo'] = winner_elo
        board.at[loser, 'Elo'] = loser_elo
        board.at[winner, 'Wins'] += 1
        for model in (winner, loser):
            board.at[model, 'Matches'] += 1
        for model in (winner, loser):
            board.at[model, 'Win Rate'] = np.round(
                board.at[model, 'Wins'] / board.at[model, 'Matches'], 2
            )

        # Append one row per participant to the match data
        match_log.loc[len(match_log)] = _match_row(winner, loser, True, played_at)
        match_log.loc[len(match_log)] = _match_row(loser, winner, False, played_at)

    ranked = board.sort_values('Elo', ascending=False).reset_index(drop=False)

    await asyncio.gather(
        write_to_s3(f'leaderboard_{criteria}.csv', ranked),
        write_to_s3('data.csv', match_log)
    )

    return ranked


async def submit_rating(criteria : str, winner : str, loser : str, uuid : str) -> pd.DataFrame:
    """
    Submit a rating for a match.

    The whole load/update/write sequence runs while holding ``submit_lock``.

    Args:
        criteria (str): The criteria for the rating.
        winner (str): The winning model.
        loser (str): The losing model.
        uuid (str): The UUID of the session.

    Returns:
        leaderboard (pd.DataFrame): The leaderboard with 'Model' as a regular
            column. After a recorded match it is sorted by 'Elo' in descending
            order. If ``winner`` or ``loser`` is None, nothing is updated or
            written and the leaderboard is returned as it was loaded.

    Raises:
        KeyError: If ``winner`` or ``loser`` is not a model on the leaderboard.
    """
    async with submit_lock:
        data = await generate_data()

        leaderboard = await generate_leaderboard(criteria)

        # Bail out before re-indexing so that both return paths hand back a
        # frame of the same shape ('Model' as a column, not as the index).
        if winner is None or loser is None:
            return leaderboard

        leaderboard.set_index('Model', inplace=True)

        timestamp = time.time()
        R_win, R_lose = leaderboard.at[winner, 'Elo'], leaderboard.at[loser, 'Elo']
        R_win_new, R_lose_new = update_ratings(R_win, R_lose)

        # Update leaderboard
        players = [winner, loser]
        leaderboard.loc[players, 'Elo'] = [R_win_new, R_lose_new]
        leaderboard.at[winner, 'Wins'] += 1
        leaderboard.loc[players, 'Matches'] += 1
        leaderboard.loc[players, 'Win Rate'] = (
            leaderboard.loc[players, 'Wins'] / leaderboard.loc[players, 'Matches']
        ).round(2)

        # Save match data: one row per participant, winner first
        for model, opponent, won in ((winner, loser, True), (loser, winner, False)):
            data.loc[len(data)] = {
                'Criteria': criteria,
                'Model': model,
                'Opponent': opponent,
                'Won': won,
                'Elo': leaderboard.at[model, 'Elo'],
                'Win Rate': leaderboard.at[model, 'Win Rate'],
                'Matches': leaderboard.at[model, 'Matches'],
                'Timestamp': timestamp,
                'UUID': uuid
            }

        leaderboard = leaderboard.sort_values('Elo', ascending=False).reset_index(drop=False)
        await asyncio.gather(
            write_to_s3(f'leaderboard_{criteria}.csv', leaderboard),
            write_to_s3('data.csv', data)
        )
        return leaderboard