File size: 3,025 Bytes
b9c0bac
 
 
 
 
 
5a11597
b9c0bac
 
54e2d5b
 
 
 
 
 
b9c0bac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54e2d5b
 
 
 
 
 
 
b9c0bac
 
 
 
 
 
5a11597
b9c0bac
 
 
 
 
 
 
 
 
 
5a11597
b9c0bac
 
 
 
 
 
 
 
 
 
54e2d5b
b9c0bac
54e2d5b
b9c0bac
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# This file is kept for reference only and is not used in the enhanced implementation
# The actual implementation is in enhanced_leaderboard.py

import gradio as gr
import pandas as pd
from gradio_leaderboard import Leaderboard
from loguru import logger

import populate
from envs import EVAL_RESULTS_PATH, LEADERBOARD_REFRESH_INTERVAL, RESULTS_REPO
from hf_datasets_utils import download_dataset_snapshot


def download_leaderboard_data():
    """Fetch the results dataset snapshot into the local results directory.

    Delegates to ``download_dataset_snapshot`` with the configured
    ``RESULTS_REPO`` and ``EVAL_RESULTS_PATH``. Returns nothing.
    """
    source_repo, destination = RESULTS_REPO, EVAL_RESULTS_PATH
    download_dataset_snapshot(source_repo, destination)


def fetch_tossup_leaderboard(style: bool = True):
    """Load the tossup leaderboard table from the local results directory.

    The data comes from ``populate.get_tossups_leaderboard_df`` called with
    ``EVAL_RESULTS_PATH`` and the literal ``"tiny_eval"``.

    Args:
        style: When True (default), return a pandas ``Styler`` that applies
            number formatting to the score/accuracy columns and colours the
            "Avg Score ⬆️" text green (positive) or red (otherwise).
            When False, return the unformatted DataFrame.

    Returns:
        Either the raw DataFrame or a ``Styler`` wrapping it, per ``style``.
    """
    df = populate.get_tossups_leaderboard_df(EVAL_RESULTS_PATH, "tiny_eval")

    # Raw-data callers should not pay for (or depend on) constructing a Styler
    # that would be thrown away immediately.
    if not style:
        return df

    def colour_pos_neg(v):
        """Return a CSS rule for the cell that called the function."""
        if pd.isna(v):  # keep NaNs unstyled
            return ""
        return "color: green;" if v > 0 else "color: red;"

    # Columns absent from the frame are simply not formatted.
    column_formats = {
        "Avg Score ⬆️": "{:5.2f}",
        "Buzz Accuracy": "{:>6.1%}",
        "Buzz Position": "{:>6.2f}",
        "Win Rate w/ Humans": "{:>6.1%}",
        "Win Rate w/ Humans (Aggressive)": "{:>6.1%}",
    }
    return df.style.format(column_formats).map(colour_pos_neg, subset=["Avg Score ⬆️"])


def fetch_bonus_leaderboard(style: bool = True):
    """Load the bonus leaderboard table from the local results directory.

    The data comes from ``populate.get_bonuses_leaderboard_df`` called with
    ``EVAL_RESULTS_PATH`` and the literal ``"tiny_eval"``.

    Args:
        style: When True (default), return a pandas ``Styler`` that renders
            the two accuracy columns as percentages. When False, return the
            unformatted DataFrame.

    Returns:
        Either the raw DataFrame or a ``Styler`` wrapping it, per ``style``.
    """
    df = populate.get_bonuses_leaderboard_df(EVAL_RESULTS_PATH, "tiny_eval")

    # Skip Styler construction entirely when the caller only wants raw data.
    if not style:
        return df

    column_formats = {
        "Question Accuracy": "{:>6.1%}",
        "Part Accuracy": "{:>6.1%}",
    }
    return df.style.format(column_formats)


def refresh_leaderboard(style: bool = True):
    """Re-download the results snapshot and rebuild both leaderboard tables.

    Args:
        style: Forwarded unchanged to ``fetch_tossup_leaderboard`` and
            ``fetch_bonus_leaderboard``.

    Returns:
        A ``(tossup, bonus)`` pair, in that order.
    """
    # The download must complete before either table is read from disk.
    download_leaderboard_data()
    return fetch_tossup_leaderboard(style), fetch_bonus_leaderboard(style)


def create_leaderboard_interface(app):
    """Build the leaderboard UI: a refresh button, two tables, and refresh wiring.

    Components are created in display order: timer, refresh button, tossup
    heading and table, bonus heading and table. Both tables are then refreshed
    on every timer tick, on button click, and on ``app.load``.

    Args:
        app: Object whose ``load`` event is used as a refresh trigger
            (presumably the enclosing ``gr.Blocks`` — confirm with the caller).

    Returns:
        None. The components are registered as a side effect of construction.
    """
    leaderboard_timer = gr.Timer(LEADERBOARD_REFRESH_INTERVAL)
    # The emoji in the labels below were previously stored as mojibake
    # (UTF-8 bytes mis-decoded as a single-byte codepage); restored here.
    refresh_btn = gr.Button("🔄 Refresh")

    gr.Markdown("## 📚 Tossup Round Leaderboard")
    tossup_df = fetch_tossup_leaderboard(style=False)
    logger.info(f"Tossup dataframe columns: {tossup_df.columns}")
    tossup_leaderboard = Leaderboard(
        value=tossup_df,
        search_columns=["Submission"],
        # One entry per expected column: the submission name plus five metrics.
        datatype=["str", "number", "number", "number", "number", "number"],
        elem_id="tossup-table",
        interactive=False,  # Ensure it's not interactive
    )

    gr.Markdown("## 📚 Bonus Round Leaderboard")
    bonus_df = fetch_bonus_leaderboard(style=False)
    logger.info(f"Bonus dataframe columns: {bonus_df.columns}")
    bonus_leaderboard = Leaderboard(
        value=bonus_df,
        search_columns=["Submission"],
        # One entry per expected column: the submission name plus two metrics.
        datatype=["str", "number", "number"],
        elem_id="bonus-table",
        interactive=False,  # Ensure it's not interactive
    )

    # NOTE(review): with no inputs, refresh_leaderboard runs with its default
    # style=True, whereas the initial values above use style=False — confirm
    # that Leaderboard accepts the styled output on refresh.
    gr.on(
        triggers=[leaderboard_timer.tick, refresh_btn.click, app.load],
        fn=refresh_leaderboard,
        inputs=[],
        outputs=[tossup_leaderboard, bonus_leaderboard],
    )