File size: 2,048 Bytes
a3c4484
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from glob import glob
from pathlib import Path

import gradio as gr
import pandas as pd


# Paths of all per-model result files. glob() makes no ordering guarantee, so
# sort the paths to keep the load order (and therefore the row order of tied
# models) the same from run to run.
csv_results = sorted(glob("results/*.csv"))
# Map each results file path to its contents loaded as a DataFrame.
data = {file: pd.read_csv(file) for file in csv_results}


def calculate_accuracy(df):
    """Return the mean of the ``parsed_judge_response`` column as a percentage."""
    judged = df["parsed_judge_response"]
    mean_score = judged.mean()
    return mean_score * 100


def accuracy_breakdown(df):
    """Return per-difficulty-level accuracy percentages as a bare array.

    Rows are grouped by ``difficulty_level`` and the mean of
    ``parsed_judge_response`` in each group is scaled to a percentage. Only
    the values are returned (no level labels), one entry per level that is
    present in ``df``.
    """
    per_level_mean = df.groupby("difficulty_level")["parsed_judge_response"].mean()
    per_level_percent = per_level_mean * 100
    return per_level_percent.values


# Display headers for the leaderboard table, each prefixed with an icon.
# The icons were previously stored as mis-decoded bytes (mojibake) and are
# restored here as real emoji.
# NOTE(review): the Level 2 icon had lost a byte; the magnifying glass is the
# best-fit reconstruction - confirm against the intended design.
headers_with_icons = [
    "🤖 Model Name",
    "⭐ Overall",
    "📈 Level 1",
    "🔍 Level 2",
    "📘 Level 3",
    "🔬 Level 4",
]


# Overall accuracy (in percent) for each results file, keyed by file path.
accuracy = {file: calculate_accuracy(df) for file, df in data.items()}

# Every difficulty level that occurs in any results file, in sorted order.
# Each row below is aligned to this shared list, so a file that lacks a level
# gets NaN in that level's column instead of shifting later levels leftwards.
all_levels = sorted(
    set().union(
        *(df["difficulty_level"].dropna().unique() for df in data.values())
    )
)

# One leaderboard row per results file:
# [model name, overall accuracy, per-level accuracies...].
data_for_df = []
for file, df in data.items():
    # Reuse the overall accuracy computed above rather than recomputing it.
    overall_accuracy = round(accuracy[file], 2)
    # accuracy_breakdown() returns bare values in sorted group-key order (the
    # groupby default), so pair them with this file's own sorted levels.
    levels_present = sorted(df["difficulty_level"].dropna().unique())
    level_accuracy = dict(zip(levels_present, accuracy_breakdown(df)))
    breakdown_accuracy = [
        round(level_accuracy.get(level, float("nan")), 2) for level in all_levels
    ]
    # The model name is the file name without its directory or extension;
    # Path.stem does not depend on the platform's path separator.
    model_name = Path(file).stem
    data_for_df.append([model_name, overall_accuracy, *breakdown_accuracy])

# Plain column names used to build the frame. This layout assumes exactly four
# difficulty levels across the results files; adjust it if that changes.
column_names = [
    "Model Name",
    "Overall Accuracy",
    "Level 1 Accuracy",
    "Level 2 Accuracy",
    "Level 3 Accuracy",
    "Level 4 Accuracy",
]

# Build the table, swap in the display headers, and rank models best-first.
accuracy_df = pd.DataFrame(data_for_df, columns=column_names)
accuracy_df.columns = headers_with_icons
accuracy_df.sort_values(by="⭐ Overall", ascending=False, inplace=True)


# Leaderboard page: a title followed by the accuracy table.
with gr.Blocks() as demo:
    gr.Markdown("# FSMBench Leaderboard")
    # TODO: add links to the home page and dataset.

    leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons)

# Launch after the Blocks context has closed, following the usage pattern
# shown in the Gradio Blocks documentation.
demo.launch()