# Spaces: Sleeping
from glob import glob
from pathlib import Path

import gradio as gr
import pandas as pd
# Paths of every result file; each CSV under results/ is treated as one model.
csv_results = glob("results/*.csv")

# Map each CSV path to the DataFrame parsed from it, so later steps can look
# results up by file.
data = dict(zip(csv_results, map(pd.read_csv, csv_results)))
def calculate_accuracy(df):
    """Return the overall accuracy of *df* as a percentage.

    The accuracy is the mean of the ``parsed_judge_response`` column scaled
    by 100.

    Args:
        df: DataFrame with a ``parsed_judge_response`` column.
            NOTE(review): presumably 0/1 (or boolean) judgements - confirm.

    Returns:
        The column mean multiplied by 100.
    """
    judged = df["parsed_judge_response"]
    mean_score = judged.mean()
    return mean_score * 100
def accuracy_breakdown(df):
    """Return the accuracy (in percent) for each difficulty level in *df*.

    Rows are grouped by ``difficulty_level`` and the mean of
    ``parsed_judge_response`` is taken per group, then scaled by 100.

    Args:
        df: DataFrame with ``difficulty_level`` and
            ``parsed_judge_response`` columns.

    Returns:
        An array with one value per level present in *df*, in the order
        ``groupby`` yields the groups (sorted by level by default). Only the
        values are returned, not the level labels, so a level missing from
        *df* shortens the array rather than leaving a gap.
    """
    per_level_mean = df.groupby("difficulty_level")["parsed_judge_response"].mean()
    per_level_percent = per_level_mean * 100
    return per_level_percent.values
# Column headers shown in the leaderboard, in display order.
# NOTE(review): the icon prefixes look mis-encoded (probably UTF-8 emoji read
# through a single-byte code page). They are kept exactly as found so the
# displayed text does not change here - restore the intended emoji from the
# original file.
headers_with_icons = [
    "π€ Model Name",
    "β Overall",
    "π Level 1",
    "π Level 2",
    "π Level 3",
    "π¬ Level 4",
]

# Overall accuracy (percent) for each result file, keyed by CSV path.
accuracy = {file: calculate_accuracy(data[file]) for file in data}

# One row per model: [model name, overall accuracy, per-level accuracies...].
data_for_df = []
for file, df in data.items():
    # Reuse the value computed above instead of re-scanning the frame.
    overall_accuracy = round(accuracy[file], 2)
    # NOTE(review): accuracy_breakdown returns values positionally, so this
    # assumes every CSV contains all four difficulty levels; a missing level
    # would shift the remaining values into the wrong columns - confirm.
    breakdown_accuracy = [round(acc, 2) for acc in accuracy_breakdown(df)]
    # The model name is the file name without its extension. Path.stem handles
    # any path separator and only strips the final suffix, unlike
    # split("/") + replace(".csv", ""), which would also remove ".csv" from
    # the middle of a name.
    model_name = Path(file).stem
    data_for_df.append([model_name, overall_accuracy] + breakdown_accuracy)

# Plain column names used to build the frame; adjust if the number of
# difficulty levels changes.
column_names = [
    "Model Name",
    "Overall Accuracy",
    "Level 1 Accuracy",
    "Level 2 Accuracy",
    "Level 3 Accuracy",
    "Level 4 Accuracy",
]

# Build the table, swap in the display headers, and rank best model first.
accuracy_df = pd.DataFrame(data_for_df, columns=column_names)
accuracy_df.columns = headers_with_icons
# Sort by the "Overall" header taken from the list itself so the sort key can
# never drift from the displayed column name.
accuracy_df.sort_values(by=headers_with_icons[1], ascending=False, inplace=True)
# Assemble the page: a title followed by the leaderboard table.
demo = gr.Blocks()
with demo:
    gr.Markdown(value="# FSMBench Leaderboard")
    # TODO: add link to home page and dataset
    leader_board = gr.Dataframe(value=accuracy_df, headers=headers_with_icons)

# Launch the Gradio app.
demo.launch()