update
app.py ADDED
@@ -0,0 +1,70 @@
+import gradio as gr
+import pandas as pd
+from glob import glob
+
+
+csv_results = glob("results/*.csv")
+# Load the CSV files into a dict keyed by file path, with the parsed data as values
+data = {file: pd.read_csv(file) for file in csv_results}
+
+
+def calculate_accuracy(df):
+    return df["parsed_judge_response"].mean() * 100
+
+
+def accuracy_breakdown(df):
+    # Per-level accuracy across the four difficulty levels
+    return (df.groupby("difficulty_level")["parsed_judge_response"].mean() * 100).values
+
+
+# Define the column names with icons
+headers_with_icons = [
+    "🤖 Model Name",
+    "⭐ Overall",
+    "📗 Level 1",
+    "📘 Level 2",
+    "📙 Level 3",
+    "🔬 Level 4",
+]
+
+
+accuracy = {file: calculate_accuracy(data[file]) for file in data}
+
+# Create a list to hold one leaderboard row per results file
+data_for_df = []
+
+# Iterate over each file and its corresponding DataFrame in the data dictionary
+for file, df in data.items():
+    # Get the overall accuracy and round it
+    overall_accuracy = round(calculate_accuracy(df), 2)
+    # Get the per-level breakdown and round each value
+    breakdown_accuracy = [round(acc, 2) for acc in accuracy_breakdown(df)]
+    # Derive the model name from the file name
+    model_name = file.split("/")[-1].replace(".csv", "")
+    # Append the row: model name, overall accuracy, then the four level accuracies
+    data_for_df.append([model_name, overall_accuracy] + breakdown_accuracy)
+
+# Define the plain column names; adjust these if the number of difficulty levels changes
+column_names = [
+    "Model Name",
+    "Overall Accuracy",
+    "Level 1 Accuracy",
+    "Level 2 Accuracy",
+    "Level 3 Accuracy",
+    "Level 4 Accuracy",
+]
+
+# Create the DataFrame, swap in the icon headers, and sort by overall accuracy
+accuracy_df = pd.DataFrame(data_for_df, columns=column_names)
+accuracy_df.columns = headers_with_icons
+accuracy_df.sort_values(by="⭐ Overall", ascending=False, inplace=True)
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("# FSMBench Leaderboard")
+    # TODO: add link to home page and dataset
+
+    leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons)
+
+demo.launch()
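
For context, the app assumes every CSV under results/ contains at least a parsed_judge_response column of 0/1 judge scores and a difficulty_level column covering all four levels; accuracy_breakdown returns one value per level present in a file, so a file missing a level would yield a row that no longer matches the six column names. Below is a minimal sketch of a compatible results file and a sanity check of the two helpers — the file name and sample scores are made up for illustration:

import pandas as pd

# Hypothetical sample: 0/1 judge scores across the four difficulty levels
sample = pd.DataFrame({
    "difficulty_level": [1, 1, 2, 2, 3, 3, 4, 4],
    "parsed_judge_response": [1, 0, 1, 1, 0, 1, 1, 0],
})
sample.to_csv("results/example-model.csv", index=False)

# Overall accuracy: mean of the 0/1 scores as a percentage
print(sample["parsed_judge_response"].mean() * 100)  # 62.5

# Per-level accuracy, one value per difficulty level in sorted order
print(sample.groupby("difficulty_level")["parsed_judge_response"].mean() * 100)

With such files in place, running python app.py serves the leaderboard locally on Gradio's default port (7860).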