taesiri committed on
Commit
a3c4484
·
1 Parent(s): 8ca7374
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from glob import glob
from pathlib import Path

import gradio as gr
import pandas as pd
4
+
5
+
6
# Find every per-model result file, then read each one into a DataFrame.
# `data` maps the file path (exactly as glob returned it) to its table.
csv_results = glob("results/*.csv")
data = dict(zip(csv_results, map(pd.read_csv, csv_results)))
9
+
10
+
11
def calculate_accuracy(df):
    """Return the overall accuracy of one results table, in percent.

    The value is the mean of the ``parsed_judge_response`` column
    multiplied by 100.
    """
    judged = df["parsed_judge_response"]
    return judged.mean() * 100
13
+
14
+
15
def accuracy_breakdown(df, levels=None):
    """Return the accuracy for each difficulty level, in percent.

    Rows are grouped by ``difficulty_level`` and the mean of
    ``parsed_judge_response`` inside each group is multiplied by 100.

    Args:
        df: Results table with ``difficulty_level`` and
            ``parsed_judge_response`` columns.
        levels: Optional sequence of level labels. When given, the result
            holds exactly one entry per label, in that order, with NaN for
            any label that has no rows in ``df``. When omitted, the result
            holds one entry per level actually present in ``df``, in
            sorted level order.

    Returns:
        A NumPy array of percentages.
    """
    per_level = df.groupby("difficulty_level")["parsed_judge_response"].mean() * 100
    if levels is not None:
        # Pin the output slots so a level missing from this file cannot
        # shift the remaining values into the wrong position.
        per_level = per_level.reindex(list(levels))
    return per_level.to_numpy()
18
+
19
+
20
# Column headers shown in the leaderboard table, each prefixed with an icon.
# The icons had been stored as mis-decoded UTF-8 byte sequences; they are
# written here as the intended emoji.
# NOTE(review): the "Level 2" icon lost a byte in the garbled text and is
# reconstructed as the magnifying glass -- confirm against the upstream file.
headers_with_icons = [
    "🤖 Model Name",
    "⭐ Overall",
    "📈 Level 1",
    "🔍 Level 2",
    "📘 Level 3",
    "🔬 Level 4",
]
29
+
30
+
31
# Overall accuracy (percent) for every result file, keyed by file path.
accuracy = {file: calculate_accuracy(frame) for file, frame in data.items()}

# One leaderboard row per result file:
# [model name, overall accuracy, per-level accuracies...], rounded to 2 places.
data_for_df = []
for file, df in data.items():
    # Reuse the value computed above rather than recomputing it per file.
    overall_accuracy = round(accuracy[file], 2)
    breakdown_accuracy = [round(acc, 2) for acc in accuracy_breakdown(df)]
    # Path.stem drops the directory part and the final ".csv" suffix using
    # the platform's own path rules, instead of assuming "/" separators and
    # removing ".csv" wherever it appears in the name.
    model_name = Path(file).stem
    data_for_df.append([model_name, overall_accuracy] + breakdown_accuracy)
47
+
48
# Plain column labels used when the table is first built; extend the level
# range here if the number of difficulty levels changes.
column_names = ["Model Name", "Overall Accuracy"] + [
    f"Level {level} Accuracy" for level in range(1, 5)
]

# Build the leaderboard table, swap in the icon headers, then rank the
# models by overall accuracy with the best one first.
accuracy_df = pd.DataFrame(data_for_df, columns=column_names)
accuracy_df = accuracy_df.set_axis(headers_with_icons, axis=1)
accuracy_df = accuracy_df.sort_values(by="⭐ Overall", ascending=False)
62
+
63
+
64
# Leaderboard page: a title followed by the ranked accuracy table.
demo = gr.Blocks()
with demo:
    gr.Markdown("# FSMBench Leaderboard")
    # TODO: add link to home page and dataset

    leader_board = gr.Dataframe(value=accuracy_df, headers=headers_with_icons)

# Start serving the app.
demo.launch()