daniel7an committed on
Commit
d21b14f
·
1 Parent(s): 33d83a0
Files changed (3) hide show
  1. benchmark_results.csv +10 -0
  2. main.py +45 -0
  3. requirements.txt +3 -0
benchmark_results.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ model,armenian_language_score,armenian_history_score,mathematics_score,average_score
2
+ claude-3-7-sonnet-20250219,10.5,7.75,15.0,11.08
3
+ claude-3-5-sonnet-20241022,10.0,9.25,12.75,10.67
4
+ gemini-2.0-flash,5.5,6.75,17.25,9.83
5
+ gpt-4o,6.75,6.75,13.25,8.92
6
+ qwen-max-2025-01-25,7.25,4.5,14.25,8.67
7
+ gemini-1.5-flash,4.75,3.75,15.0,7.83
8
+ deepseek-ai/DeepSeek-V3,5.25,5.0,12.25,7.5
9
+ Meta-Llama-3.3-70B-Instruct,4.5,5.25,11.5,7.08
10
+ claude-3-5-haiku-20241022,5.0,3.75,10.75,6.5
main.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import plotly.express as px
4
+
5
def display_table(csv_path: str = 'benchmark_results.csv') -> pd.DataFrame:
    """Load the benchmark results into a DataFrame.

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            'benchmark_results.csv'; a relative path is resolved against
            the current working directory.

    Returns:
        The parsed contents of the CSV file.
    """
    return pd.read_csv(csv_path)
8
+
9
def create_bar_chart(csv_path: str = 'benchmark_results.csv'):
    """Build a horizontal bar chart of the average score of every model.

    Args:
        csv_path: Path of the CSV file to read. Defaults to
            'benchmark_results.csv'. The file must provide the columns
            'model', 'average_score', 'armenian_language_score',
            'armenian_history_score' and 'mathematics_score'.

    Returns:
        The Plotly figure, one bar per CSV row.
    """
    df = pd.read_csv(csv_path)

    # The x axis and the colour scale are both pinned to this fixed range so
    # that bar length and bar colour are read against the same scale.
    score_range = (0, 20)

    fig = px.bar(
        df,
        x='average_score',
        y='model',
        color='average_score',
        color_continuous_scale='tealrose',
        hover_data=['armenian_language_score', 'armenian_history_score', 'mathematics_score'],
        labels={'average_score': 'Average Score', 'model': 'Model'},
        orientation='h',
        range_color=list(score_range),
    )

    fig.update_layout(
        xaxis=dict(range=list(score_range)),
        # The title is set only here, together with its font size.
        title=dict(text='Average Score per Model', font=dict(size=16)),
        xaxis_title=dict(font=dict(size=12)),
        yaxis_title=dict(font=dict(size=12)),
        # Reversed so that the first CSV row is drawn at the top of the chart.
        yaxis=dict(autorange="reversed"),
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Arial",
            font_color="black",
        ),
    )

    return fig
37
+
38
# Page layout: a heading, the results table, then the bar chart.
with gr.Blocks() as app:
    gr.Markdown("# ArmBench Leaderboard")
    # display_table() is called right here, while the layout is being built.
    table_output = gr.DataFrame(value=display_table())
    # The chart function itself (not its result) is handed to the component.
    plot_output = gr.Plot(value=create_bar_chart)

# Start the web server with share=True.
app.launch(share=True)
44
+
45
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.19.0
2
+ pandas==2.2.3
3
+ plotly==6.0.0