# Spaces: Running
# File size: 5,619 Bytes
# (Page-extraction residue condensed: the per-line commit hashes 4781b83 /
# 0d208c0 / 1b75b9d and the 1-115 line-number gutter are not part of the program.)
import gradio as gr
import pandas as pd
import plotly.express as px
def display_table(exam_type):
    """Load the results table for one benchmark, ordered for display.

    Args:
        exam_type: Either ``"Armenian Exams"`` (reads
            ``unified_exam_results.csv``) or ``"MMLU-Pro-Hy"`` (reads
            ``mmlu_pro_hy_results.csv``). Both paths are relative.

    Returns:
        A DataFrame sorted by ``Average`` in descending order, with
        ``Average`` moved to the second column position and all numeric
        values rounded to 4 decimals. For MMLU-Pro-Hy the ``Average`` column
        is computed as the row-wise mean of the subject columns, the
        ``Accuracy`` column is dropped and ``Other`` is moved to the end.

    Raises:
        ValueError: If ``exam_type`` is not one of the two supported names.
    """
    if exam_type == "Armenian Exams":
        df = pd.read_csv('unified_exam_results.csv')
        df = df.sort_values(by='Average', ascending=False)
        cols = df.columns.tolist()
        # Move 'Average' right after the first column.
        cols.insert(1, cols.pop(cols.index('Average')))
        # The embedded newline lets the long header wrap in the rendered table.
        df = df[cols].rename(
            columns={'Armenian language and literature': 'Armenian language\nand literature'}
        )
    elif exam_type == "MMLU-Pro-Hy":
        df = pd.read_csv('mmlu_pro_hy_results.csv')
        subject_cols = ['Biology', 'Business', 'Chemistry', 'Computer Science', 'Economics', 'Engineering', 'Health', 'History', 'Law', 'Math', 'Other', 'Philosophy', 'Physics', 'Psychology']
        # The average is recomputed from the per-subject columns.
        df['Average'] = df[subject_cols].mean(axis=1)
        df = df.sort_values(by='Average', ascending=False)
        cols = df.columns.tolist()
        cols.remove('Accuracy')
        # 'Average' goes second, the catch-all 'Other' subject goes last.
        cols.insert(1, cols.pop(cols.index('Average')))
        cols.append(cols.pop(cols.index('Other')))
        df = df[cols]
    else:
        # Previously an unknown name fell through to `return df` with `df`
        # unbound; fail with a message that names the bad value instead.
        raise ValueError(f"Unknown exam type: {exam_type!r}")
    return df.round(4)
def create_bar_chart(exam_type, plot_column):
    """Build a horizontal bar chart of per-model values for one column.

    Args:
        exam_type: ``"Armenian Exams"`` or ``"MMLU-Pro-Hy"``; selects which
            CSV file is read and how the bars are coloured.
        plot_column: Name of the column plotted on the x axis; also used as
            the chart title.

    Returns:
        The Plotly figure for a recognised ``exam_type``. Any other value
        falls through and yields ``None``.
    """

    def finalize(figure, heading, axis_max):
        # Layout applied to both chart variants: fixed x range, font sizes,
        # and a reversed y axis so the first row is drawn at the top.
        figure.update_layout(
            xaxis=dict(range=[0, axis_max]),
            title=dict(text=heading, font=dict(size=16)),
            xaxis_title=dict(font=dict(size=12)),
            yaxis_title=dict(font=dict(size=12)),
            yaxis=dict(autorange="reversed"),
            autosize=True,
        )
        return figure

    chart_title = f'{plot_column}'

    if exam_type == "Armenian Exams":
        scores = pd.read_csv('unified_exam_results.csv')
        # Highest value first; ties are ordered by model name.
        scores = scores.sort_values(
            by=[plot_column, 'Model'], ascending=[False, True]
        )
        scores = scores.reset_index(drop=True)

        def grade(points):
            # Below 8 fails, 8 through 18 passes, everything else is a
            # distinction.
            if points < 8:
                return "Fail"
            if points <= 18:
                return "Pass"
            return "Distinction"

        scores['Test Result'] = scores[plot_column].apply(grade)
        palette = {
            "Fail": "#ff5f56",
            "Pass": "#ffbd2e",
            "Distinction": "#27c93f",
        }
        figure = px.bar(
            scores,
            x=plot_column,
            y='Model',
            color=scores['Test Result'],
            color_discrete_map=palette,
            labels={plot_column: 'Score', 'Model': 'Model'},
            title=chart_title,
            orientation='h',
        )
        return finalize(figure, chart_title, 20)

    if exam_type == "MMLU-Pro-Hy":
        results = pd.read_csv('mmlu_pro_hy_results.csv')
        subjects = ['Biology', 'Business', 'Chemistry', 'Computer Science', 'Economics', 'Engineering', 'Health', 'History', 'Law', 'Math', 'Other', 'Philosophy', 'Physics', 'Psychology']
        # Row-wise mean over the subject columns.
        results['Average'] = results[subjects].mean(axis=1)
        results = results.sort_values(by=plot_column, ascending=False)
        results = results.reset_index(drop=True)
        results = results.drop(columns=['Accuracy'])
        figure = px.bar(
            results,
            x=plot_column,
            y='Model',
            color=plot_column,
            color_continuous_scale='Viridis',
            labels={plot_column: 'Accuracy', 'Model': 'Model'},
            title=chart_title,
            orientation='h',
            range_color=[0, 1],
        )
        return finalize(figure, chart_title, 1.0)

    return None
# UI definition: one tab per benchmark, each with a short description, the
# results table, a column selector and a bar chart of the selected column.
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("Armenian Unified Exams"):
            gr.Markdown("# Armenian Unified Test Exams")
            gr.HTML("""
            <div style="font-size: 16px;">
            This benchmark contains results of various Language Models on Armenian Unified Test Exams for Armenian language and literature, Armenian history and mathematics. The scoring system is a 20-point scale, where 0-8 is a Fail, 8-18 is a Pass, and 18-20 is a Distinction.
            </div>
            """)
            # A callable is passed as the value so the table is produced by
            # calling display_table rather than captured once at import time.
            table_output_armenian = gr.DataFrame(value=lambda: display_table("Armenian Exams"))
            plot_column_dropdown = gr.Dropdown(choices=['Average', 'Armenian language and literature', 'Armenian history', 'Mathematics'], value='Average', label='Select Column to Plot')
            # The plot takes the dropdown as its input, so the chart is built
            # from the currently selected column.
            plot_output_armenian = gr.Plot(lambda column: create_bar_chart("Armenian Exams", column), inputs=plot_column_dropdown)
        with gr.TabItem("MMLU-Pro-Hy"):
            gr.Markdown("# MMLU-Pro Translated to Armenian (MMLU-Pro-Hy)")
            gr.HTML("""
            <div style="font-size: 16px;">
            This benchmark contains results of various Language Models on the MMLU-Pro benchmark, translated into Armenian. MMLU-Pro is a massive multi-task test in MCQA format. The scores represent accuracy.
            </div>
            """)
            table_output_mmlu = gr.DataFrame(value=lambda: display_table("MMLU-Pro-Hy"))
            # Dropdown order: 'Average' first and 'Other' last.
            subject_cols = ['Average','Biology', 'Business', 'Chemistry', 'Computer Science', 'Economics', 'Engineering', 'Health', 'History', 'Law', 'Math', 'Philosophy', 'Physics', 'Psychology','Other']
            plot_column_dropdown_mmlu = gr.Dropdown(choices=subject_cols, value='Average', label='Select Column to Plot')
            plot_output_mmlu = gr.Plot(lambda column: create_bar_chart("MMLU-Pro-Hy", column), inputs=plot_column_dropdown_mmlu)

# The stray trailing "|" (page-extraction residue) left a dangling operator
# after this call and made the file unparsable; it has been removed.
app.launch(share=True, debug=True)