commit
- app.py +10 -10
- data_handler.py +0 -10
app.py CHANGED
````diff
@@ -17,8 +17,11 @@ def refresh_data():
 
     global_output_armenian = unified_exam_result_table(global_unified_exam_df)
     global_output_mmlu = mmlu_result_table(global_mmlu_df)
-
-
+
+    unified_chart = unified_exam_chart(global_output_armenian, 'Average')
+    mmlu_chart_output = mmlu_chart(global_output_mmlu, 'Average')
+
+    return global_output_armenian, global_output_mmlu, unified_chart, mmlu_chart_output, 'Average', 'Average'
 
 def main():
     global global_mmlu_df, global_unified_exam_df, global_output_armenian, global_output_mmlu
@@ -60,10 +63,10 @@ def main():
     **Creator Company:** Metric AI Research Lab, Yerevan, Armenia."""
     )
     gr.Image("logo.png", width=200, show_label=False, show_download_button=False, show_fullscreen_button=False, show_share_button=False)
-    gr.Markdown("""
+    gr.Markdown("""
     - [Website](https://metric.am/)
     - [Hugging Face](https://huggingface.co/Metric-AI)
-
+
     MMLU-Pro-Hy is a massive multi-task test in MCQA format, inspired by the original MMLU benchmark, adapted for the Armenian language. The Armenian Unified Exams benchmark allows for comparison with human-level knowledge.
     """
     )
@@ -72,7 +75,7 @@ def main():
     """
     To submit a model for evaluation, please follow these steps:
    1. **Evaluate your model**:
-
+        - Follow the evaluation script provided here: [https://github.com/Anania-AI/Arm-LLM-Benchmark](https://github.com/Anania-AI/Arm-LLM-Benchmark)
    2. **Format your submission file**:
        - After evaluation, you will get a `result.json` file. Ensure the file follows this format:
        ```json
@@ -111,12 +114,9 @@ def main():
     refresh_button = gr.Button("Refresh Data")
     refresh_button.click(
         fn=refresh_data,
-        outputs=[table_output_armenian,
-                 table_output_mmlu,
-                 plot_output_armenian,
-                 plot_output_mmlu
-                 ],
+        outputs=[table_output_armenian, table_output_mmlu, plot_output_armenian, plot_output_mmlu, plot_column_dropdown_unified_exam, plot_column_dropdown_mmlu],
     )
+
     app.launch(share=True, debug=True)
 
 if __name__ == "__main__":
````
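Net effect of the app.py changes: `refresh_data` now also rebuilds the two charts and returns six values instead of four, and the `click` handler lists six output components to match. Below is a minimal runnable sketch of that wiring, using the component names from the diff but placeholder data and layout; Gradio maps the values returned by `fn` positionally onto `outputs`, so the two trailing `'Average'` strings reset the plot-column dropdowns.

```python
import gradio as gr
import pandas as pd
import plotly.express as px

def refresh_data():
    # Placeholder data; the real app rebuilds these from its global dataframes.
    df = pd.DataFrame({"Model": ["model-a", "model-b"], "Average": [0.71, 0.64]})
    chart = px.bar(df, x="Average", y="Model", orientation="h")
    # Six return values for six output components; the trailing 'Average'
    # strings reset the two dropdowns to their default selection.
    return df, df, chart, chart, "Average", "Average"

with gr.Blocks() as app:
    table_output_armenian = gr.Dataframe()
    table_output_mmlu = gr.Dataframe()
    plot_output_armenian = gr.Plot()
    plot_output_mmlu = gr.Plot()
    plot_column_dropdown_unified_exam = gr.Dropdown(choices=["Average"], value="Average")
    plot_column_dropdown_mmlu = gr.Dropdown(choices=["Average"], value="Average")
    refresh_button = gr.Button("Refresh Data")
    # Gradio maps the function's return tuple positionally onto `outputs`.
    refresh_button.click(
        fn=refresh_data,
        outputs=[table_output_armenian, table_output_mmlu,
                 plot_output_armenian, plot_output_mmlu,
                 plot_column_dropdown_unified_exam, plot_column_dropdown_mmlu],
    )

if __name__ == "__main__":
    app.launch()
```

Resetting the dropdowns alongside the tables and charts keeps the controls consistent after a refresh: the redrawn charts are built for 'Average', so the dropdowns should show 'Average' too.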
data_handler.py CHANGED
````diff
@@ -59,10 +59,6 @@ def unified_exam_chart(unified_exam_df, plot_column):
         title=title,
         orientation='h'
     )
-    # max_chart_height = 600
-
-    # chart_height = df.shape[0] * 50
-    # chart_height = min(chart_height, max_chart_height)
 
     fig.update_layout(
         xaxis=dict(range=[0, x_range_max]),
@@ -70,7 +66,6 @@
         xaxis_title=dict(font=dict(size=12)),
         yaxis_title=dict(font=dict(size=12)),
         yaxis=dict(autorange="reversed"),
-        # height=chart_height,
         width=1400
     )
     return fig
@@ -93,10 +88,6 @@ def mmlu_chart(mmlu_df, plot_column):
         orientation='h',
         range_color=[0,1]
     )
-    # max_chart_height = 600
-
-    # chart_height = df.shape[0] * 50
-    # chart_height = min(chart_height, max_chart_height)
 
     fig.update_layout(
         xaxis=dict(range=[0, x_range_max]),
@@ -104,7 +95,6 @@
         xaxis_title=dict(font=dict(size=12)),
         yaxis_title=dict(font=dict(size=12)),
         yaxis=dict(autorange="reversed"),
-        # height=chart_height,
         width=1400
     )
     return fig
````
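The data_handler.py side is pure cleanup: every deleted line was already commented out. The dead code sketched a per-row chart height capped at 600 px that neither function ever applied; both keep a fixed `width=1400` and let Plotly choose the height. For reference only, here is a sketch of that logic if it were re-enabled, reconstructed from the deleted comments; the `Model` y-column and the sample data are assumptions, not taken from the file.

```python
import pandas as pd
import plotly.express as px

def chart_with_dynamic_height(df: pd.DataFrame, plot_column: str):
    # Same horizontal-bar setup as unified_exam_chart / mmlu_chart;
    # the y-column name is assumed here.
    fig = px.bar(df, x=plot_column, y="Model", orientation="h")
    # Reconstructed from the commented-out lines this commit deletes:
    max_chart_height = 600
    chart_height = df.shape[0] * 50                     # 50 px per leaderboard row
    chart_height = min(chart_height, max_chart_height)  # capped at 600 px
    fig.update_layout(
        yaxis=dict(autorange="reversed"),
        height=chart_height,
        width=1400,
    )
    return fig

if __name__ == "__main__":
    demo = pd.DataFrame({"Model": ["model-a", "model-b"], "Average": [0.71, 0.64]})
    chart_with_dynamic_height(demo, "Average").show()
```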