kz-transformers commited on
Commit
e7ae6b8
·
verified ·
1 Parent(s): 3095766

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +242 -63
app.py CHANGED
@@ -179,90 +179,269 @@ def build_demo():
179
  # print(os.system('cd src/gen/ && python show_result.py --output'))
180
 
181
 
182
- def update_board():
183
- need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
184
- logging.info("Updating the judgement: %s", need_reset)
185
- if need_reset != "1":
186
- # return
187
- pass
188
- os.environ[RESET_JUDGEMENT_ENV] = "0"
189
-
190
- # `shutil.rmtree("./m_data")` is a Python command that removes a directory and all its contents
191
- # recursively. In this specific context, it is used to delete the directory named "m_data" along
192
- # with all its files and subdirectories. This command helps in cleaning up the existing data in
193
- # the "m_data" directory before downloading new dataset files into it.
194
- # shutil.rmtree("./m_data")
195
- # shutil.rmtree("./data")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  download_dataset("kz-transformers/s-openbench-eval", "m_data")
197
- data_list = [{"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713}, {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744}, {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704}, {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, "kk_human_society_rights_unt_mc": 0.7713004484304933}, {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224}, {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619}, {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412}, {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767, "kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  files_list = glob.glob("./m_data/model_data/external/*.json")
199
- print(f'FILES LIST: {files_list}')
 
200
  for file in files_list:
201
  with open(file) as f:
202
- print(f'trying to read external submit file: {file}')
203
  try:
204
  data = json.load(f)
205
- print(f'succeed to read: {file}, got {len(data)}')
 
 
 
 
 
 
 
 
 
206
  data_list.append(data)
207
  except Exception as e:
208
- pass # data was badly formatted, should not fail
209
- print("DATALIST: ", data_list)
210
 
 
 
211
  with open("genned.json", "w") as f:
212
  json.dump(data_list, f)
213
-
214
-
215
  API.upload_file(
216
- path_or_fileobj="genned.json",
217
- path_in_repo="leaderboard.json",
218
- repo_id="kz-transformers/kaz-llm-lb-metainfo",
219
- repo_type="dataset",
220
  )
221
- restart_space()
222
-
223
-
224
- # gen_judgement_file = os.path.join(HF_HOME, "src/gen/gen_judgement.py")
225
- # subprocess.run(["python3", gen_judgement_file], check=True)
226
 
227
- def update_board_():
228
  need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
229
- logging.info("Updating the judgement: %s", need_reset)
230
  if need_reset != "1":
231
- # return
232
  pass
233
  os.environ[RESET_JUDGEMENT_ENV] = "0"
234
 
235
- # `shutil.rmtree("./m_data")` is a Python command that removes a directory and all its contents
236
- # recursively. In this specific context, it is used to delete the directory named "m_data" along
237
- # with all its files and subdirectories. This command helps in cleaning up the existing data in
238
- # the "m_data" directory before downloading new dataset files into it.
239
- # shutil.rmtree("./m_data")
240
- # shutil.rmtree("./data")
241
- download_dataset("kz-transformers/s-openbench-eval", "m_data")
242
- data_list = [{"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713}, {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744}, {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704}, {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, "kk_human_society_rights_unt_mc": 0.7713004484304933}, {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224}, {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619}, {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412}, {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767, "kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265}]
243
- files_list = glob.glob("./m_data/model_data/external/*.json")
244
- print(f'FILES LIST: {files_list}')
245
- for file in files_list:
246
- with open(file) as f:
247
- print(f'trying to read external submit file: {file}')
248
- try:
249
- data = json.load(f)
250
- print(f'succeed to read: {file}, got {len(data)}')
251
- data_list.append(data)
252
- except Exception as e:
253
- pass # data was badly formatted, should not fail
254
- print("DATALIST: ", data_list)
255
-
256
- with open("genned.json", "w") as f:
257
- json.dump(data_list, f)
258
 
 
 
 
 
259
 
260
- API.upload_file(
261
- path_or_fileobj="genned.json",
262
- path_in_repo="leaderboard.json",
263
- repo_id="kz-transformers/kaz-llm-lb-metainfo",
264
- repo_type="dataset",
265
- )
266
 
267
  if __name__ == "__main__":
268
  os.environ[RESET_JUDGEMENT_ENV] = "1"
 
179
  # print(os.system('cd src/gen/ && python show_result.py --output'))
180
 
181
 
182
+ # def update_board():
183
+ # need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
184
+ # logging.info("Updating the judgement: %s", need_reset)
185
+ # if need_reset != "1":
186
+ # # return
187
+ # pass
188
+ # os.environ[RESET_JUDGEMENT_ENV] = "0"
189
+
190
+ # # `shutil.rmtree("./m_data")` is a Python command that removes a directory and all its contents
191
+ # # recursively. In this specific context, it is used to delete the directory named "m_data" along
192
+ # # with all its files and subdirectories. This command helps in cleaning up the existing data in
193
+ # # the "m_data" directory before downloading new dataset files into it.
194
+ # # shutil.rmtree("./m_data")
195
+ # # shutil.rmtree("./data")
196
+ # download_dataset("kz-transformers/s-openbench-eval", "m_data")
197
+ # data_list = [{"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713}, {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744}, {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704}, {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, "kk_human_society_rights_unt_mc": 0.7713004484304933}, {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224}, {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619}, {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412}, {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767, "kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265}]
198
+ # files_list = glob.glob("./m_data/model_data/external/*.json")
199
+ # print(f'FILES LIST: {files_list}')
200
+ # for file in files_list:
201
+ # with open(file) as f:
202
+ # print(f'trying to read external submit file: {file}')
203
+ # try:
204
+ # data = json.load(f)
205
+ # print(f'succeed to read: {file}, got {len(data)}')
206
+ # data_list.append(data)
207
+ # except Exception as e:
208
+ # pass # data was badly formatted, should not fail
209
+ # print("DATALIST: ", data_list)
210
+
211
+ # with open("genned.json", "w") as f:
212
+ # json.dump(data_list, f)
213
+
214
+
215
+ # API.upload_file(
216
+ # path_or_fileobj="genned.json",
217
+ # path_in_repo="leaderboard.json",
218
+ # repo_id="kz-transformers/kaz-llm-lb-metainfo",
219
+ # repo_type="dataset",
220
+ # )
221
+ # restart_space()
222
+
223
+
224
+ # # gen_judgement_file = os.path.join(HF_HOME, "src/gen/gen_judgement.py")
225
+ # # subprocess.run(["python3", gen_judgement_file], check=True)
226
+
227
+ # def update_board_():
228
+ # need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
229
+ # logging.info("Updating the judgement: %s", need_reset)
230
+ # if need_reset != "1":
231
+ # # return
232
+ # pass
233
+ # os.environ[RESET_JUDGEMENT_ENV] = "0"
234
+
235
+ # # `shutil.rmtree("./m_data")` is a Python command that removes a directory and all its contents
236
+ # # recursively. In this specific context, it is used to delete the directory named "m_data" along
237
+ # # with all its files and subdirectories. This command helps in cleaning up the existing data in
238
+ # # the "m_data" directory before downloading new dataset files into it.
239
+ # # shutil.rmtree("./m_data")
240
+ # # shutil.rmtree("./data")
241
+ # download_dataset("kz-transformers/s-openbench-eval", "m_data")
242
+ # data_list = [{"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713}, {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744}, {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704}, {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, "kk_human_society_rights_unt_mc": 0.7713004484304933}, {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224}, {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619}, {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412}, {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767, "kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265}]
243
+ # files_list = glob.glob("./m_data/model_data/external/*.json")
244
+ # print(f'FILES LIST: {files_list}')
245
+ # for file in files_list:
246
+ # with open(file) as f:
247
+ # print(f'trying to read external submit file: {file}')
248
+ # try:
249
+ # data = json.load(f)
250
+ # print(f'succeed to read: {file}, got {len(data)}')
251
+ # data_list.append(data)
252
+ # except Exception as e:
253
+ # pass # data was badly formatted, should not fail
254
+ # print("DATALIST: ", data_list)
255
+
256
+ # with open("genned.json", "w") as f:
257
+ # json.dump(data_list, f)
258
+
259
+ # API.upload_file(
260
+ # path_or_fileobj="genned.json",
261
+ # path_in_repo="leaderboard.json",
262
+ # repo_id="kz-transformers/kaz-llm-lb-metainfo",
263
+ # repo_type="dataset",
264
+ # )
265
+
266
+ def aggregate_leaderboard_data():
267
  download_dataset("kz-transformers/s-openbench-eval", "m_data")
268
+
269
+ # Start with your baseline data
270
+ data_list = [
271
+ {
272
+ "model_dtype": "torch.float16",
273
+ "model": "dummy-random-baseline",
274
+ "ppl": 0,
275
+ "mmlu_translated_kk": 0.22991508817766165,
276
+ "kk_constitution_mc": 0.25120772946859904,
277
+ "kk_dastur_mc": 0.24477611940298508,
278
+ "kazakh_and_literature_unt_mc": 0.2090443686006826,
279
+ "kk_geography_unt_mc": 0.2019790454016298,
280
+ "kk_world_history_unt_mc": 0.1986970684039088,
281
+ "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428,
282
+ "kk_english_unt_mc": 0.189804278561675,
283
+ "kk_biology_unt_mc": 0.22330729166666666,
284
+ "kk_human_society_rights_unt_mc": 0.242152466367713,
285
+ },
286
+ {
287
+ "model_dtype": "torch.float16",
288
+ "model": "gpt-4o-mini",
289
+ "ppl": 0,
290
+ "mmlu_translated_kk": 0.5623775310254735,
291
+ "kk_constitution_mc": 0.79,
292
+ "kk_dastur_mc": 0.755,
293
+ "kazakh_and_literature_unt_mc": 0.4953071672354949,
294
+ "kk_geography_unt_mc": 0.5675203725261933,
295
+ "kk_world_history_unt_mc": 0.6091205211726385,
296
+ "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087,
297
+ "kk_english_unt_mc": 0.6763768775603095,
298
+ "kk_biology_unt_mc": 0.607421875,
299
+ "kk_human_society_rights_unt_mc": 0.7309417040358744,
300
+ },
301
+ {
302
+ "model_dtype": "api",
303
+ "model": "gpt-4o",
304
+ "ppl": 0,
305
+ "mmlu_translated_kk": 0.7419986936642717,
306
+ "kk_constitution_mc": 0.841,
307
+ "kk_dastur_mc": 0.798,
308
+ "kazakh_and_literature_unt_mc": 0.6785409556313993,
309
+ "kk_geography_unt_mc": 0.629802095459837,
310
+ "kk_world_history_unt_mc": 0.6783387622149837,
311
+ "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632,
312
+ "kk_english_unt_mc": 0.7410104688211198,
313
+ "kk_biology_unt_mc": 0.6979166666666666,
314
+ "kk_human_society_rights_unt_mc": 0.7937219730941704,
315
+ },
316
+ {
317
+ "model_dtype": "torch.float16",
318
+ "model": "nova-pro-v1",
319
+ "ppl": 0,
320
+ "mmlu_translated_kk": 0.6792945787067276,
321
+ "kk_constitution_mc": 0.7753623188405797,
322
+ "kk_dastur_mc": 0.718407960199005,
323
+ "kazakh_and_literature_unt_mc": 0.4656569965870307,
324
+ "kk_geography_unt_mc": 0.5541327124563445,
325
+ "kk_world_history_unt_mc": 0.6425081433224755,
326
+ "kk_history_of_kazakhstan_unt_mc": 0.5,
327
+ "kk_english_unt_mc": 0.6845698680018206,
328
+ "kk_biology_unt_mc": 0.6197916666666666,
329
+ "kk_human_society_rights_unt_mc": 0.7713004484304933,
330
+ },
331
+ {
332
+ "model_dtype": "torch.float16",
333
+ "model": "gemini-1.5-pro",
334
+ "ppl": 0,
335
+ "mmlu_translated_kk": 0.7380796864794252,
336
+ "kk_constitution_mc": 0.8164251207729468,
337
+ "kk_dastur_mc": 0.7383084577114428,
338
+ "kazakh_and_literature_unt_mc": 0.5565273037542662,
339
+ "kk_geography_unt_mc": 0.6065192083818394,
340
+ "kk_world_history_unt_mc": 0.6669381107491856,
341
+ "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785,
342
+ "kk_english_unt_mc": 0.7114246700045517,
343
+ "kk_biology_unt_mc": 0.6673177083333334,
344
+ "kk_human_society_rights_unt_mc": 0.7623318385650224,
345
+ },
346
+ {
347
+ "model_dtype": "torch.float16",
348
+ "model": "gemini-1.5-flash",
349
+ "ppl": 0,
350
+ "mmlu_translated_kk": 0.6335728282168517,
351
+ "kk_constitution_mc": 0.748792270531401,
352
+ "kk_dastur_mc": 0.7054726368159204,
353
+ "kazakh_and_literature_unt_mc": 0.4761092150170648,
354
+ "kk_geography_unt_mc": 0.5640279394644936,
355
+ "kk_world_history_unt_mc": 0.5838762214983714,
356
+ "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355,
357
+ "kk_english_unt_mc": 0.6681838871187984,
358
+ "kk_biology_unt_mc": 0.6217447916666666,
359
+ "kk_human_society_rights_unt_mc": 0.7040358744394619,
360
+ },
361
+ {
362
+ "model_dtype": "torch.float16",
363
+ "model": "claude-3-5-sonnet",
364
+ "ppl": 0,
365
+ "mmlu_translated_kk": 0.7335075114304376,
366
+ "kk_constitution_mc": 0.8623188405797102,
367
+ "kk_dastur_mc": 0.7950248756218905,
368
+ "kazakh_and_literature_unt_mc": 0.6548634812286689,
369
+ "kk_geography_unt_mc": 0.6431897555296857,
370
+ "kk_world_history_unt_mc": 0.6669381107491856,
371
+ "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289,
372
+ "kk_english_unt_mc": 0.7291761492944925,
373
+ "kk_biology_unt_mc": 0.6686197916666666,
374
+ "kk_human_society_rights_unt_mc": 0.8026905829596412,
375
+ },
376
+ {
377
+ "model_dtype": "torch.float16",
378
+ "model": "yandex-gpt",
379
+ "ppl": 0,
380
+ "mmlu_translated_kk": 0.39777922926192033,
381
+ "kk_constitution_mc": 0.7028985507246377,
382
+ "kk_dastur_mc": 0.6159203980099502,
383
+ "kazakh_and_literature_unt_mc": 0.3914249146757679,
384
+ "kk_geography_unt_mc": 0.4912689173457509,
385
+ "kk_world_history_unt_mc": 0.5244299674267101,
386
+ "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767,
387
+ "kk_english_unt_mc": 0.5844333181611289,
388
+ "kk_biology_unt_mc": 0.4368489583333333,
389
+ "kk_human_society_rights_unt_mc": 0.6995515695067265,
390
+ },
391
+ ]
392
+
393
  files_list = glob.glob("./m_data/model_data/external/*.json")
394
+ logging.info(f'FILES LIST: {files_list}')
395
+
396
  for file in files_list:
397
  with open(file) as f:
398
+ logging.info(f'Trying to read external submit file: {file}')
399
  try:
400
  data = json.load(f)
401
+ # Validate that data is a dict and has some required keys:
402
+ if not isinstance(data, dict):
403
+ logging.warning(f"File {file} is not a dict, skipping")
404
+ continue
405
+ required_keys = {'model_dtype', 'model', 'ppl', 'mmlu_translated_kk'}
406
+ if not required_keys.issubset(data.keys()):
407
+ logging.warning(f"File {file} missing required keys, skipping")
408
+ continue
409
+
410
+ logging.info(f'Successfully read: {file}, got {len(data)} keys')
411
  data_list.append(data)
412
  except Exception as e:
413
+ logging.error(f"Error reading file {file}: {e}")
414
+ continue
415
 
416
+ logging.info("Combined data_list length: %d", len(data_list))
417
+
418
  with open("genned.json", "w") as f:
419
  json.dump(data_list, f)
420
+
 
421
  API.upload_file(
422
+ path_or_fileobj="genned.json",
423
+ path_in_repo="leaderboard.json",
424
+ repo_id="kz-transformers/kaz-llm-lb-metainfo",
425
+ repo_type="dataset",
426
  )
 
 
 
 
 
427
 
428
+ def update_board():
429
  need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
430
+ logging.info("Updating the judgement (scheduled update): %s", need_reset)
431
  if need_reset != "1":
432
+ # Optionally return early if no update is needed
433
  pass
434
  os.environ[RESET_JUDGEMENT_ENV] = "0"
435
 
436
+ # Use the common aggregation function
437
+ aggregate_leaderboard_data()
438
+ restart_space() # Scheduled update restarts the space
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
 
440
+ def update_board_():
441
+ # Startup update (without restart)
442
+ logging.info("Updating the judgement at startup")
443
+ aggregate_leaderboard_data()
444
 
 
 
 
 
 
 
445
 
446
  if __name__ == "__main__":
447
  os.environ[RESET_JUDGEMENT_ENV] = "1"