Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -179,90 +179,269 @@ def build_demo():
|
|
179 |
# print(os.system('cd src/gen/ && python show_result.py --output'))
|
180 |
|
181 |
|
182 |
-
def update_board():
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
download_dataset("kz-transformers/s-openbench-eval", "m_data")
|
197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
198 |
files_list = glob.glob("./m_data/model_data/external/*.json")
|
199 |
-
|
|
|
200 |
for file in files_list:
|
201 |
with open(file) as f:
|
202 |
-
|
203 |
try:
|
204 |
data = json.load(f)
|
205 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
data_list.append(data)
|
207 |
except Exception as e:
|
208 |
-
|
209 |
-
|
210 |
|
|
|
|
|
211 |
with open("genned.json", "w") as f:
|
212 |
json.dump(data_list, f)
|
213 |
-
|
214 |
-
|
215 |
API.upload_file(
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
)
|
221 |
-
restart_space()
|
222 |
-
|
223 |
-
|
224 |
-
# gen_judgement_file = os.path.join(HF_HOME, "src/gen/gen_judgement.py")
|
225 |
-
# subprocess.run(["python3", gen_judgement_file], check=True)
|
226 |
|
227 |
-
def
|
228 |
need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
|
229 |
-
logging.info("Updating the judgement: %s", need_reset)
|
230 |
if need_reset != "1":
|
231 |
-
# return
|
232 |
pass
|
233 |
os.environ[RESET_JUDGEMENT_ENV] = "0"
|
234 |
|
235 |
-
#
|
236 |
-
|
237 |
-
#
|
238 |
-
# the "m_data" directory before downloading new dataset files into it.
|
239 |
-
# shutil.rmtree("./m_data")
|
240 |
-
# shutil.rmtree("./data")
|
241 |
-
download_dataset("kz-transformers/s-openbench-eval", "m_data")
|
242 |
-
data_list = [{"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713}, {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744}, {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704}, {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, "kk_human_society_rights_unt_mc": 
0.7713004484304933}, {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224}, {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619}, {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412}, {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767, 
"kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265}]
|
243 |
-
files_list = glob.glob("./m_data/model_data/external/*.json")
|
244 |
-
print(f'FILES LIST: {files_list}')
|
245 |
-
for file in files_list:
|
246 |
-
with open(file) as f:
|
247 |
-
print(f'trying to read external submit file: {file}')
|
248 |
-
try:
|
249 |
-
data = json.load(f)
|
250 |
-
print(f'succeed to read: {file}, got {len(data)}')
|
251 |
-
data_list.append(data)
|
252 |
-
except Exception as e:
|
253 |
-
pass # data was badly formatted, should not fail
|
254 |
-
print("DATALIST: ", data_list)
|
255 |
-
|
256 |
-
with open("genned.json", "w") as f:
|
257 |
-
json.dump(data_list, f)
|
258 |
|
|
|
|
|
|
|
|
|
259 |
|
260 |
-
API.upload_file(
|
261 |
-
path_or_fileobj="genned.json",
|
262 |
-
path_in_repo="leaderboard.json",
|
263 |
-
repo_id="kz-transformers/kaz-llm-lb-metainfo",
|
264 |
-
repo_type="dataset",
|
265 |
-
)
|
266 |
|
267 |
if __name__ == "__main__":
|
268 |
os.environ[RESET_JUDGEMENT_ENV] = "1"
|
|
|
179 |
# print(os.system('cd src/gen/ && python show_result.py --output'))
|
180 |
|
181 |
|
182 |
+
# def update_board():
|
183 |
+
# need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
|
184 |
+
# logging.info("Updating the judgement: %s", need_reset)
|
185 |
+
# if need_reset != "1":
|
186 |
+
# # return
|
187 |
+
# pass
|
188 |
+
# os.environ[RESET_JUDGEMENT_ENV] = "0"
|
189 |
+
|
190 |
+
# # `shutil.rmtree("./m_data")` is a Python command that removes a directory and all its contents
|
191 |
+
# # recursively. In this specific context, it is used to delete the directory named "m_data" along
|
192 |
+
# # with all its files and subdirectories. This command helps in cleaning up the existing data in
|
193 |
+
# # the "m_data" directory before downloading new dataset files into it.
|
194 |
+
# # shutil.rmtree("./m_data")
|
195 |
+
# # shutil.rmtree("./data")
|
196 |
+
# download_dataset("kz-transformers/s-openbench-eval", "m_data")
|
197 |
+
# data_list = [{"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713}, {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744}, {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704}, {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, "kk_human_society_rights_unt_mc": 
0.7713004484304933}, {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224}, {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619}, {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412}, {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767, 
"kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265}]
|
198 |
+
# files_list = glob.glob("./m_data/model_data/external/*.json")
|
199 |
+
# print(f'FILES LIST: {files_list}')
|
200 |
+
# for file in files_list:
|
201 |
+
# with open(file) as f:
|
202 |
+
# print(f'trying to read external submit file: {file}')
|
203 |
+
# try:
|
204 |
+
# data = json.load(f)
|
205 |
+
# print(f'succeed to read: {file}, got {len(data)}')
|
206 |
+
# data_list.append(data)
|
207 |
+
# except Exception as e:
|
208 |
+
# pass # data was badly formatted, should not fail
|
209 |
+
# print("DATALIST: ", data_list)
|
210 |
+
|
211 |
+
# with open("genned.json", "w") as f:
|
212 |
+
# json.dump(data_list, f)
|
213 |
+
|
214 |
+
|
215 |
+
# API.upload_file(
|
216 |
+
# path_or_fileobj="genned.json",
|
217 |
+
# path_in_repo="leaderboard.json",
|
218 |
+
# repo_id="kz-transformers/kaz-llm-lb-metainfo",
|
219 |
+
# repo_type="dataset",
|
220 |
+
# )
|
221 |
+
# restart_space()
|
222 |
+
|
223 |
+
|
224 |
+
# # gen_judgement_file = os.path.join(HF_HOME, "src/gen/gen_judgement.py")
|
225 |
+
# # subprocess.run(["python3", gen_judgement_file], check=True)
|
226 |
+
|
227 |
+
# def update_board_():
|
228 |
+
# need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
|
229 |
+
# logging.info("Updating the judgement: %s", need_reset)
|
230 |
+
# if need_reset != "1":
|
231 |
+
# # return
|
232 |
+
# pass
|
233 |
+
# os.environ[RESET_JUDGEMENT_ENV] = "0"
|
234 |
+
|
235 |
+
# # `shutil.rmtree("./m_data")` is a Python command that removes a directory and all its contents
|
236 |
+
# # recursively. In this specific context, it is used to delete the directory named "m_data" along
|
237 |
+
# # with all its files and subdirectories. This command helps in cleaning up the existing data in
|
238 |
+
# # the "m_data" directory before downloading new dataset files into it.
|
239 |
+
# # shutil.rmtree("./m_data")
|
240 |
+
# # shutil.rmtree("./data")
|
241 |
+
# download_dataset("kz-transformers/s-openbench-eval", "m_data")
|
242 |
+
# data_list = [{"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713}, {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744}, {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704}, {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, "kk_human_society_rights_unt_mc": 
0.7713004484304933}, {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224}, {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619}, {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412}, {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767, 
"kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265}]
|
243 |
+
# files_list = glob.glob("./m_data/model_data/external/*.json")
|
244 |
+
# print(f'FILES LIST: {files_list}')
|
245 |
+
# for file in files_list:
|
246 |
+
# with open(file) as f:
|
247 |
+
# print(f'trying to read external submit file: {file}')
|
248 |
+
# try:
|
249 |
+
# data = json.load(f)
|
250 |
+
# print(f'succeed to read: {file}, got {len(data)}')
|
251 |
+
# data_list.append(data)
|
252 |
+
# except Exception as e:
|
253 |
+
# pass # data was badly formatted, should not fail
|
254 |
+
# print("DATALIST: ", data_list)
|
255 |
+
|
256 |
+
# with open("genned.json", "w") as f:
|
257 |
+
# json.dump(data_list, f)
|
258 |
+
|
259 |
+
# API.upload_file(
|
260 |
+
# path_or_fileobj="genned.json",
|
261 |
+
# path_in_repo="leaderboard.json",
|
262 |
+
# repo_id="kz-transformers/kaz-llm-lb-metainfo",
|
263 |
+
# repo_type="dataset",
|
264 |
+
# )
|
265 |
+
|
266 |
+
def aggregate_leaderboard_data():
    """Rebuild the combined leaderboard file and upload it.

    Steps performed, in order:

    1. Call ``download_dataset("kz-transformers/s-openbench-eval", "m_data")``.
    2. Start from the hard-coded baseline rows defined below.
    3. Append every usable submission found under
       ``./m_data/model_data/external/*.json`` (see
       ``_read_external_submission`` for what counts as usable).
    4. Write the combined list to ``genned.json`` and upload it as
       ``leaderboard.json`` to the ``kz-transformers/kaz-llm-lb-metainfo``
       dataset repo via ``API.upload_file``.

    Unreadable or malformed submission files are logged and skipped; they
    never abort the aggregation.
    """
    download_dataset("kz-transformers/s-openbench-eval", "m_data")

    # Baseline rows that are always present on the leaderboard.
    data_list = [
        {
            "model_dtype": "torch.float16",
            "model": "dummy-random-baseline",
            "ppl": 0,
            "mmlu_translated_kk": 0.22991508817766165,
            "kk_constitution_mc": 0.25120772946859904,
            "kk_dastur_mc": 0.24477611940298508,
            "kazakh_and_literature_unt_mc": 0.2090443686006826,
            "kk_geography_unt_mc": 0.2019790454016298,
            "kk_world_history_unt_mc": 0.1986970684039088,
            "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428,
            "kk_english_unt_mc": 0.189804278561675,
            "kk_biology_unt_mc": 0.22330729166666666,
            "kk_human_society_rights_unt_mc": 0.242152466367713,
        },
        {
            "model_dtype": "torch.float16",
            "model": "gpt-4o-mini",
            "ppl": 0,
            "mmlu_translated_kk": 0.5623775310254735,
            "kk_constitution_mc": 0.79,
            "kk_dastur_mc": 0.755,
            "kazakh_and_literature_unt_mc": 0.4953071672354949,
            "kk_geography_unt_mc": 0.5675203725261933,
            "kk_world_history_unt_mc": 0.6091205211726385,
            "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087,
            "kk_english_unt_mc": 0.6763768775603095,
            "kk_biology_unt_mc": 0.607421875,
            "kk_human_society_rights_unt_mc": 0.7309417040358744,
        },
        {
            "model_dtype": "api",
            "model": "gpt-4o",
            "ppl": 0,
            "mmlu_translated_kk": 0.7419986936642717,
            "kk_constitution_mc": 0.841,
            "kk_dastur_mc": 0.798,
            "kazakh_and_literature_unt_mc": 0.6785409556313993,
            "kk_geography_unt_mc": 0.629802095459837,
            "kk_world_history_unt_mc": 0.6783387622149837,
            "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632,
            "kk_english_unt_mc": 0.7410104688211198,
            "kk_biology_unt_mc": 0.6979166666666666,
            "kk_human_society_rights_unt_mc": 0.7937219730941704,
        },
        {
            "model_dtype": "torch.float16",
            "model": "nova-pro-v1",
            "ppl": 0,
            "mmlu_translated_kk": 0.6792945787067276,
            "kk_constitution_mc": 0.7753623188405797,
            "kk_dastur_mc": 0.718407960199005,
            "kazakh_and_literature_unt_mc": 0.4656569965870307,
            "kk_geography_unt_mc": 0.5541327124563445,
            "kk_world_history_unt_mc": 0.6425081433224755,
            "kk_history_of_kazakhstan_unt_mc": 0.5,
            "kk_english_unt_mc": 0.6845698680018206,
            "kk_biology_unt_mc": 0.6197916666666666,
            "kk_human_society_rights_unt_mc": 0.7713004484304933,
        },
        {
            "model_dtype": "torch.float16",
            "model": "gemini-1.5-pro",
            "ppl": 0,
            "mmlu_translated_kk": 0.7380796864794252,
            "kk_constitution_mc": 0.8164251207729468,
            "kk_dastur_mc": 0.7383084577114428,
            "kazakh_and_literature_unt_mc": 0.5565273037542662,
            "kk_geography_unt_mc": 0.6065192083818394,
            "kk_world_history_unt_mc": 0.6669381107491856,
            "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785,
            "kk_english_unt_mc": 0.7114246700045517,
            "kk_biology_unt_mc": 0.6673177083333334,
            "kk_human_society_rights_unt_mc": 0.7623318385650224,
        },
        {
            "model_dtype": "torch.float16",
            "model": "gemini-1.5-flash",
            "ppl": 0,
            "mmlu_translated_kk": 0.6335728282168517,
            "kk_constitution_mc": 0.748792270531401,
            "kk_dastur_mc": 0.7054726368159204,
            "kazakh_and_literature_unt_mc": 0.4761092150170648,
            "kk_geography_unt_mc": 0.5640279394644936,
            "kk_world_history_unt_mc": 0.5838762214983714,
            "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355,
            "kk_english_unt_mc": 0.6681838871187984,
            "kk_biology_unt_mc": 0.6217447916666666,
            "kk_human_society_rights_unt_mc": 0.7040358744394619,
        },
        {
            "model_dtype": "torch.float16",
            "model": "claude-3-5-sonnet",
            "ppl": 0,
            "mmlu_translated_kk": 0.7335075114304376,
            "kk_constitution_mc": 0.8623188405797102,
            "kk_dastur_mc": 0.7950248756218905,
            "kazakh_and_literature_unt_mc": 0.6548634812286689,
            "kk_geography_unt_mc": 0.6431897555296857,
            "kk_world_history_unt_mc": 0.6669381107491856,
            "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289,
            "kk_english_unt_mc": 0.7291761492944925,
            "kk_biology_unt_mc": 0.6686197916666666,
            "kk_human_society_rights_unt_mc": 0.8026905829596412,
        },
        {
            "model_dtype": "torch.float16",
            "model": "yandex-gpt",
            "ppl": 0,
            "mmlu_translated_kk": 0.39777922926192033,
            "kk_constitution_mc": 0.7028985507246377,
            "kk_dastur_mc": 0.6159203980099502,
            "kazakh_and_literature_unt_mc": 0.3914249146757679,
            "kk_geography_unt_mc": 0.4912689173457509,
            "kk_world_history_unt_mc": 0.5244299674267101,
            "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767,
            "kk_english_unt_mc": 0.5844333181611289,
            "kk_biology_unt_mc": 0.4368489583333333,
            "kk_human_society_rights_unt_mc": 0.6995515695067265,
        },
    ]

    # glob order depends on the filesystem; sort so the generated file is
    # reproducible from one run to the next.
    files_list = sorted(glob.glob("./m_data/model_data/external/*.json"))
    logging.info("FILES LIST: %s", files_list)

    for file in files_list:
        submission = _read_external_submission(file)
        if submission is not None:
            data_list.append(submission)

    logging.info("Combined data_list length: %d", len(data_list))

    with open("genned.json", "w", encoding="utf-8") as f:
        json.dump(data_list, f)

    API.upload_file(
        path_or_fileobj="genned.json",
        path_in_repo="leaderboard.json",
        repo_id="kz-transformers/kaz-llm-lb-metainfo",
        repo_type="dataset",
    )


# Keys every external submission must contain to be accepted.
_REQUIRED_SUBMISSION_KEYS = frozenset(
    {"model_dtype", "model", "ppl", "mmlu_translated_kk"}
)


def _read_external_submission(path):
    """Return the submission dict stored at *path*, or ``None`` if unusable.

    A file is unusable when it cannot be opened, is not valid JSON, does not
    decode to a dict, or lacks any of ``_REQUIRED_SUBMISSION_KEYS``. Each
    rejection is logged; nothing is raised for these cases.
    """
    logging.info("Trying to read external submit file: %s", path)
    try:
        # Opening inside the try keeps one unreadable file from aborting
        # the whole aggregation.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError, RecursionError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # RecursionError covers pathologically nested JSON.
        logging.error("Error reading file %s: %s", path, e)
        return None

    if not isinstance(data, dict):
        logging.warning("File %s is not a dict, skipping", path)
        return None
    if not _REQUIRED_SUBMISSION_KEYS.issubset(data):
        logging.warning("File %s missing required keys, skipping", path)
        return None

    logging.info("Successfully read: %s, got %d keys", path, len(data))
    return data
|
|
|
|
|
|
|
|
|
|
|
427 |
|
428 |
+
def update_board():
    """Scheduled refresh: rebuild the leaderboard, then restart the Space.

    Reads the ``RESET_JUDGEMENT_ENV`` environment variable, logs its value,
    resets it to ``"0"``, regenerates and uploads the leaderboard through
    ``aggregate_leaderboard_data()``, and finally calls ``restart_space()``.
    """
    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
    logging.info("Updating the judgement (scheduled update): %s", need_reset)

    # NOTE: the flag is logged and cleared but does not gate the update.
    # The former `if need_reset != "1"` branch only executed `pass`, so the
    # refresh has always run unconditionally; that behaviour is kept.
    os.environ[RESET_JUDGEMENT_ENV] = "0"

    aggregate_leaderboard_data()
    restart_space()  # Scheduled update restarts the space
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
439 |
|
440 |
+
def update_board_():
    """Startup refresh: rebuild and upload the leaderboard without a restart.

    Logs that a startup update is happening and delegates to
    ``aggregate_leaderboard_data()``. This function does not itself call
    ``restart_space()``.
    """
    logging.info("Updating the judgement at startup")
    aggregate_leaderboard_data()
|
444 |
|
|
|
|
|
|
|
|
|
|
|
|
|
445 |
|
446 |
if __name__ == "__main__":
|
447 |
os.environ[RESET_JUDGEMENT_ENV] = "1"
|