kz-transformers committed on
Commit
431078d
·
verified ·
1 Parent(s): c36f1ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -178
app.py CHANGED
@@ -1,29 +1,3 @@
1
- from starlette.requests import Request
2
- from pydantic_core import core_schema
3
-
4
- @classmethod
5
- def request_core_schema(cls, source, handler):
6
- # Return a basic string core schema without a default.
7
- return core_schema.str_schema()
8
-
9
- @classmethod
10
- def request_json_schema(cls, core_schema_value, handler):
11
- # Return an explicit JSON Schema for Request objects.
12
- return {
13
- "type": "string",
14
- "title": "Request",
15
- "description": "A Request object is represented as a string.",
16
- "default": ""
17
- }
18
-
19
- # Patch the Request class with these methods.
20
- Request.__get_pydantic_core_schema__ = request_core_schema
21
- Request.__get_pydantic_json_schema__ = request_json_schema
22
-
23
-
24
-
25
- import pydantic
26
- print("!!!! PYDANTIC VERSION", pydantic.__version__)
27
  import logging
28
  import os
29
  os.makedirs("tmp", exist_ok=True)
@@ -40,30 +14,34 @@ from gradio_space_ci import enable_space_ci
40
  import json
41
  from io import BytesIO
42
 
 
 
 
 
43
  def handle_file_upload(file):
44
  file_path = file.name.split("/")[-1] if "/" in file.name else file.name
45
  logging.info("File uploaded: %s", file_path)
46
  with open(file.name, "r") as f:
47
  v = json.load(f)
48
  return v, file_path
49
- def submit_file(v, file_path, mn, profile):
 
 
 
50
  print('START SUBMITTING!!!')
51
- if profile is None:
52
- return "Hub Login Required"
53
- print('PROFILE: ', profile.__dict__)
54
- print('MN:', mn)
55
  new_file = v['results']
56
- if profile.username == 'kz-transformers':
57
- new_file['model'] = mn
58
- else:
59
- new_file['model'] = profile.username + "/" + mn
60
 
 
61
  columns = [
62
- 'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc', 'kazakh_and_literature_unt_mc', 'kk_geography_unt_mc',
63
- 'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc', 'kk_english_unt_mc', 'kk_biology_unt_mc',
64
- 'kk_human_society_rights_unt_mc'
 
65
  ]
66
 
 
67
  for column in columns:
68
  new_file[column] = new_file[column]['acc,none']
69
 
@@ -73,25 +51,22 @@ def submit_file(v, file_path, mn, profile):
73
  print('WE READ FILE: ', new_file)
74
  buf = BytesIO()
75
  buf.write(json.dumps(new_file).encode('utf-8'))
 
76
  API.upload_file(
77
  path_or_fileobj=buf,
78
- path_in_repo="model_data/external/" + profile.username + '__' + mn.replace('/', '__') + ".json",
79
  repo_id="kz-transformers/s-openbench-eval",
80
  repo_type="dataset",
81
  )
82
  os.environ[RESET_JUDGEMENT_ENV] = "1"
83
  return "Success!"
84
 
85
- from src.display.about import (
86
- INTRODUCTION_TEXT,
87
- TITLE,
88
- LLM_BENCHMARKS_TEXT
89
- )
90
  from src.display.css_html_js import custom_css
91
- from src.display.utils import (
92
- AutoEvalColumn,
93
- fields,
94
- )
95
  from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
96
  from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
97
  import huggingface_hub
@@ -102,9 +77,9 @@ os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
102
  # Configure logging
103
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
104
 
105
- # Start ephemeral Spaces on PRs (see config in README.md)
106
  enable_space_ci()
107
-
108
  # download_openbench()
109
 
110
  def restart_space():
@@ -114,6 +89,9 @@ def restart_space():
114
  def update_plot(selected_models):
115
  return create_plot(selected_models)
116
 
 
 
 
117
  def build_demo():
118
  download_openbench()
119
  demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
@@ -134,16 +112,9 @@ def build_demo():
134
  ),
135
  search_columns=[
136
  AutoEvalColumn.model.name,
137
- # AutoEvalColumn.fullname.name,
138
- # AutoEvalColumn.license.name
139
  ],
140
  )
141
 
142
- # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
143
- # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
144
- # with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=2):
145
- # gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
146
-
147
  with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
148
  with gr.Row():
149
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
@@ -151,22 +122,14 @@ def build_demo():
151
  gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
152
 
153
  with gr.Column():
154
-
155
  model_name_textbox = gr.Textbox(label="Model name")
156
- # submitter_username = gr.Textbox(label="Username")
157
-
158
- # def toggle_upload_button(model_name, username):
159
- # return bool(model_name) and bool(username)
160
  file_output = gr.File(label="Drag and drop JSON file judgment here", type="filepath")
161
- # upload_button = gr.Button("Click to Upload & Submit Answers", elem_id="upload_button",variant='primary')
162
  uploaded_file = gr.State()
163
  file_path = gr.State()
164
  with gr.Row():
165
  with gr.Column():
166
  out = gr.Textbox("Статус отправки")
167
- with gr.Column():
168
- login_button = gr.LoginButton(elem_id="oauth-button")
169
-
170
  submit_button = gr.Button("Submit File", elem_id="submit_button", variant='primary')
171
 
172
  file_output.upload(
@@ -182,113 +145,26 @@ def build_demo():
182
  )
183
 
184
  with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
185
- with gr.Column():
186
- model_dropdown = gr.Dropdown(
187
- choices=leaderboard_df["model"].tolist(),
188
- label="Models",
189
- value=leaderboard_df["model"].tolist(),
190
- multiselect=True,
191
- info="Select models"
192
- )
193
- with gr.Column():
194
- plot = gr.Plot(update_plot(model_dropdown.value))
195
- # plot = gr.Plot()
196
- model_dropdown.change(
197
- fn=update_plot,
198
- inputs=[model_dropdown],
199
- outputs=[plot]
200
  )
201
- return demo
202
-
203
-
204
- # print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py'))
205
- # print(os.system('cd src/gen/ && python show_result.py --output'))
206
-
207
-
208
- # def update_board():
209
- # need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
210
- # logging.info("Updating the judgement: %s", need_reset)
211
- # if need_reset != "1":
212
- # # return
213
- # pass
214
- # os.environ[RESET_JUDGEMENT_ENV] = "0"
215
-
216
- # # `shutil.rmtree("./m_data")` is a Python command that removes a directory and all its contents
217
- # # recursively. In this specific context, it is used to delete the directory named "m_data" along
218
- # # with all its files and subdirectories. This command helps in cleaning up the existing data in
219
- # # the "m_data" directory before downloading new dataset files into it.
220
- # # shutil.rmtree("./m_data")
221
- # # shutil.rmtree("./data")
222
- # download_dataset("kz-transformers/s-openbench-eval", "m_data")
223
- # data_list = [{"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713}, {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744}, {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704}, {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, 
"kk_human_society_rights_unt_mc": 0.7713004484304933}, {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224}, {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619}, {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412}, {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 
0.4030674846625767, "kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265}]
224
- # files_list = glob.glob("./m_data/model_data/external/*.json")
225
- # print(f'FILES LIST: {files_list}')
226
- # for file in files_list:
227
- # with open(file) as f:
228
- # print(f'trying to read external submit file: {file}')
229
- # try:
230
- # data = json.load(f)
231
- # print(f'succeed to read: {file}, got {len(data)}')
232
- # data_list.append(data)
233
- # except Exception as e:
234
- # pass # data was badly formatted, should not fail
235
- # print("DATALIST: ", data_list)
236
-
237
- # with open("genned.json", "w") as f:
238
- # json.dump(data_list, f)
239
-
240
-
241
- # API.upload_file(
242
- # path_or_fileobj="genned.json",
243
- # path_in_repo="leaderboard.json",
244
- # repo_id="kz-transformers/kaz-llm-lb-metainfo",
245
- # repo_type="dataset",
246
- # )
247
- # restart_space()
248
-
249
-
250
- # # gen_judgement_file = os.path.join(HF_HOME, "src/gen/gen_judgement.py")
251
- # # subprocess.run(["python3", gen_judgement_file], check=True)
252
-
253
- # def update_board_():
254
- # need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
255
- # logging.info("Updating the judgement: %s", need_reset)
256
- # if need_reset != "1":
257
- # # return
258
- # pass
259
- # os.environ[RESET_JUDGEMENT_ENV] = "0"
260
-
261
- # # `shutil.rmtree("./m_data")` is a Python command that removes a directory and all its contents
262
- # # recursively. In this specific context, it is used to delete the directory named "m_data" along
263
- # # with all its files and subdirectories. This command helps in cleaning up the existing data in
264
- # # the "m_data" directory before downloading new dataset files into it.
265
- # # shutil.rmtree("./m_data")
266
- # # shutil.rmtree("./data")
267
- # download_dataset("kz-transformers/s-openbench-eval", "m_data")
268
- # data_list = [{"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713}, {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744}, {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704}, {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, 
"kk_human_society_rights_unt_mc": 0.7713004484304933}, {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224}, {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619}, {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412}, {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 
0.4030674846625767, "kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265}]
269
- # files_list = glob.glob("./m_data/model_data/external/*.json")
270
- # print(f'FILES LIST: {files_list}')
271
- # for file in files_list:
272
- # with open(file) as f:
273
- # print(f'trying to read external submit file: {file}')
274
- # try:
275
- # data = json.load(f)
276
- # print(f'succeed to read: {file}, got {len(data)}')
277
- # data_list.append(data)
278
- # except Exception as e:
279
- # pass # data was badly formatted, should not fail
280
- # print("DATALIST: ", data_list)
281
-
282
- # with open("genned.json", "w") as f:
283
- # json.dump(data_list, f)
284
-
285
- # API.upload_file(
286
- # path_or_fileobj="genned.json",
287
- # path_in_repo="leaderboard.json",
288
- # repo_id="kz-transformers/kaz-llm-lb-metainfo",
289
- # repo_type="dataset",
290
- # )
291
 
 
 
 
292
  def aggregate_leaderboard_data():
293
  download_dataset("kz-transformers/s-openbench-eval", "m_data")
294
 
@@ -413,7 +289,7 @@ def aggregate_leaderboard_data():
413
  "kk_english_unt_mc": 0.5844333181611289,
414
  "kk_biology_unt_mc": 0.4368489583333333,
415
  "kk_human_society_rights_unt_mc": 0.6995515695067265,
416
- },
417
  ]
418
 
419
  files_list = glob.glob("./m_data/model_data/external/*.json")
@@ -455,20 +331,18 @@ def update_board():
455
  need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
456
  logging.info("Updating the judgement (scheduled update): %s", need_reset)
457
  if need_reset != "1":
458
- # Optionally return early if no update is needed
459
  pass
460
  os.environ[RESET_JUDGEMENT_ENV] = "0"
461
-
462
- # Use the common aggregation function
463
  aggregate_leaderboard_data()
464
- restart_space() # Scheduled update restarts the space
465
 
466
  def update_board_():
467
- # Startup update (without restart)
468
  logging.info("Updating the judgement at startup")
469
  aggregate_leaderboard_data()
470
 
471
-
 
 
472
  if __name__ == "__main__":
473
  os.environ[RESET_JUDGEMENT_ENV] = "1"
474
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import logging
2
  import os
3
  os.makedirs("tmp", exist_ok=True)
 
14
  import json
15
  from io import BytesIO
16
 
17
+ # ---------------------------
18
+ # File upload and submission handlers
19
+ # ---------------------------
20
+
21
  def handle_file_upload(file):
22
  file_path = file.name.split("/")[-1] if "/" in file.name else file.name
23
  logging.info("File uploaded: %s", file_path)
24
  with open(file.name, "r") as f:
25
  v = json.load(f)
26
  return v, file_path
27
+
28
+ def submit_file(v, file_path, mn):
29
+ # We remove the use of a "profile" custom type.
30
+ # Instead, simply assign the model name directly.
31
  print('START SUBMITTING!!!')
32
+ # In this version, we assume no authentication is necessary.
 
 
 
33
  new_file = v['results']
34
+ new_file['model'] = mn # Directly assign the provided model name.
 
 
 
35
 
36
+ # List of expected columns
37
  columns = [
38
+ 'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc',
39
+ 'kazakh_and_literature_unt_mc', 'kk_geography_unt_mc',
40
+ 'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc',
41
+ 'kk_english_unt_mc', 'kk_biology_unt_mc', 'kk_human_society_rights_unt_mc'
42
  ]
43
 
44
+ # Process each column to extract the accuracy value.
45
  for column in columns:
46
  new_file[column] = new_file[column]['acc,none']
47
 
 
51
  print('WE READ FILE: ', new_file)
52
  buf = BytesIO()
53
  buf.write(json.dumps(new_file).encode('utf-8'))
54
+ # Upload the processed file (assuming API is imported from src.envs)
55
  API.upload_file(
56
  path_or_fileobj=buf,
57
+ path_in_repo="model_data/external/" + mn.replace('/', '__') + ".json",
58
  repo_id="kz-transformers/s-openbench-eval",
59
  repo_type="dataset",
60
  )
61
  os.environ[RESET_JUDGEMENT_ENV] = "1"
62
  return "Success!"
63
 
64
+ # ---------------------------
65
+ # Import display and environment configuration.
66
+ # ---------------------------
67
+ from src.display.about import INTRODUCTION_TEXT, TITLE, LLM_BENCHMARKS_TEXT
 
68
  from src.display.css_html_js import custom_css
69
+ from src.display.utils import AutoEvalColumn, fields
 
 
 
70
  from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
71
  from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
72
  import huggingface_hub
 
77
  # Configure logging
78
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
79
 
80
+ # Enable space CI (if required)
81
  enable_space_ci()
82
+ # Optionally, you can call download_openbench() here if needed.
83
  # download_openbench()
84
 
85
  def restart_space():
 
89
  def update_plot(selected_models):
90
  return create_plot(selected_models)
91
 
92
+ # ---------------------------
93
+ # Build Demo Interface (with removed OAuth custom types)
94
+ # ---------------------------
95
  def build_demo():
96
  download_openbench()
97
  demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
 
112
  ),
113
  search_columns=[
114
  AutoEvalColumn.model.name,
 
 
115
  ],
116
  )
117
 
 
 
 
 
 
118
  with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
119
  with gr.Row():
120
  gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
122
  gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
123
 
124
  with gr.Column():
 
125
  model_name_textbox = gr.Textbox(label="Model name")
 
 
 
 
126
  file_output = gr.File(label="Drag and drop JSON file judgment here", type="filepath")
127
+ # Remove the OAuth login button so that no custom type is used.
128
  uploaded_file = gr.State()
129
  file_path = gr.State()
130
  with gr.Row():
131
  with gr.Column():
132
  out = gr.Textbox("Статус отправки")
 
 
 
133
  submit_button = gr.Button("Submit File", elem_id="submit_button", variant='primary')
134
 
135
  file_output.upload(
 
145
  )
146
 
147
  with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
148
+ with gr.Column():
149
+ model_dropdown = gr.Dropdown(
150
+ choices=leaderboard_df["model"].tolist(),
151
+ label="Models",
152
+ value=leaderboard_df["model"].tolist(),
153
+ multiselect=True,
154
+ info="Select models"
 
 
 
 
 
 
 
 
155
  )
156
+ with gr.Column():
157
+ plot = gr.Plot(update_plot(model_dropdown.value))
158
+ model_dropdown.change(
159
+ fn=update_plot,
160
+ inputs=[model_dropdown],
161
+ outputs=[plot]
162
+ )
163
+ return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
+ # ---------------------------
166
+ # Aggregation functions to update leaderboard data.
167
+ # ---------------------------
168
  def aggregate_leaderboard_data():
169
  download_dataset("kz-transformers/s-openbench-eval", "m_data")
170
 
 
289
  "kk_english_unt_mc": 0.5844333181611289,
290
  "kk_biology_unt_mc": 0.4368489583333333,
291
  "kk_human_society_rights_unt_mc": 0.6995515695067265,
292
+ }
293
  ]
294
 
295
  files_list = glob.glob("./m_data/model_data/external/*.json")
 
331
  need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
332
  logging.info("Updating the judgement (scheduled update): %s", need_reset)
333
  if need_reset != "1":
 
334
  pass
335
  os.environ[RESET_JUDGEMENT_ENV] = "0"
 
 
336
  aggregate_leaderboard_data()
337
+ restart_space()
338
 
339
  def update_board_():
 
340
  logging.info("Updating the judgement at startup")
341
  aggregate_leaderboard_data()
342
 
343
+ # ---------------------------
344
+ # Main execution
345
+ # ---------------------------
346
  if __name__ == "__main__":
347
  os.environ[RESET_JUDGEMENT_ENV] = "1"
348