kz-transformers committed (verified)
Commit 01d59fd · 1 Parent(s): e532f8a

Update app.py

Files changed (1)
  1. app.py +124 -195
app.py CHANGED
@@ -1,3 +1,22 @@
  import logging
  import os
  os.makedirs("tmp", exist_ok=True)
@@ -7,55 +26,82 @@ import shutil
  import glob
  import gradio as gr
  import numpy as np
- from src.radial.radial import create_plot
  from apscheduler.schedulers.background import BackgroundScheduler
- from gradio_leaderboard import Leaderboard, SelectColumns
- from gradio_space_ci import enable_space_ci
  import json
  from io import BytesIO

- # ---------------------------
- # File upload and submission handlers
- # ---------------------------

- def handle_file_upload(file):
-     file_path = file.name.split("/")[-1] if "/" in file.name else file.name
-     logging.info("File uploaded: %s", file_path)
-     with open(file.name, "r") as f:
-         v = json.load(f)
-     return v, file_path

- def submit_file(v, file_path, mn):
      print('START SUBMITTING!!!')
      new_file = v['results']
-     new_file['model'] = mn # Directly assign the provided model name.

      columns = [
-         'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc',
          'kazakh_and_literature_unt_mc', 'kk_geography_unt_mc',
          'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc',
          'kk_english_unt_mc', 'kk_biology_unt_mc', 'kk_human_society_rights_unt_mc'
      ]
-
      for column in columns:
          new_file[column] = new_file[column]['acc,none']

      new_file['model_dtype'] = v['config']["model_dtype"]
      new_file['ppl'] = 0

      print('WE READ FILE: ', new_file)
-     def replace_booleans_with_empty_dict(d):
-         if isinstance(d, dict):
-             return {k: ({} if isinstance(v, bool) else replace_booleans_with_empty_dict(v))
-                     for k, v in d.items()}
-         elif isinstance(d, list):
-             return [replace_booleans_with_empty_dict(item) for item in d]
-         else:
-             return d

-     clean_data = replace_booleans_with_empty_dict(new_file)
      buf = BytesIO()
-     buf.write(json.dumps(clean_data).encode('utf-8'))
      API.upload_file(
          path_or_fileobj=buf,
          path_in_repo="model_data/external/" + mn.replace('/', '__') + ".json",
@@ -66,27 +112,9 @@ def submit_file(v, file_path, mn):
      os.environ[RESET_JUDGEMENT_ENV] = "1"
      return "Success!"

- # ---------------------------
- # Import display and environment configuration.
- # ---------------------------
- from src.display.about import INTRODUCTION_TEXT, TITLE, LLM_BENCHMARKS_TEXT
- from src.display.css_html_js import custom_css
- from src.display.utils import AutoEvalColumn, fields
- from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
- from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
- import huggingface_hub
- # huggingface_hub.login(token=H4_TOKEN)
-
- os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
-
- # Configure logging
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-
- # Enable space CI (if required)
- enable_space_ci()
- # Optionally, you can call download_openbench() here if needed.
- # download_openbench()
-
  def restart_space():
      API.restart_space(repo_id=REPO_ID)
      download_openbench()
@@ -94,9 +122,9 @@ def restart_space():
  def update_plot(selected_models):
      return create_plot(selected_models)

- # ---------------------------
- # Build Demo Interface (with removed OAuth custom types)
- # ---------------------------
  def build_demo():
      download_openbench()
      demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
@@ -115,9 +143,7 @@ def build_demo():
                  cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                  label="Select Columns to Display:",
              ),
-             search_columns=[
-                 AutoEvalColumn.model.name,
-             ],
          )

      with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
@@ -128,25 +154,34 @@ def build_demo():

          with gr.Column():
              model_name_textbox = gr.Textbox(label="Model name")
-             file_output = gr.File(label="Drag and drop JSON file judgment here", type="filepath")
-             # Remove the OAuth login button so that no custom type is used.
              uploaded_file = gr.State()
-             file_path = gr.State()
          with gr.Row():
              with gr.Column():
-                 out = gr.Textbox("Статус отправки")
-                 submit_button = gr.Button("Submit File", elem_id="submit_button", variant='primary')

          file_output.upload(
-             handle_file_upload,
-             file_output,
-             [uploaded_file, file_path]
          )

          submit_button.click(
-             submit_file,
-             [uploaded_file, file_path, model_name_textbox],
-             [out]
          )

      with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
@@ -167,9 +202,9 @@ def build_demo():
          )
      return demo

- # ---------------------------
- # Aggregation functions to update leaderboard data.
- # ---------------------------
  def aggregate_leaderboard_data():
      download_dataset("kz-transformers/s-openbench-eval", "m_data")
@@ -190,136 +225,31 @@ def aggregate_leaderboard_data():
              "kk_biology_unt_mc": 0.22330729166666666,
              "kk_human_society_rights_unt_mc": 0.242152466367713,
          },
-         {
-             "model_dtype": "torch.float16",
-             "model": "gpt-4o-mini",
-             "ppl": 0,
-             "mmlu_translated_kk": 0.5623775310254735,
-             "kk_constitution_mc": 0.79,
-             "kk_dastur_mc": 0.755,
-             "kazakh_and_literature_unt_mc": 0.4953071672354949,
-             "kk_geography_unt_mc": 0.5675203725261933,
-             "kk_world_history_unt_mc": 0.6091205211726385,
-             "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087,
-             "kk_english_unt_mc": 0.6763768775603095,
-             "kk_biology_unt_mc": 0.607421875,
-             "kk_human_society_rights_unt_mc": 0.7309417040358744,
-         },
-         {
-             "model_dtype": "api",
-             "model": "gpt-4o",
-             "ppl": 0,
-             "mmlu_translated_kk": 0.7419986936642717,
-             "kk_constitution_mc": 0.841,
-             "kk_dastur_mc": 0.798,
-             "kazakh_and_literature_unt_mc": 0.6785409556313993,
-             "kk_geography_unt_mc": 0.629802095459837,
-             "kk_world_history_unt_mc": 0.6783387622149837,
-             "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632,
-             "kk_english_unt_mc": 0.7410104688211198,
-             "kk_biology_unt_mc": 0.6979166666666666,
-             "kk_human_society_rights_unt_mc": 0.7937219730941704,
-         },
-         {
-             "model_dtype": "torch.float16",
-             "model": "nova-pro-v1",
-             "ppl": 0,
-             "mmlu_translated_kk": 0.6792945787067276,
-             "kk_constitution_mc": 0.7753623188405797,
-             "kk_dastur_mc": 0.718407960199005,
-             "kazakh_and_literature_unt_mc": 0.4656569965870307,
-             "kk_geography_unt_mc": 0.5541327124563445,
-             "kk_world_history_unt_mc": 0.6425081433224755,
-             "kk_history_of_kazakhstan_unt_mc": 0.5,
-             "kk_english_unt_mc": 0.6845698680018206,
-             "kk_biology_unt_mc": 0.6197916666666666,
-             "kk_human_society_rights_unt_mc": 0.7713004484304933,
-         },
-         {
-             "model_dtype": "torch.float16",
-             "model": "gemini-1.5-pro",
-             "ppl": 0,
-             "mmlu_translated_kk": 0.7380796864794252,
-             "kk_constitution_mc": 0.8164251207729468,
-             "kk_dastur_mc": 0.7383084577114428,
-             "kazakh_and_literature_unt_mc": 0.5565273037542662,
-             "kk_geography_unt_mc": 0.6065192083818394,
-             "kk_world_history_unt_mc": 0.6669381107491856,
-             "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785,
-             "kk_english_unt_mc": 0.7114246700045517,
-             "kk_biology_unt_mc": 0.6673177083333334,
-             "kk_human_society_rights_unt_mc": 0.7623318385650224,
-         },
-         {
-             "model_dtype": "torch.float16",
-             "model": "gemini-1.5-flash",
-             "ppl": 0,
-             "mmlu_translated_kk": 0.6335728282168517,
-             "kk_constitution_mc": 0.748792270531401,
-             "kk_dastur_mc": 0.7054726368159204,
-             "kazakh_and_literature_unt_mc": 0.4761092150170648,
-             "kk_geography_unt_mc": 0.5640279394644936,
-             "kk_world_history_unt_mc": 0.5838762214983714,
-             "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355,
-             "kk_english_unt_mc": 0.6681838871187984,
-             "kk_biology_unt_mc": 0.6217447916666666,
-             "kk_human_society_rights_unt_mc": 0.7040358744394619,
-         },
-         {
-             "model_dtype": "torch.float16",
-             "model": "claude-3-5-sonnet",
-             "ppl": 0,
-             "mmlu_translated_kk": 0.7335075114304376,
-             "kk_constitution_mc": 0.8623188405797102,
-             "kk_dastur_mc": 0.7950248756218905,
-             "kazakh_and_literature_unt_mc": 0.6548634812286689,
-             "kk_geography_unt_mc": 0.6431897555296857,
-             "kk_world_history_unt_mc": 0.6669381107491856,
-             "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289,
-             "kk_english_unt_mc": 0.7291761492944925,
-             "kk_biology_unt_mc": 0.6686197916666666,
-             "kk_human_society_rights_unt_mc": 0.8026905829596412,
-         },
-         {
-             "model_dtype": "torch.float16",
-             "model": "yandex-gpt",
-             "ppl": 0,
-             "mmlu_translated_kk": 0.39777922926192033,
-             "kk_constitution_mc": 0.7028985507246377,
-             "kk_dastur_mc": 0.6159203980099502,
-             "kazakh_and_literature_unt_mc": 0.3914249146757679,
-             "kk_geography_unt_mc": 0.4912689173457509,
-             "kk_world_history_unt_mc": 0.5244299674267101,
-             "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767,
-             "kk_english_unt_mc": 0.5844333181611289,
-             "kk_biology_unt_mc": 0.4368489583333333,
-             "kk_human_society_rights_unt_mc": 0.6995515695067265,
-         }
      ]

      files_list = glob.glob("./m_data/model_data/external/*.json")
      logging.info(f'FILES LIST: {files_list}')

      for file in files_list:
-         with open(file) as f:
-             logging.info(f'Trying to read external submit file: {file}')
-             try:
                  data = json.load(f)
-                 # Validate that data is a dict and has some required keys:
-                 if not isinstance(data, dict):
-                     logging.warning(f"File {file} is not a dict, skipping")
-                     continue
-                 required_keys = {'model_dtype', 'model', 'ppl', 'mmlu_translated_kk'}
-                 if not required_keys.issubset(data.keys()):
-                     logging.warning(f"File {file} missing required keys, skipping")
-                     continue
-
-                 logging.info(f'Successfully read: {file}, got {len(data)} keys')
-                 data_list.append(data)
-             except Exception as e:
-                 logging.error(f"Error reading file {file}: {e}")
                  continue

      logging.info("Combined data_list length: %d", len(data_list))

      with open("genned.json", "w") as f:
@@ -345,18 +275,17 @@ def update_board_():
      logging.info("Updating the judgement at startup")
      aggregate_leaderboard_data()

- # ---------------------------
- # Main execution
- # ---------------------------
  if __name__ == "__main__":
      os.environ[RESET_JUDGEMENT_ENV] = "1"
-
      scheduler = BackgroundScheduler()
      update_board_()
      scheduler.add_job(update_board, "interval", minutes=10)
      scheduler.start()

      demo_app = build_demo()
-
-     demo_app.launch(debug=True, share=False, show_api=False, root_path="/")
-
 
+ ########################################
+ # Gradio schema patch (optional)
+ # Place this at the very top to avoid "bool not iterable" issues
+ ########################################
+ import gradio_client.utils as gc_utils
+
+ _original_json_schema_to_python_type = gc_utils._json_schema_to_python_type
+
+ def patched_json_schema_to_python_type(schema, defs=None):
+     if isinstance(schema, bool):
+         # If we find a boolean schema (e.g. additionalProperties: false), return {}
+         return {}
+     return _original_json_schema_to_python_type(schema, defs)
+
+ gc_utils._json_schema_to_python_type = patched_json_schema_to_python_type
+
+ ########################################
+ # Standard imports
+ ########################################
  import logging
  import os
  os.makedirs("tmp", exist_ok=True)
 
  import glob
  import gradio as gr
  import numpy as np
  from apscheduler.schedulers.background import BackgroundScheduler
  import json
  from io import BytesIO

+ # Additional imports from your code
+ from src.radial.radial import create_plot
+ from gradio_leaderboard import Leaderboard, SelectColumns
+ from gradio_space_ci import enable_space_ci
+ from src.display.about import INTRODUCTION_TEXT, TITLE, LLM_BENCHMARKS_TEXT
+ from src.display.css_html_js import custom_css
+ from src.display.utils import AutoEvalColumn, fields
+ from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
+ from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
+ import huggingface_hub

+ # huggingface_hub.login(token=H4_TOKEN)

+ os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+ enable_space_ci()
+
+ ########################################
+ # File handlers
+ ########################################
+ def handle_file_upload(file_bytes):
+     """
+     Read the uploaded bytes and parse JSON directly,
+     avoiding ephemeral disk paths or file read issues.
+     """
+     logging.info("File uploaded (bytes). Size: %d bytes", len(file_bytes))
+     # Convert from bytes to JSON (assuming valid JSON input).
+     v = json.loads(file_bytes.decode("utf-8"))
+     return v
+
+ def submit_file(v, mn):
+     """
+     We removed file_path because we no longer need it
+     (no ephemeral path). 'v' is the loaded JSON object.
+     """
      print('START SUBMITTING!!!')
+
+     # Validate that 'results' exists in v
+     if 'results' not in v:
+         return "Invalid JSON: missing 'results' key"
+
      new_file = v['results']
+     new_file['model'] = mn

      columns = [
+         'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc',
          'kazakh_and_literature_unt_mc', 'kk_geography_unt_mc',
          'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc',
          'kk_english_unt_mc', 'kk_biology_unt_mc', 'kk_human_society_rights_unt_mc'
      ]
+
      for column in columns:
+         # Validate data structure
+         if column not in new_file or not isinstance(new_file[column], dict):
+             return f"Missing or invalid column: {column}"
+         if 'acc,none' not in new_file[column]:
+             return f"Missing 'acc,none' key in column: {column}"
          new_file[column] = new_file[column]['acc,none']

+     # Validate 'config'
+     if 'config' not in v or 'model_dtype' not in v['config']:
+         return "Missing 'config' or 'model_dtype' in JSON"
+
      new_file['model_dtype'] = v['config']["model_dtype"]
      new_file['ppl'] = 0

      print('WE READ FILE: ', new_file)

+     # Convert to JSON and upload
      buf = BytesIO()
+     buf.write(json.dumps(new_file).encode('utf-8'))
+     buf.seek(0) # Rewind
      API.upload_file(
          path_or_fileobj=buf,
          path_in_repo="model_data/external/" + mn.replace('/', '__') + ".json",
 
      os.environ[RESET_JUDGEMENT_ENV] = "1"
      return "Success!"

+ ########################################
+ # Utility functions
+ ########################################
  def restart_space():
      API.restart_space(repo_id=REPO_ID)
      download_openbench()
 
  def update_plot(selected_models):
      return create_plot(selected_models)

+ ########################################
+ # Build Gradio app
+ ########################################
  def build_demo():
      download_openbench()
      demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
 
                  cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                  label="Select Columns to Display:",
              ),
+             search_columns=[AutoEvalColumn.model.name],
          )

      with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
 
          with gr.Column():
              model_name_textbox = gr.Textbox(label="Model name")
+
+             # Use 'bytes' so we load file content in memory.
+             file_output = gr.File(
+                 label="Drag and drop JSON file judgment here",
+                 type="bytes"
+             )
+
+             # We'll store the returned JSON object in uploaded_file (NOT the path).
              uploaded_file = gr.State()
+
          with gr.Row():
              with gr.Column():
+                 out = gr.Textbox("Submission Status")
+
+                 submit_button = gr.Button("Submit File", variant='primary')

+         # On file upload, parse JSON -> store in uploaded_file
          file_output.upload(
+             fn=handle_file_upload,
+             inputs=file_output,
+             outputs=uploaded_file
          )

+         # On button click, call submit_file with the stored JSON + model name
          submit_button.click(
+             fn=submit_file,
+             inputs=[uploaded_file, model_name_textbox],
+             outputs=[out]
          )

      with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
 
          )
      return demo

+ ########################################
+ # Aggregation and scheduling
+ ########################################
  def aggregate_leaderboard_data():
      download_dataset("kz-transformers/s-openbench-eval", "m_data")
 
              "kk_biology_unt_mc": 0.22330729166666666,
              "kk_human_society_rights_unt_mc": 0.242152466367713,
          },
+         # ...(rest of your baseline entries)...
      ]

      files_list = glob.glob("./m_data/model_data/external/*.json")
      logging.info(f'FILES LIST: {files_list}')

      for file in files_list:
+         logging.info(f'Trying to read external submit file: {file}')
+         try:
+             with open(file) as f:
                  data = json.load(f)
+             if not isinstance(data, dict):
+                 logging.warning(f"File {file} is not a dict, skipping")
+                 continue
+             required_keys = {'model_dtype', 'model', 'ppl', 'mmlu_translated_kk'}
+             if not required_keys.issubset(data.keys()):
+                 logging.warning(f"File {file} missing required keys, skipping")
                  continue

+             logging.info(f'Successfully read: {file}, got {len(data)} keys')
+             data_list.append(data)
+         except Exception as e:
+             logging.error(f"Error reading file {file}: {e}")
+             continue
+
      logging.info("Combined data_list length: %d", len(data_list))

      with open("genned.json", "w") as f:
 
      logging.info("Updating the judgement at startup")
      aggregate_leaderboard_data()

+ ########################################
+ # Main
+ ########################################
  if __name__ == "__main__":
      os.environ[RESET_JUDGEMENT_ENV] = "1"
+     from apscheduler.schedulers.background import BackgroundScheduler
      scheduler = BackgroundScheduler()
      update_board_()
      scheduler.add_job(update_board, "interval", minutes=10)
      scheduler.start()

      demo_app = build_demo()
+     # Don't pass root_path on HF Spaces. Let it mount at default "/"
+     demo_app.launch(debug=True, share=False, show_api=False)
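
For context, a minimal sketch of what the schema patch at the top of the new app.py guards against; this snippet is illustrative only (not part of the commit) and assumes gradio_client is installed and that the patch block above has already run:

import gradio_client.utils as gc_utils

# Boolean schemas such as additionalProperties: false used to raise
# "argument of type 'bool' is not iterable"; the patched helper maps them to {}.
assert gc_utils._json_schema_to_python_type(False, None) == {}
assert gc_utils._json_schema_to_python_type(True, None) == {}

# Non-boolean schemas still go through the original gradio_client helper.
print(gc_utils._json_schema_to_python_type({"type": "string"}, None))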