kaz-llm-lb

Running

App Files Community

kz-transformers committed 28 days ago

Commit

01d59fd

verified ·

1 Parent(s): e532f8a

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -195

app.py CHANGED Viewed

@@ -1,3 +1,22 @@
 import logging
 import os
 os.makedirs("tmp", exist_ok=True)
@@ -7,55 +26,82 @@ import shutil
 import glob
 import gradio as gr
 import numpy as np
-from src.radial.radial import create_plot
 from apscheduler.schedulers.background import BackgroundScheduler
-from gradio_leaderboard import Leaderboard, SelectColumns
-from gradio_space_ci import enable_space_ci
 import json
 from io import BytesIO
-# ---------------------------
-# File upload and submission handlers
-# ---------------------------
-def handle_file_upload(file):
-    file_path = file.name.split("/")[-1] if "/" in file.name else file.name
-    logging.info("File uploaded: %s", file_path)
-    with open(file.name, "r") as f:
-        v = json.load(f)
-    return v, file_path
-def submit_file(v, file_path, mn):
     print('START SUBMITTING!!!')
     new_file = v['results']
-    new_file['model'] = mn  # Directly assign the provided model name.
     columns = [
-        'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc',
         'kazakh_and_literature_unt_mc', 'kk_geography_unt_mc',
         'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc',
         'kk_english_unt_mc', 'kk_biology_unt_mc', 'kk_human_society_rights_unt_mc'
     ]
     for column in columns:
         new_file[column] = new_file[column]['acc,none']
     new_file['model_dtype'] = v['config']["model_dtype"]
     new_file['ppl'] = 0
     print('WE READ FILE: ', new_file)
-    def replace_booleans_with_empty_dict(d):
-        if isinstance(d, dict):
-            return {k: ({} if isinstance(v, bool) else replace_booleans_with_empty_dict(v))
-                    for k, v in d.items()}
-        elif isinstance(d, list):
-            return [replace_booleans_with_empty_dict(item) for item in d]
-        else:
-            return d
-    clean_data = replace_booleans_with_empty_dict(new_file)
     buf = BytesIO()
-    buf.write(json.dumps(clean_data).encode('utf-8'))
     API.upload_file(
         path_or_fileobj=buf,
         path_in_repo="model_data/external/" + mn.replace('/', '__') + ".json",
@@ -66,27 +112,9 @@ def submit_file(v, file_path, mn):
     os.environ[RESET_JUDGEMENT_ENV] = "1"
     return "Success!"
-# ---------------------------
-# Import display and environment configuration.
-# ---------------------------
-from src.display.about import INTRODUCTION_TEXT, TITLE, LLM_BENCHMARKS_TEXT
-from src.display.css_html_js import custom_css
-from src.display.utils import AutoEvalColumn, fields
-from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
-from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
-import huggingface_hub
-# huggingface_hub.login(token=H4_TOKEN)
-os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
-# Configure logging
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-# Enable space CI (if required)
-enable_space_ci()
-# Optionally, you can call download_openbench() here if needed.
-# download_openbench()
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
     download_openbench()
@@ -94,9 +122,9 @@ def restart_space():
 def update_plot(selected_models):
     return create_plot(selected_models)
-# ---------------------------
-# Build Demo Interface (with removed OAuth custom types)
-# ---------------------------
 def build_demo():
     download_openbench()
     demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
@@ -115,9 +143,7 @@ def build_demo():
                         cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                         label="Select Columns to Display:",
                     ),
-                    search_columns=[
-                        AutoEvalColumn.model.name,
-                    ],
                 )
             with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
@@ -128,25 +154,34 @@ def build_demo():
                 with gr.Column():
                     model_name_textbox = gr.Textbox(label="Model name")
-                    file_output = gr.File(label="Drag and drop JSON file judgment here", type="filepath")
-                    # Remove the OAuth login button so that no custom type is used.
                     uploaded_file = gr.State()
-                    file_path = gr.State()
                     with gr.Row():
                         with gr.Column():
-                            out = gr.Textbox("Статус отправки")
-                    submit_button = gr.Button("Submit File", elem_id="submit_button", variant='primary')
                     file_output.upload(
-                        handle_file_upload,
-                        file_output,
-                        [uploaded_file, file_path]
                     )
                     submit_button.click(
-                        submit_file,
-                        [uploaded_file, file_path, model_name_textbox],
-                        [out]
                     )
             with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
@@ -167,9 +202,9 @@ def build_demo():
                 )
                 return demo
-# ---------------------------
-# Aggregation functions to update leaderboard data.
-# ---------------------------
 def aggregate_leaderboard_data():
     download_dataset("kz-transformers/s-openbench-eval", "m_data")
@@ -190,136 +225,31 @@ def aggregate_leaderboard_data():
             "kk_biology_unt_mc": 0.22330729166666666,
             "kk_human_society_rights_unt_mc": 0.242152466367713,
         },
-        {
-            "model_dtype": "torch.float16",
-            "model": "gpt-4o-mini",
-            "ppl": 0,
-            "mmlu_translated_kk": 0.5623775310254735,
-            "kk_constitution_mc": 0.79,
-            "kk_dastur_mc": 0.755,
-            "kazakh_and_literature_unt_mc": 0.4953071672354949,
-            "kk_geography_unt_mc": 0.5675203725261933,
-            "kk_world_history_unt_mc": 0.6091205211726385,
-            "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087,
-            "kk_english_unt_mc": 0.6763768775603095,
-            "kk_biology_unt_mc": 0.607421875,
-            "kk_human_society_rights_unt_mc": 0.7309417040358744,
-        },
-        {
-            "model_dtype": "api",
-            "model": "gpt-4o",
-            "ppl": 0,
-            "mmlu_translated_kk": 0.7419986936642717,
-            "kk_constitution_mc": 0.841,
-            "kk_dastur_mc": 0.798,
-            "kazakh_and_literature_unt_mc": 0.6785409556313993,
-            "kk_geography_unt_mc": 0.629802095459837,
-            "kk_world_history_unt_mc": 0.6783387622149837,
-            "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632,
-            "kk_english_unt_mc": 0.7410104688211198,
-            "kk_biology_unt_mc": 0.6979166666666666,
-            "kk_human_society_rights_unt_mc": 0.7937219730941704,
-        },
-        {
-            "model_dtype": "torch.float16",
-            "model": "nova-pro-v1",
-            "ppl": 0,
-            "mmlu_translated_kk": 0.6792945787067276,
-            "kk_constitution_mc": 0.7753623188405797,
-            "kk_dastur_mc": 0.718407960199005,
-            "kazakh_and_literature_unt_mc": 0.4656569965870307,
-            "kk_geography_unt_mc": 0.5541327124563445,
-            "kk_world_history_unt_mc": 0.6425081433224755,
-            "kk_history_of_kazakhstan_unt_mc": 0.5,
-            "kk_english_unt_mc": 0.6845698680018206,
-            "kk_biology_unt_mc": 0.6197916666666666,
-            "kk_human_society_rights_unt_mc": 0.7713004484304933,
-        },
-        {
-            "model_dtype": "torch.float16",
-            "model": "gemini-1.5-pro",
-            "ppl": 0,
-            "mmlu_translated_kk": 0.7380796864794252,
-            "kk_constitution_mc": 0.8164251207729468,
-            "kk_dastur_mc": 0.7383084577114428,
-            "kazakh_and_literature_unt_mc": 0.5565273037542662,
-            "kk_geography_unt_mc": 0.6065192083818394,
-            "kk_world_history_unt_mc": 0.6669381107491856,
-            "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785,
-            "kk_english_unt_mc": 0.7114246700045517,
-            "kk_biology_unt_mc": 0.6673177083333334,
-            "kk_human_society_rights_unt_mc": 0.7623318385650224,
-        },
-        {
-            "model_dtype": "torch.float16",
-            "model": "gemini-1.5-flash",
-            "ppl": 0,
-            "mmlu_translated_kk": 0.6335728282168517,
-            "kk_constitution_mc": 0.748792270531401,
-            "kk_dastur_mc": 0.7054726368159204,
-            "kazakh_and_literature_unt_mc": 0.4761092150170648,
-            "kk_geography_unt_mc": 0.5640279394644936,
-            "kk_world_history_unt_mc": 0.5838762214983714,
-            "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355,
-            "kk_english_unt_mc": 0.6681838871187984,
-            "kk_biology_unt_mc": 0.6217447916666666,
-            "kk_human_society_rights_unt_mc": 0.7040358744394619,
-        },
-        {
-            "model_dtype": "torch.float16",
-            "model": "claude-3-5-sonnet",
-            "ppl": 0,
-            "mmlu_translated_kk": 0.7335075114304376,
-            "kk_constitution_mc": 0.8623188405797102,
-            "kk_dastur_mc": 0.7950248756218905,
-            "kazakh_and_literature_unt_mc": 0.6548634812286689,
-            "kk_geography_unt_mc": 0.6431897555296857,
-            "kk_world_history_unt_mc": 0.6669381107491856,
-            "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289,
-            "kk_english_unt_mc": 0.7291761492944925,
-            "kk_biology_unt_mc": 0.6686197916666666,
-            "kk_human_society_rights_unt_mc": 0.8026905829596412,
-        },
-        {
-            "model_dtype": "torch.float16",
-            "model": "yandex-gpt",
-            "ppl": 0,
-            "mmlu_translated_kk": 0.39777922926192033,
-            "kk_constitution_mc": 0.7028985507246377,
-            "kk_dastur_mc": 0.6159203980099502,
-            "kazakh_and_literature_unt_mc": 0.3914249146757679,
-            "kk_geography_unt_mc": 0.4912689173457509,
-            "kk_world_history_unt_mc": 0.5244299674267101,
-            "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767,
-            "kk_english_unt_mc": 0.5844333181611289,
-            "kk_biology_unt_mc": 0.4368489583333333,
-            "kk_human_society_rights_unt_mc": 0.6995515695067265,
-        }
     ]
     files_list = glob.glob("./m_data/model_data/external/*.json")
     logging.info(f'FILES LIST: {files_list}')
     for file in files_list:
-        with open(file) as f:
-            logging.info(f'Trying to read external submit file: {file}')
-            try:
                 data = json.load(f)
-                # Validate that data is a dict and has some required keys:
-                if not isinstance(data, dict):
-                    logging.warning(f"File {file} is not a dict, skipping")
-                    continue
-                required_keys = {'model_dtype', 'model', 'ppl', 'mmlu_translated_kk'}
-                if not required_keys.issubset(data.keys()):
-                    logging.warning(f"File {file} missing required keys, skipping")
-                    continue
-                logging.info(f'Successfully read: {file}, got {len(data)} keys')
-                data_list.append(data)
-            except Exception as e:
-                logging.error(f"Error reading file {file}: {e}")
                 continue
     logging.info("Combined data_list length: %d", len(data_list))
     with open("genned.json", "w") as f:
@@ -345,18 +275,17 @@ def update_board_():
     logging.info("Updating the judgement at startup")
     aggregate_leaderboard_data()
-# ---------------------------
-# Main execution
-# ---------------------------
 if __name__ == "__main__":
     os.environ[RESET_JUDGEMENT_ENV] = "1"
     scheduler = BackgroundScheduler()
     update_board_()
     scheduler.add_job(update_board, "interval", minutes=10)
     scheduler.start()
     demo_app = build_demo()
-    demo_app.launch(debug=True, share=False, show_api=False, root_path="/")

+########################################
+# Gradio schema patch (optional)
+# Place this at the very top to avoid "bool not iterable" issues
+########################################
+import gradio_client.utils as gc_utils
+_original_json_schema_to_python_type = gc_utils._json_schema_to_python_type
+def patched_json_schema_to_python_type(schema, defs=None):
+    if isinstance(schema, bool):
+        # If we find a boolean schema (e.g. additionalProperties: false), return {}
+        return {}
+    return _original_json_schema_to_python_type(schema, defs)
+gc_utils._json_schema_to_python_type = patched_json_schema_to_python_type
+########################################
+# Standard imports
+########################################
 import logging
 import os
 os.makedirs("tmp", exist_ok=True)
 import glob
 import gradio as gr
 import numpy as np
 from apscheduler.schedulers.background import BackgroundScheduler
 import json
 from io import BytesIO
+# Additional imports from your code
+from src.radial.radial import create_plot
+from gradio_leaderboard import Leaderboard, SelectColumns
+from gradio_space_ci import enable_space_ci
+from src.display.about import INTRODUCTION_TEXT, TITLE, LLM_BENCHMARKS_TEXT
+from src.display.css_html_js import custom_css
+from src.display.utils import AutoEvalColumn, fields
+from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
+from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
+import huggingface_hub
+# huggingface_hub.login(token=H4_TOKEN)
+os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+enable_space_ci()
+########################################
+# File handlers
+########################################
+def handle_file_upload(file_bytes):
+    """
+    Read the uploaded bytes and parse JSON directly,
+    avoiding ephemeral disk paths or file read issues.
+    """
+    logging.info("File uploaded (bytes). Size: %d bytes", len(file_bytes))
+    # Convert from bytes to JSON (assuming valid JSON input).
+    v = json.loads(file_bytes.decode("utf-8"))
+    return v
+def submit_file(v, mn):
+    """
+    We removed file_path because we no longer need it
+    (no ephemeral path). 'v' is the loaded JSON object.
+    """
     print('START SUBMITTING!!!')
+    # Validate that 'results' exists in v
+    if 'results' not in v:
+        return "Invalid JSON: missing 'results' key"
     new_file = v['results']
+    new_file['model'] = mn
     columns = [
+        'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc',
         'kazakh_and_literature_unt_mc', 'kk_geography_unt_mc',
         'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc',
         'kk_english_unt_mc', 'kk_biology_unt_mc', 'kk_human_society_rights_unt_mc'
     ]
     for column in columns:
+        # Validate data structure
+        if column not in new_file or not isinstance(new_file[column], dict):
+            return f"Missing or invalid column: {column}"
+        if 'acc,none' not in new_file[column]:
+            return f"Missing 'acc,none' key in column: {column}"
         new_file[column] = new_file[column]['acc,none']
+    # Validate 'config'
+    if 'config' not in v or 'model_dtype' not in v['config']:
+        return "Missing 'config' or 'model_dtype' in JSON"
     new_file['model_dtype'] = v['config']["model_dtype"]
     new_file['ppl'] = 0
     print('WE READ FILE: ', new_file)
+    # Convert to JSON and upload
     buf = BytesIO()
+    buf.write(json.dumps(new_file).encode('utf-8'))
+    buf.seek(0)  # Rewind
     API.upload_file(
         path_or_fileobj=buf,
         path_in_repo="model_data/external/" + mn.replace('/', '__') + ".json",
     os.environ[RESET_JUDGEMENT_ENV] = "1"
     return "Success!"
+########################################
+# Utility functions
+########################################
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
     download_openbench()
 def update_plot(selected_models):
     return create_plot(selected_models)
+########################################
+# Build Gradio app
+########################################
 def build_demo():
     download_openbench()
     demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
                         cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                         label="Select Columns to Display:",
                     ),
+                    search_columns=[AutoEvalColumn.model.name],
                 )
             with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
                 with gr.Column():
                     model_name_textbox = gr.Textbox(label="Model name")
+                    # Use 'bytes' so we load file content in memory.
+                    file_output = gr.File(
+                        label="Drag and drop JSON file judgment here",
+                        type="bytes"
+                    )
+                    # We'll store the returned JSON object in uploaded_file (NOT the path).
                     uploaded_file = gr.State()
                     with gr.Row():
                         with gr.Column():
+                            out = gr.Textbox("Submission Status")
+                    submit_button = gr.Button("Submit File", variant='primary')
+                    # On file upload, parse JSON -> store in uploaded_file
                     file_output.upload(
+                        fn=handle_file_upload,
+                        inputs=file_output,
+                        outputs=uploaded_file
                     )
+                    # On button click, call submit_file with the stored JSON + model name
                     submit_button.click(
+                        fn=submit_file,
+                        inputs=[uploaded_file, model_name_textbox],
+                        outputs=[out]
                     )
             with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
                 )
                 return demo
+########################################
+# Aggregation and scheduling
+########################################
 def aggregate_leaderboard_data():
     download_dataset("kz-transformers/s-openbench-eval", "m_data")
             "kk_biology_unt_mc": 0.22330729166666666,
             "kk_human_society_rights_unt_mc": 0.242152466367713,
         },
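+        # TODO: this commit removed the baseline entries for gpt-4o-mini, gpt-4o, nova-pro-v1,
+        # gemini-1.5-pro, gemini-1.5-flash, claude-3-5-sonnet and yandex-gpt; restore them here.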
     ]
     files_list = glob.glob("./m_data/model_data/external/*.json")
     logging.info(f'FILES LIST: {files_list}')
     for file in files_list:
+        logging.info(f'Trying to read external submit file: {file}')
+        try:
+            with open(file) as f:
                 data = json.load(f)
+            if not isinstance(data, dict):
+                logging.warning(f"File {file} is not a dict, skipping")
+                continue
+            required_keys = {'model_dtype', 'model', 'ppl', 'mmlu_translated_kk'}
+            if not required_keys.issubset(data.keys()):
+                logging.warning(f"File {file} missing required keys, skipping")
                 continue
+            logging.info(f'Successfully read: {file}, got {len(data)} keys')
+            data_list.append(data)
+        except Exception as e:
+            logging.error(f"Error reading file {file}: {e}")
+            continue
     logging.info("Combined data_list length: %d", len(data_list))
     with open("genned.json", "w") as f:
     logging.info("Updating the judgement at startup")
     aggregate_leaderboard_data()
+########################################
+# Main
+########################################
 if __name__ == "__main__":
     os.environ[RESET_JUDGEMENT_ENV] = "1"
+    from apscheduler.schedulers.background import BackgroundScheduler
     scheduler = BackgroundScheduler()
     update_board_()
     scheduler.add_job(update_board, "interval", minutes=10)
     scheduler.start()
     demo_app = build_demo()
+    # Don't pass root_path on HF Spaces. Let it mount at default "/"
+    demo_app.launch(debug=True, share=False, show_api=False)