Spaces:
Running
Running
import gradio_client.utils as gc_utils | |
_original_json_schema_to_python_type = gc_utils._json_schema_to_python_type | |
def patched_json_schema_to_python_type(schema, defs=None):
    """Wrap gradio_client's schema converter so that boolean schemas are tolerated.

    When ``schema`` is a bare ``bool`` the saved original converter is never
    called and an empty dict is returned instead. Any other schema is forwarded
    unchanged, together with ``defs``, to the original converter.
    """
    # Everything that is not a bare boolean goes straight to the saved original.
    if not isinstance(schema, bool):
        return _original_json_schema_to_python_type(schema, defs)
    return {}
gc_utils._json_schema_to_python_type = patched_json_schema_to_python_type | |
import logging | |
import os | |
# Point TMP_DIR at a local scratch directory and make sure it exists; this runs
# between the imports, i.e. before the modules imported further down are loaded.
os.environ['TMP_DIR'] = "tmp"
os.makedirs(os.environ['TMP_DIR'], exist_ok=True)
import subprocess | |
import shutil | |
import glob | |
import gradio as gr | |
import numpy as np | |
from apscheduler.schedulers.background import BackgroundScheduler | |
import json | |
from io import BytesIO | |
from src.radial.radial import create_plot | |
from gradio_leaderboard import Leaderboard, SelectColumns | |
from gradio_space_ci import enable_space_ci | |
from src.display.about import INTRODUCTION_TEXT, TITLE, LLM_BENCHMARKS_TEXT | |
from src.display.css_html_js import custom_css | |
from src.display.utils import AutoEvalColumn, fields | |
from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV | |
from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset | |
import huggingface_hub | |
# Process-wide setup that runs at import time, in this order:
# 1. set GRADIO_ANALYTICS_ENABLED to "false" in the environment,
# 2. configure root logging at INFO with a timestamped format,
# 3. call enable_space_ci() from gradio_space_ci.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
enable_space_ci()
def handle_file_upload(file_bytes):
    """Parse the raw bytes of an uploaded file as JSON.

    The payload is handed to ``json.loads`` as bytes so the parser detects the
    encoding itself. This accepts UTF-8 with a byte-order mark (and
    UTF-16/UTF-32), which an explicit ``decode("utf-8")`` followed by
    ``json.loads`` on the resulting string rejects.

    Args:
        file_bytes: Raw content of the uploaded file, or ``None`` when no file
            is present.

    Returns:
        The decoded JSON value, or ``None`` when ``file_bytes`` is ``None``.

    Raises:
        ValueError: If the payload cannot be decoded or is not valid JSON
            (``UnicodeDecodeError`` and ``json.JSONDecodeError`` are both
            ``ValueError`` subclasses).
    """
    if file_bytes is None:
        # Nothing to parse; hand back None instead of failing on len(None).
        logging.warning("File upload handler called without a file")
        return None
    logging.info("File uploaded (bytes). Size: %d bytes", len(file_bytes))
    return json.loads(file_bytes)
def submit_file(v, mn):
    """Validate an uploaded evaluation result and upload it to the submissions dataset.

    The uploaded record is a copy of ``v['results']`` in which each required
    benchmark entry is replaced by its ``'acc,none'`` value, plus the keys
    ``model``, ``model_dtype`` (from ``v['config']``) and ``ppl`` (always 0).
    ``v`` itself is never modified, so a failed validation or a repeated
    submit sees the same input again.

    Args:
        v: Parsed JSON of the uploaded results file. Expected to be a dict with
            a ``results`` dict and a ``config`` dict containing ``model_dtype``.
        mn: Model name entered by the submitter. Surrounding whitespace is
            stripped; ``/`` is replaced by ``__`` to form the file name in the
            dataset repo.

    Returns:
        ``"Success!"`` after the upload, otherwise a message describing the
        first validation problem found (nothing is uploaded in that case).
    """
    logging.info('START SUBMITTING!!!')

    # v is None when no file was uploaded (or parsing produced a non-object).
    if not isinstance(v, dict):
        return "Invalid submission: upload a JSON file first"
    if 'results' not in v:
        return "Invalid JSON: missing 'results' key"
    results = v['results']
    if not isinstance(results, dict):
        return "Invalid JSON: 'results' must be an object"

    # An empty name would otherwise be uploaded as "model_data/external/.json".
    model_name = mn.strip() if isinstance(mn, str) else ""
    if not model_name:
        return "Model name is required"

    columns = (
        'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc',
        'kazakh_and_literature_unt_mc', 'kk_geography_unt_mc',
        'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc',
        'kk_english_unt_mc', 'kk_biology_unt_mc', 'kk_human_society_rights_unt_mc',
    )

    # Shallow copy: flatten the scores on the copy so the caller's object
    # (held in the Gradio session state) stays intact.
    new_file = dict(results)
    new_file['model'] = model_name
    for column in columns:
        scores = results.get(column)
        if not isinstance(scores, dict):
            return f"Missing or invalid column: {column}"
        if 'acc,none' not in scores:
            return f"Missing 'acc,none' key in column: {column}"
        new_file[column] = scores['acc,none']

    config = v.get('config')
    if not isinstance(config, dict) or 'model_dtype' not in config:
        return "Missing 'config' or 'model_dtype' in JSON"
    new_file['model_dtype'] = config['model_dtype']
    new_file['ppl'] = 0
    logging.info('WE READ FILE: %s', new_file)

    # Upload straight from memory; no temporary file is written.
    payload = BytesIO(json.dumps(new_file).encode('utf-8'))
    API.upload_file(
        path_or_fileobj=payload,
        path_in_repo="model_data/external/" + model_name.replace('/', '__') + ".json",
        repo_id="kz-transformers/s-openbench-eval",
        repo_type="dataset",
    )
    # Raise the flag that update_board() reads on its next scheduled run.
    os.environ[RESET_JUDGEMENT_ENV] = "1"
    return "Success!"
def restart_space():
    """Ask the Hub API to restart the Space ``REPO_ID``, then call ``download_openbench()``."""
    space_id = REPO_ID
    API.restart_space(repo_id=space_id)
    download_openbench()
def update_plot(selected_models):
    """Return the plot that ``create_plot`` builds for the given model selection."""
    figure = create_plot(selected_models)
    return figure
def build_demo():
    """Assemble the Gradio app: leaderboard table, submission form and analytics plot.

    Calls ``download_openbench()`` and ``build_leadearboard_df()`` first, then
    creates three tabs inside a ``gr.Blocks`` container and wires the upload,
    submit and dropdown events to their handlers.

    NOTE(review): the nesting of the layout blocks below was reconstructed from
    a copy of this file that had lost its indentation - confirm it against the
    deployed layout.

    Returns:
        The configured ``gr.Blocks`` instance (``launch()`` is not called here).
    """
    download_openbench()
    demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
    leaderboard_df = build_leadearboard_df()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tab-buttons"):
            # NOTE(review): all three tabs below reuse the same elem_id.
            # Tab 1: the leaderboard table; column types and the default /
            # non-deselectable column sets come from the AutoEvalColumn fields.
            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                Leaderboard(
                    value=leaderboard_df,
                    datatype=[c.type for c in fields(AutoEvalColumn)],
                    select_columns=SelectColumns(
                        default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
                        cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                        label="Select Columns to Display:",
                    ),
                    search_columns=[AutoEvalColumn.model.name],
                )
            # Tab 2: submission form.
            with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
                with gr.Row():
                    gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
                with gr.Row():
                    gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")
                    # type="binary": handle_file_upload receives the file content as bytes.
                    file_output = gr.File(
                        label="Drag and drop JSON file judgment here",
                        type="binary"
                    )
                    # Holds the parsed JSON between the upload and the submit click.
                    uploaded_file = gr.State()
                    with gr.Row():
                        with gr.Column():
                            # NOTE(review): the string is passed positionally; presumably that makes
                            # it the textbox's initial value rather than its label - confirm intent.
                            out = gr.Textbox("Submission Status")
                            submit_button = gr.Button("Submit File", variant='primary')
                    # Upload -> parse into state; click -> validate and upload the parsed state.
                    file_output.upload(
                        fn=handle_file_upload,
                        inputs=file_output,
                        outputs=uploaded_file
                    )
                    submit_button.click(
                        fn=submit_file,
                        inputs=[uploaded_file, model_name_textbox],
                        outputs=[out]
                    )
            # Tab 3: plot over a multi-select of the models in the leaderboard frame.
            with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
                with gr.Column():
                    # Every model is offered as a choice and selected initially.
                    model_dropdown = gr.Dropdown(
                        choices=leaderboard_df["model"].tolist(),
                        label="Models",
                        value=leaderboard_df["model"].tolist(),
                        multiselect=True,
                        info="Select models"
                    )
                with gr.Column():
                    # Initial plot is rendered from the dropdown's initial value.
                    plot = gr.Plot(update_plot(model_dropdown.value))
                model_dropdown.change(
                    fn=update_plot,
                    inputs=[model_dropdown],
                    outputs=[plot]
                )
    return demo
def aggregate_leaderboard_data():
    """Merge the built-in entries with external submissions and publish the result.

    Steps, in order:
      1. Download the dataset ``kz-transformers/s-openbench-eval`` into ``m_data``.
      2. Start from the hard-coded entries below.
      3. Append every ``./m_data/model_data/external/*.json`` file that parses to
         a dict containing the keys ``model_dtype``, ``model``, ``ppl`` and
         ``mmlu_translated_kk``; other files are logged and skipped.
      4. Write the combined list to ``genned.json`` and upload it as
         ``leaderboard.json`` to the dataset ``kz-transformers/kaz-llm-lb-metainfo``.
    """
    download_dataset("kz-transformers/s-openbench-eval", "m_data")

    # Hard-coded entries that are always part of the published list.
    data_list = [
        {
            "model_dtype": "torch.float16",
            "model": "dummy-random-baseline",
            "ppl": 0,
            "mmlu_translated_kk": 0.22991508817766165,
            "kk_constitution_mc": 0.25120772946859904,
            "kk_dastur_mc": 0.24477611940298508,
            "kazakh_and_literature_unt_mc": 0.2090443686006826,
            "kk_geography_unt_mc": 0.2019790454016298,
            "kk_world_history_unt_mc": 0.1986970684039088,
            "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428,
            "kk_english_unt_mc": 0.189804278561675,
            "kk_biology_unt_mc": 0.22330729166666666,
            "kk_human_society_rights_unt_mc": 0.242152466367713,
        },
        {
            "model_dtype": "torch.float16",
            "model": "gpt-4o-mini",
            "ppl": 0,
            "mmlu_translated_kk": 0.5623775310254735,
            "kk_constitution_mc": 0.79,
            "kk_dastur_mc": 0.755,
            "kazakh_and_literature_unt_mc": 0.4953071672354949,
            "kk_geography_unt_mc": 0.5675203725261933,
            "kk_world_history_unt_mc": 0.6091205211726385,
            "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087,
            "kk_english_unt_mc": 0.6763768775603095,
            "kk_biology_unt_mc": 0.607421875,
            "kk_human_society_rights_unt_mc": 0.7309417040358744,
        },
        {
            "model_dtype": "api",
            "model": "gpt-4o",
            "ppl": 0,
            "mmlu_translated_kk": 0.7419986936642717,
            "kk_constitution_mc": 0.841,
            "kk_dastur_mc": 0.798,
            "kazakh_and_literature_unt_mc": 0.6785409556313993,
            "kk_geography_unt_mc": 0.629802095459837,
            "kk_world_history_unt_mc": 0.6783387622149837,
            "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632,
            "kk_english_unt_mc": 0.7410104688211198,
            "kk_biology_unt_mc": 0.6979166666666666,
            "kk_human_society_rights_unt_mc": 0.7937219730941704,
        },
        {
            "model_dtype": "torch.float16",
            "model": "nova-pro-v1",
            "ppl": 0,
            "mmlu_translated_kk": 0.6792945787067276,
            "kk_constitution_mc": 0.7753623188405797,
            "kk_dastur_mc": 0.718407960199005,
            "kazakh_and_literature_unt_mc": 0.4656569965870307,
            "kk_geography_unt_mc": 0.5541327124563445,
            "kk_world_history_unt_mc": 0.6425081433224755,
            "kk_history_of_kazakhstan_unt_mc": 0.5,
            "kk_english_unt_mc": 0.6845698680018206,
            "kk_biology_unt_mc": 0.6197916666666666,
            "kk_human_society_rights_unt_mc": 0.7713004484304933,
        },
        {
            "model_dtype": "torch.float16",
            "model": "gemini-1.5-pro",
            "ppl": 0,
            "mmlu_translated_kk": 0.7380796864794252,
            "kk_constitution_mc": 0.8164251207729468,
            "kk_dastur_mc": 0.7383084577114428,
            "kazakh_and_literature_unt_mc": 0.5565273037542662,
            "kk_geography_unt_mc": 0.6065192083818394,
            "kk_world_history_unt_mc": 0.6669381107491856,
            "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785,
            "kk_english_unt_mc": 0.7114246700045517,
            "kk_biology_unt_mc": 0.6673177083333334,
            "kk_human_society_rights_unt_mc": 0.7623318385650224,
        },
        {
            "model_dtype": "torch.float16",
            "model": "gemini-1.5-flash",
            "ppl": 0,
            "mmlu_translated_kk": 0.6335728282168517,
            "kk_constitution_mc": 0.748792270531401,
            "kk_dastur_mc": 0.7054726368159204,
            "kazakh_and_literature_unt_mc": 0.4761092150170648,
            "kk_geography_unt_mc": 0.5640279394644936,
            "kk_world_history_unt_mc": 0.5838762214983714,
            "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355,
            "kk_english_unt_mc": 0.6681838871187984,
            "kk_biology_unt_mc": 0.6217447916666666,
            "kk_human_society_rights_unt_mc": 0.7040358744394619,
        },
        {
            "model_dtype": "torch.float16",
            "model": "claude-3-5-sonnet",
            "ppl": 0,
            "mmlu_translated_kk": 0.7335075114304376,
            "kk_constitution_mc": 0.8623188405797102,
            "kk_dastur_mc": 0.7950248756218905,
            "kazakh_and_literature_unt_mc": 0.6548634812286689,
            "kk_geography_unt_mc": 0.6431897555296857,
            "kk_world_history_unt_mc": 0.6669381107491856,
            "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289,
            "kk_english_unt_mc": 0.7291761492944925,
            "kk_biology_unt_mc": 0.6686197916666666,
            "kk_human_society_rights_unt_mc": 0.8026905829596412,
        },
        {
            "model_dtype": "torch.float16",
            "model": "yandex-gpt",
            "ppl": 0,
            "mmlu_translated_kk": 0.39777922926192033,
            "kk_constitution_mc": 0.7028985507246377,
            "kk_dastur_mc": 0.6159203980099502,
            "kazakh_and_literature_unt_mc": 0.3914249146757679,
            "kk_geography_unt_mc": 0.4912689173457509,
            "kk_world_history_unt_mc": 0.5244299674267101,
            "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767,
            "kk_english_unt_mc": 0.5844333181611289,
            "kk_biology_unt_mc": 0.4368489583333333,
            "kk_human_society_rights_unt_mc": 0.6995515695067265,
        },
    ]

    # Keys an external submission must contain to be accepted.
    required_keys = {'model_dtype', 'model', 'ppl', 'mmlu_translated_kk'}

    files_list = glob.glob("./m_data/model_data/external/*.json")
    logging.info(f'FILES LIST: {files_list}')
    for path in files_list:
        logging.info(f'Trying to read external submit file: {path}')
        try:
            with open(path) as handle:
                entry = json.load(handle)
            if not isinstance(entry, dict):
                logging.warning(f"File {path} is not a dict, skipping")
            elif any(key not in entry for key in required_keys):
                logging.warning(f"File {path} missing required keys, skipping")
            else:
                logging.info(f'Successfully read: {path}, got {len(entry)} keys')
                data_list.append(entry)
        except Exception as e:
            # Best effort: one unreadable file must not stop the aggregation.
            logging.error(f"Error reading file {path}: {e}")

    logging.info("Combined data_list length: %d", len(data_list))

    # Write the merged list locally, then upload that file to the metainfo dataset.
    with open("genned.json", "w") as out_file:
        json.dump(data_list, out_file)
    API.upload_file(
        path_or_fileobj="genned.json",
        path_in_repo="leaderboard.json",
        repo_id="kz-transformers/kaz-llm-lb-metainfo",
        repo_type="dataset",
    )
def update_board():
    """Scheduled job: rebuild and republish the leaderboard when a reset was requested.

    Reads the flag stored in the environment variable named by
    ``RESET_JUDGEMENT_ENV`` (``submit_file`` sets it to ``"1"`` after a
    successful upload). If the flag is not ``"1"`` the call returns without
    doing anything. Otherwise the flag is set back to ``"0"``, the leaderboard
    data is re-aggregated and the Space is restarted.
    """
    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
    logging.info("Updating the judgement (scheduled update): %s", need_reset)
    if need_reset != "1":
        # This branch used to be a bare `pass`, which made the check a no-op:
        # every run fell through to the aggregation and the restart.
        return
    os.environ[RESET_JUDGEMENT_ENV] = "0"
    aggregate_leaderboard_data()
    restart_space()
def update_board_():
    """Startup variant of the board update: log, then aggregate unconditionally.

    Unlike ``update_board`` it neither reads the reset flag nor restarts the Space.
    """
    logging.info("Updating the judgement at startup")
    aggregate_leaderboard_data()
if __name__ == "__main__":
    # Publish a fresh leaderboard once before the UI comes up.
    os.environ[RESET_JUDGEMENT_ENV] = "1"
    update_board_()
    # Start with the reset flag cleared: the aggregation above has already
    # picked up every submission file that exists, so nothing is pending.
    # submit_file sets the flag to "1" again after an upload.
    os.environ[RESET_JUDGEMENT_ENV] = "0"

    # BackgroundScheduler is already imported at the top of the file.
    scheduler = BackgroundScheduler()
    scheduler.add_job(update_board, "interval", minutes=10)
    scheduler.start()

    demo_app = build_demo()
    demo_app.launch(debug=True, share=False, show_api=False)