Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,29 +1,3 @@
|
|
1 |
-
from starlette.requests import Request
|
2 |
-
from pydantic_core import core_schema
|
3 |
-
|
4 |
-
@classmethod
|
5 |
-
def request_core_schema(cls, source, handler):
|
6 |
-
# Return a basic string core schema without a default.
|
7 |
-
return core_schema.str_schema()
|
8 |
-
|
9 |
-
@classmethod
|
10 |
-
def request_json_schema(cls, core_schema_value, handler):
|
11 |
-
# Return an explicit JSON Schema for Request objects.
|
12 |
-
return {
|
13 |
-
"type": "string",
|
14 |
-
"title": "Request",
|
15 |
-
"description": "A Request object is represented as a string.",
|
16 |
-
"default": ""
|
17 |
-
}
|
18 |
-
|
19 |
-
# Patch the Request class with these methods.
|
20 |
-
Request.__get_pydantic_core_schema__ = request_core_schema
|
21 |
-
Request.__get_pydantic_json_schema__ = request_json_schema
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
import pydantic
|
26 |
-
print("!!!! PYDANTIC VERSION", pydantic.__version__)
|
27 |
import logging
|
28 |
import os
|
29 |
os.makedirs("tmp", exist_ok=True)
|
@@ -40,30 +14,34 @@ from gradio_space_ci import enable_space_ci
|
|
40 |
import json
|
41 |
from io import BytesIO
|
42 |
|
|
|
|
|
|
|
|
|
43 |
def handle_file_upload(file):
    """Load an uploaded JSON file and return its content with its base name.

    Args:
        file: The uploaded file as handed over by the upload event: either an
            object exposing the local path as ``.name`` or the path string
            itself.

    Returns:
        A ``(content, file_name)`` tuple: the parsed JSON document and the
        last ``/``-separated component of the upload path.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid UTF-8 or not valid JSON
            (``UnicodeDecodeError`` / ``json.JSONDecodeError``).
    """
    # Accept a plain path string as well as objects carrying the path in
    # ``.name``, so the handler works with either calling convention.
    source_path = getattr(file, "name", file)
    # ``rsplit`` returns the whole string when there is no "/", so no
    # separate "is there a slash" branch is needed.
    file_path = source_path.rsplit("/", 1)[-1]
    logging.info("File uploaded: %s", file_path)
    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale encoding when decoding it.
    with open(source_path, "r", encoding="utf-8") as f:
        v = json.load(f)
    return v, file_path
|
49 |
-
|
|
|
|
|
|
|
50 |
print('START SUBMITTING!!!')
|
51 |
-
|
52 |
-
return "Hub Login Required"
|
53 |
-
print('PROFILE: ', profile.__dict__)
|
54 |
-
print('MN:', mn)
|
55 |
new_file = v['results']
|
56 |
-
|
57 |
-
new_file['model'] = mn
|
58 |
-
else:
|
59 |
-
new_file['model'] = profile.username + "/" + mn
|
60 |
|
|
|
61 |
columns = [
|
62 |
-
'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc',
|
63 |
-
'
|
64 |
-
'
|
|
|
65 |
]
|
66 |
|
|
|
67 |
for column in columns:
|
68 |
new_file[column] = new_file[column]['acc,none']
|
69 |
|
@@ -73,25 +51,22 @@ def submit_file(v, file_path, mn, profile):
|
|
73 |
print('WE READ FILE: ', new_file)
|
74 |
buf = BytesIO()
|
75 |
buf.write(json.dumps(new_file).encode('utf-8'))
|
|
|
76 |
API.upload_file(
|
77 |
path_or_fileobj=buf,
|
78 |
-
path_in_repo="model_data/external/" +
|
79 |
repo_id="kz-transformers/s-openbench-eval",
|
80 |
repo_type="dataset",
|
81 |
)
|
82 |
os.environ[RESET_JUDGEMENT_ENV] = "1"
|
83 |
return "Success!"
|
84 |
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
LLM_BENCHMARKS_TEXT
|
89 |
-
)
|
90 |
from src.display.css_html_js import custom_css
|
91 |
-
from src.display.utils import
|
92 |
-
AutoEvalColumn,
|
93 |
-
fields,
|
94 |
-
)
|
95 |
from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
|
96 |
from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
|
97 |
import huggingface_hub
|
@@ -102,9 +77,9 @@ os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
|
|
102 |
# Configure logging
|
103 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
104 |
|
105 |
-
#
|
106 |
enable_space_ci()
|
107 |
-
|
108 |
# download_openbench()
|
109 |
|
110 |
def restart_space():
|
@@ -114,6 +89,9 @@ def restart_space():
|
|
114 |
def update_plot(selected_models):
    """Return the plot for the given selection of models.

    Thin adapter around ``create_plot``; the selection is forwarded to it
    unchanged and its result is returned as-is.
    """
    figure = create_plot(selected_models)
    return figure
|
116 |
|
|
|
|
|
|
|
117 |
def build_demo():
|
118 |
download_openbench()
|
119 |
demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
|
@@ -134,16 +112,9 @@ def build_demo():
|
|
134 |
),
|
135 |
search_columns=[
|
136 |
AutoEvalColumn.model.name,
|
137 |
-
# AutoEvalColumn.fullname.name,
|
138 |
-
# AutoEvalColumn.license.name
|
139 |
],
|
140 |
)
|
141 |
|
142 |
-
# with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
|
143 |
-
# gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
144 |
-
# with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=2):
|
145 |
-
# gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
|
146 |
-
|
147 |
with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
|
148 |
with gr.Row():
|
149 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
@@ -151,22 +122,14 @@ def build_demo():
|
|
151 |
gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
|
152 |
|
153 |
with gr.Column():
|
154 |
-
|
155 |
model_name_textbox = gr.Textbox(label="Model name")
|
156 |
-
# submitter_username = gr.Textbox(label="Username")
|
157 |
-
|
158 |
-
# def toggle_upload_button(model_name, username):
|
159 |
-
# return bool(model_name) and bool(username)
|
160 |
file_output = gr.File(label="Drag and drop JSON file judgment here", type="filepath")
|
161 |
-
#
|
162 |
uploaded_file = gr.State()
|
163 |
file_path = gr.State()
|
164 |
with gr.Row():
|
165 |
with gr.Column():
|
166 |
out = gr.Textbox("Статус отправки")
|
167 |
-
with gr.Column():
|
168 |
-
login_button = gr.LoginButton(elem_id="oauth-button")
|
169 |
-
|
170 |
submit_button = gr.Button("Submit File", elem_id="submit_button", variant='primary')
|
171 |
|
172 |
file_output.upload(
|
@@ -182,113 +145,26 @@ def build_demo():
|
|
182 |
)
|
183 |
|
184 |
with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
)
|
193 |
-
with gr.Column():
|
194 |
-
plot = gr.Plot(update_plot(model_dropdown.value))
|
195 |
-
# plot = gr.Plot()
|
196 |
-
model_dropdown.change(
|
197 |
-
fn=update_plot,
|
198 |
-
inputs=[model_dropdown],
|
199 |
-
outputs=[plot]
|
200 |
)
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
# need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
|
210 |
-
# logging.info("Updating the judgement: %s", need_reset)
|
211 |
-
# if need_reset != "1":
|
212 |
-
# # return
|
213 |
-
# pass
|
214 |
-
# os.environ[RESET_JUDGEMENT_ENV] = "0"
|
215 |
-
|
216 |
-
# # `shutil.rmtree("./m_data")` is a Python command that removes a directory and all its contents
|
217 |
-
# # recursively. In this specific context, it is used to delete the directory named "m_data" along
|
218 |
-
# # with all its files and subdirectories. This command helps in cleaning up the existing data in
|
219 |
-
# # the "m_data" directory before downloading new dataset files into it.
|
220 |
-
# # shutil.rmtree("./m_data")
|
221 |
-
# # shutil.rmtree("./data")
|
222 |
-
# download_dataset("kz-transformers/s-openbench-eval", "m_data")
|
223 |
-
# data_list = [{"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713}, {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744}, {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704}, {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, "kk_human_society_rights_unt_mc": 
0.7713004484304933}, {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224}, {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619}, {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412}, {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767, 
"kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265}]
|
224 |
-
# files_list = glob.glob("./m_data/model_data/external/*.json")
|
225 |
-
# print(f'FILES LIST: {files_list}')
|
226 |
-
# for file in files_list:
|
227 |
-
# with open(file) as f:
|
228 |
-
# print(f'trying to read external submit file: {file}')
|
229 |
-
# try:
|
230 |
-
# data = json.load(f)
|
231 |
-
# print(f'succeed to read: {file}, got {len(data)}')
|
232 |
-
# data_list.append(data)
|
233 |
-
# except Exception as e:
|
234 |
-
# pass # data was badly formatted, should not fail
|
235 |
-
# print("DATALIST: ", data_list)
|
236 |
-
|
237 |
-
# with open("genned.json", "w") as f:
|
238 |
-
# json.dump(data_list, f)
|
239 |
-
|
240 |
-
|
241 |
-
# API.upload_file(
|
242 |
-
# path_or_fileobj="genned.json",
|
243 |
-
# path_in_repo="leaderboard.json",
|
244 |
-
# repo_id="kz-transformers/kaz-llm-lb-metainfo",
|
245 |
-
# repo_type="dataset",
|
246 |
-
# )
|
247 |
-
# restart_space()
|
248 |
-
|
249 |
-
|
250 |
-
# # gen_judgement_file = os.path.join(HF_HOME, "src/gen/gen_judgement.py")
|
251 |
-
# # subprocess.run(["python3", gen_judgement_file], check=True)
|
252 |
-
|
253 |
-
# def update_board_():
|
254 |
-
# need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
|
255 |
-
# logging.info("Updating the judgement: %s", need_reset)
|
256 |
-
# if need_reset != "1":
|
257 |
-
# # return
|
258 |
-
# pass
|
259 |
-
# os.environ[RESET_JUDGEMENT_ENV] = "0"
|
260 |
-
|
261 |
-
# # `shutil.rmtree("./m_data")` is a Python command that removes a directory and all its contents
|
262 |
-
# # recursively. In this specific context, it is used to delete the directory named "m_data" along
|
263 |
-
# # with all its files and subdirectories. This command helps in cleaning up the existing data in
|
264 |
-
# # the "m_data" directory before downloading new dataset files into it.
|
265 |
-
# # shutil.rmtree("./m_data")
|
266 |
-
# # shutil.rmtree("./data")
|
267 |
-
# download_dataset("kz-transformers/s-openbench-eval", "m_data")
|
268 |
-
# data_list = [{"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713}, {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744}, {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704}, {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, "kk_human_society_rights_unt_mc": 
0.7713004484304933}, {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224}, {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619}, {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412}, {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767, 
"kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265}]
|
269 |
-
# files_list = glob.glob("./m_data/model_data/external/*.json")
|
270 |
-
# print(f'FILES LIST: {files_list}')
|
271 |
-
# for file in files_list:
|
272 |
-
# with open(file) as f:
|
273 |
-
# print(f'trying to read external submit file: {file}')
|
274 |
-
# try:
|
275 |
-
# data = json.load(f)
|
276 |
-
# print(f'succeed to read: {file}, got {len(data)}')
|
277 |
-
# data_list.append(data)
|
278 |
-
# except Exception as e:
|
279 |
-
# pass # data was badly formatted, should not fail
|
280 |
-
# print("DATALIST: ", data_list)
|
281 |
-
|
282 |
-
# with open("genned.json", "w") as f:
|
283 |
-
# json.dump(data_list, f)
|
284 |
-
|
285 |
-
# API.upload_file(
|
286 |
-
# path_or_fileobj="genned.json",
|
287 |
-
# path_in_repo="leaderboard.json",
|
288 |
-
# repo_id="kz-transformers/kaz-llm-lb-metainfo",
|
289 |
-
# repo_type="dataset",
|
290 |
-
# )
|
291 |
|
|
|
|
|
|
|
292 |
def aggregate_leaderboard_data():
|
293 |
download_dataset("kz-transformers/s-openbench-eval", "m_data")
|
294 |
|
@@ -413,7 +289,7 @@ def aggregate_leaderboard_data():
|
|
413 |
"kk_english_unt_mc": 0.5844333181611289,
|
414 |
"kk_biology_unt_mc": 0.4368489583333333,
|
415 |
"kk_human_society_rights_unt_mc": 0.6995515695067265,
|
416 |
-
}
|
417 |
]
|
418 |
|
419 |
files_list = glob.glob("./m_data/model_data/external/*.json")
|
@@ -455,20 +331,18 @@ def update_board():
|
|
455 |
need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
|
456 |
logging.info("Updating the judgement (scheduled update): %s", need_reset)
|
457 |
if need_reset != "1":
|
458 |
-
# Optionally return early if no update is needed
|
459 |
pass
|
460 |
os.environ[RESET_JUDGEMENT_ENV] = "0"
|
461 |
-
|
462 |
-
# Use the common aggregation function
|
463 |
aggregate_leaderboard_data()
|
464 |
-
restart_space()
|
465 |
|
466 |
def update_board_():
    """Refresh the leaderboard data at startup.

    Logs that the startup update is running and then delegates to
    ``aggregate_leaderboard_data``. This function does not restart the
    Space itself.
    """
    startup_message = "Updating the judgement at startup"
    logging.info(startup_message)
    aggregate_leaderboard_data()
|
470 |
|
471 |
-
|
|
|
|
|
472 |
if __name__ == "__main__":
|
473 |
os.environ[RESET_JUDGEMENT_ENV] = "1"
|
474 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import logging
|
2 |
import os
|
3 |
os.makedirs("tmp", exist_ok=True)
|
|
|
14 |
import json
|
15 |
from io import BytesIO
|
16 |
|
17 |
+
# ---------------------------
|
18 |
+
# File upload and submission handlers
|
19 |
+
# ---------------------------
|
20 |
+
|
21 |
def handle_file_upload(file):
    """Load an uploaded JSON file and return its content with its base name.

    Args:
        file: The uploaded file as handed over by the upload event: either an
            object exposing the local path as ``.name`` or the path string
            itself.

    Returns:
        A ``(content, file_name)`` tuple: the parsed JSON document and the
        last ``/``-separated component of the upload path.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid UTF-8 or not valid JSON
            (``UnicodeDecodeError`` / ``json.JSONDecodeError``).
    """
    # Accept a plain path string as well as objects carrying the path in
    # ``.name``, so the handler works with either calling convention.
    source_path = getattr(file, "name", file)
    # ``rsplit`` returns the whole string when there is no "/", so no
    # separate "is there a slash" branch is needed.
    file_path = source_path.rsplit("/", 1)[-1]
    logging.info("File uploaded: %s", file_path)
    # JSON is UTF-8 by specification; do not depend on the platform's
    # locale encoding when decoding it.
    with open(source_path, "r", encoding="utf-8") as f:
        v = json.load(f)
    return v, file_path
|
27 |
+
|
28 |
+
def submit_file(v, file_path, mn):
|
29 |
+
# We remove the use of a "profile" custom type.
|
30 |
+
# Instead, simply assign the model name directly.
|
31 |
print('START SUBMITTING!!!')
|
32 |
+
# In this version, we assume no authentication is necessary.
|
|
|
|
|
|
|
33 |
new_file = v['results']
|
34 |
+
new_file['model'] = mn # Directly assign the provided model name.
|
|
|
|
|
|
|
35 |
|
36 |
+
# List of expected columns
|
37 |
columns = [
|
38 |
+
'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc',
|
39 |
+
'kazakh_and_literature_unt_mc', 'kk_geography_unt_mc',
|
40 |
+
'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc',
|
41 |
+
'kk_english_unt_mc', 'kk_biology_unt_mc', 'kk_human_society_rights_unt_mc'
|
42 |
]
|
43 |
|
44 |
+
# Process each column to extract the accuracy value.
|
45 |
for column in columns:
|
46 |
new_file[column] = new_file[column]['acc,none']
|
47 |
|
|
|
51 |
print('WE READ FILE: ', new_file)
|
52 |
buf = BytesIO()
|
53 |
buf.write(json.dumps(new_file).encode('utf-8'))
|
54 |
+
# Upload the processed file (assuming API is imported from src.envs)
|
55 |
API.upload_file(
|
56 |
path_or_fileobj=buf,
|
57 |
+
path_in_repo="model_data/external/" + mn.replace('/', '__') + ".json",
|
58 |
repo_id="kz-transformers/s-openbench-eval",
|
59 |
repo_type="dataset",
|
60 |
)
|
61 |
os.environ[RESET_JUDGEMENT_ENV] = "1"
|
62 |
return "Success!"
|
63 |
|
64 |
+
# ---------------------------
|
65 |
+
# Import display and environment configuration.
|
66 |
+
# ---------------------------
|
67 |
+
from src.display.about import INTRODUCTION_TEXT, TITLE, LLM_BENCHMARKS_TEXT
|
|
|
68 |
from src.display.css_html_js import custom_css
|
69 |
+
from src.display.utils import AutoEvalColumn, fields
|
|
|
|
|
|
|
70 |
from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
|
71 |
from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
|
72 |
import huggingface_hub
|
|
|
77 |
# Configure logging
|
78 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
79 |
|
80 |
+
# Enable space CI (if required)
|
81 |
enable_space_ci()
|
82 |
+
# Optionally, you can call download_openbench() here if needed.
|
83 |
# download_openbench()
|
84 |
|
85 |
def restart_space():
|
|
|
89 |
def update_plot(selected_models):
    """Return the plot for the given selection of models.

    Thin adapter around ``create_plot``; the selection is forwarded to it
    unchanged and its result is returned as-is.
    """
    figure = create_plot(selected_models)
    return figure
|
91 |
|
92 |
+
# ---------------------------
|
93 |
+
# Build Demo Interface (with removed OAuth custom types)
|
94 |
+
# ---------------------------
|
95 |
def build_demo():
|
96 |
download_openbench()
|
97 |
demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
|
|
|
112 |
),
|
113 |
search_columns=[
|
114 |
AutoEvalColumn.model.name,
|
|
|
|
|
115 |
],
|
116 |
)
|
117 |
|
|
|
|
|
|
|
|
|
|
|
118 |
with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
|
119 |
with gr.Row():
|
120 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
|
|
122 |
gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
|
123 |
|
124 |
with gr.Column():
|
|
|
125 |
model_name_textbox = gr.Textbox(label="Model name")
|
|
|
|
|
|
|
|
|
126 |
file_output = gr.File(label="Drag and drop JSON file judgment here", type="filepath")
|
127 |
+
# Remove the OAuth login button so that no custom type is used.
|
128 |
uploaded_file = gr.State()
|
129 |
file_path = gr.State()
|
130 |
with gr.Row():
|
131 |
with gr.Column():
|
132 |
out = gr.Textbox("Статус отправки")
|
|
|
|
|
|
|
133 |
submit_button = gr.Button("Submit File", elem_id="submit_button", variant='primary')
|
134 |
|
135 |
file_output.upload(
|
|
|
145 |
)
|
146 |
|
147 |
with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
|
148 |
+
with gr.Column():
|
149 |
+
model_dropdown = gr.Dropdown(
|
150 |
+
choices=leaderboard_df["model"].tolist(),
|
151 |
+
label="Models",
|
152 |
+
value=leaderboard_df["model"].tolist(),
|
153 |
+
multiselect=True,
|
154 |
+
info="Select models"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
)
|
156 |
+
with gr.Column():
|
157 |
+
plot = gr.Plot(update_plot(model_dropdown.value))
|
158 |
+
model_dropdown.change(
|
159 |
+
fn=update_plot,
|
160 |
+
inputs=[model_dropdown],
|
161 |
+
outputs=[plot]
|
162 |
+
)
|
163 |
+
return demo
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
|
165 |
+
# ---------------------------
|
166 |
+
# Aggregation functions to update leaderboard data.
|
167 |
+
# ---------------------------
|
168 |
def aggregate_leaderboard_data():
|
169 |
download_dataset("kz-transformers/s-openbench-eval", "m_data")
|
170 |
|
|
|
289 |
"kk_english_unt_mc": 0.5844333181611289,
|
290 |
"kk_biology_unt_mc": 0.4368489583333333,
|
291 |
"kk_human_society_rights_unt_mc": 0.6995515695067265,
|
292 |
+
}
|
293 |
]
|
294 |
|
295 |
files_list = glob.glob("./m_data/model_data/external/*.json")
|
|
|
331 |
need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
|
332 |
logging.info("Updating the judgement (scheduled update): %s", need_reset)
|
333 |
if need_reset != "1":
|
|
|
334 |
pass
|
335 |
os.environ[RESET_JUDGEMENT_ENV] = "0"
|
|
|
|
|
336 |
aggregate_leaderboard_data()
|
337 |
+
restart_space()
|
338 |
|
339 |
def update_board_():
    """Refresh the leaderboard data at startup.

    Logs that the startup update is running and then delegates to
    ``aggregate_leaderboard_data``. This function does not restart the
    Space itself.
    """
    startup_message = "Updating the judgement at startup"
    logging.info(startup_message)
    aggregate_leaderboard_data()
|
342 |
|
343 |
+
# ---------------------------
|
344 |
+
# Main execution
|
345 |
+
# ---------------------------
|
346 |
if __name__ == "__main__":
|
347 |
os.environ[RESET_JUDGEMENT_ENV] = "1"
|
348 |
|