Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 1,770 Bytes
ea047ad 3adea5e ea047ad 1a6cc70 67741f2 ea047ad 67741f2 ea047ad 3adea5e ea047ad 67741f2 ea047ad 67741f2 ea047ad 67741f2 3adea5e 67741f2 ea047ad 3adea5e ea047ad 3adea5e ea047ad 3adea5e ea047ad 67741f2 ea047ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import os
import subprocess
import asyncio
from yourbench_space.leaderboard_space.env import INIT_MODELS
# True when the "system" environment variable equals "spaces"
# (presumably set by the Hugging Face Spaces runtime -- confirm).
ON_SPACES = os.getenv("system") == "spaces"

# Directory handed to lighteval as --output-dir: "/data" when on Spaces,
# otherwise the current working directory.
if ON_SPACES:
    OUTPUT_DIR = "/data"
else:
    OUTPUT_DIR = "."
def create_eval_file(eval_ds_name: str):
    """Invoke ``lighteval tasks create`` for the given evaluation dataset.

    The task name passed to the CLI is the dataset name with every "/"
    replaced by "_". The command runs synchronously; its exit status is not
    inspected and nothing is returned.

    Args:
        eval_ds_name: Dataset identifier, forwarded unchanged as the last
            CLI argument (presumably a Hub ``org/name`` id -- confirm).
    """
    template_path = "examples/custom_tasks_templates/custom_yourbench_task.py"
    safe_task_name = "_".join(eval_ds_name.split("/"))
    command = [
        "lighteval",
        "tasks",
        "create",
        template_path,
        safe_task_name,
        eval_ds_name,
    ]
    subprocess.run(command)
async def run_process(args: list, timeout: float = 180) -> dict:
    """Run a command as an asyncio subprocess and capture its output.

    Args:
        args: Program followed by its arguments, passed positionally to
            ``asyncio.create_subprocess_exec`` (no shell is involved).
        timeout: Maximum number of seconds to wait for the process to
            finish. Defaults to 180.

    Returns:
        A dict with the keys ``"pid"``, ``"returncode"``, ``"stdout"`` and
        ``"stderr"``; the two streams are decoded to ``str``.

    Raises:
        asyncio.TimeoutError: If the process does not finish within
            ``timeout`` seconds. The child is killed and reaped first.
    """
    process = await asyncio.create_subprocess_exec(
        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    try:
        # communicate() drains both pipes while waiting. Calling wait() with
        # PIPE streams can deadlock once the child fills the OS pipe buffer.
        stdout, stderr = await asyncio.wait_for(
            process.communicate(), timeout=timeout
        )
    except asyncio.TimeoutError:
        # Do not leave a runaway child behind after giving up on it.
        try:
            process.kill()
        except ProcessLookupError:
            # The process exited between the timeout and the kill.
            pass
        await process.wait()
        raise
    return {
        "pid": process.pid,
        "returncode": process.returncode,
        # Tool output is not guaranteed to be valid UTF-8; never fail on it.
        "stdout": stdout.decode(errors="replace"),
        "stderr": stderr.decode(errors="replace"),
    }
async def run_evaluations(eval_ds_name: str, org: str) -> str:
    """Launch one lighteval run per entry in ``INIT_MODELS``, concurrently.

    Each run is started through ``run_process`` with the
    ``lighteval endpoint inference-providers`` command, limited to 30
    samples, writing to ``OUTPUT_DIR`` and pushing results to the hub under
    ``org``.

    Args:
        eval_ds_name: Dataset identifier; "/" is replaced by "_" to build
            the task name and the custom task file name.
        org: Value passed to ``--results-org``.

    Returns:
        ``"✅"`` if no run raised an exception, otherwise
        ``"At least one model failed"``.
    """
    task_name = eval_ds_name.replace("/", "_")
    tasks = [
        run_process(
            [
                "lighteval",
                "endpoint",
                "inference-providers",
                f"model={model_name},provider={provider}",
                f"custom|{task_name}|0|0",
                "--custom-tasks",
                f"custom_{task_name}_task.py",
                "--max-samples",
                "30",
                "--output-dir",
                OUTPUT_DIR,
                "--save-details",
                "--results-org",
                org,
                "--push-to-hub",
            ]
        )
        for model_name, provider in INIT_MODELS
    ]
    # return_exceptions=True hands failures back as values, so one failing
    # run (e.g. a timeout) does not abort the others.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    # NOTE(review): only raised exceptions count as failure here; a run whose
    # process exits with a non-zero status is still reported as success.
    if any(isinstance(result, Exception) for result in results):
        return "At least one model failed"
    return "✅"
|