File size: 1,415 Bytes
da6e1bc 4d13673 da6e1bc 8274634 da6e1bc 8274634 092c06a 260c1a3 da6e1bc 3ed02d5 da6e1bc 2f9dee1 da6e1bc 75010c2 2f9dee1 da6e1bc ce2acb0 2f9dee1 da6e1bc 2f9dee1 d91b022 2f9dee1 da6e1bc 2f9dee1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import asyncio
from pathlib import Path

import pandas as pd
from tqdm.asyncio import tqdm_asyncio

from languages import languages
from models import models
from tasks import tasks
# ===== config =====
# Sentence indices 0 .. n_sentences - 1 are passed to every task call.
n_sentences = 10
# Only the first n_languages rows of `languages` are evaluated.
n_languages = 20
# Only the first n_models entries of `models["id"]` are evaluated.
n_models = 25
# ===== run evaluation and aggregate results =====
async def evaluate():
    """Run every not-yet-recorded evaluation and persist the merged results.

    One task call is created per (task, sentence index, language, model)
    combination, limited by ``n_sentences``, ``n_languages`` and
    ``n_models``. Combinations that already have a row in ``results.json``
    (matched on the ``model``, ``bcp_47``, ``task`` and ``sentence_nr``
    columns) are skipped. The pending calls are awaited together with a
    progress bar; each awaited call is expected to yield an iterable of
    result records, which are flattened, appended to the previous results
    and written back to ``results.json``. ``models.json`` and
    ``languages.json`` are rewritten as well.

    Returns:
        pandas.DataFrame: the previous results followed by the new ones.

    Raises:
        KeyError: if a non-empty ``results.json`` lacks one of the key
            columns listed above.
    """
    print("running evaluations")
    # A first run has no results file yet; start from an empty table then.
    if Path("results.json").exists():
        old_results = pd.read_json("results.json")
    else:
        old_results = pd.DataFrame()
    # Index the finished combinations once so each membership test is a set
    # lookup instead of a filter over the whole table. An empty table has no
    # columns to select, hence the guard.
    key_columns = ["model", "bcp_47", "task", "sentence_nr"]
    if old_results.empty:
        done = set()
    else:
        done = set(old_results[key_columns].itertuples(index=False, name=None))
    selected_languages = languages.iloc[:n_languages]
    selected_models = models["id"].iloc[:n_models]
    # Same nesting order as before, so pending calls are created in the same
    # sequence: task -> sentence -> language -> model.
    pending = [
        task(model, lang.bcp_47, i)
        for task_name, task in tasks.items()
        for i in range(n_sentences)
        for lang in selected_languages.itertuples()
        for model in selected_models
        if (model, lang.bcp_47, task_name, i) not in done
    ]
    groups = await tqdm_asyncio.gather(*pending, miniters=1)
    # Every awaited call yields a group of records; flatten them into rows.
    new_results = pd.DataFrame([row for group in groups for row in group])
    # Recent pandas versions warn when empty frames take part in a concat,
    # and they contribute nothing to the output, so leave them out.
    frames = [frame for frame in (old_results, new_results) if not frame.empty]
    results = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    args = dict(orient="records", indent=2, force_ascii=False)
    results.to_json("results.json", **args)
    pd.DataFrame(models).to_json("models.json", **args)
    pd.DataFrame(languages).to_json("languages.json", **args)
    return results
if __name__ == "__main__":
    # Script entry point: build the evaluation coroutine, then drive it to
    # completion on a fresh event loop.
    pipeline = evaluate()
    results = asyncio.run(pipeline)
|