File size: 1,264 Bytes
da6e1bc 4d13673 da6e1bc 8274634 da6e1bc 8274634 260c1a3 da6e1bc 3ed02d5 da6e1bc 8274634 da6e1bc ce2acb0 260c1a3 da6e1bc 4d13673 723f963 da6e1bc 4d13673 da6e1bc d91b022 4d13673 da6e1bc 2c21cf7 da6e1bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import asyncio
import json
import numpy as np
import pandas as pd
from tqdm.asyncio import tqdm_asyncio
from languages import languages
from models import models
from tasks import tasks
# ===== config =====
# Number of indices (0 .. n_sentences - 1) handed to every task as its third
# argument; presumably one index per test sentence -- confirm against `tasks`.
n_sentences = 10
# Only the first n_languages rows of `languages` are considered (rows are
# additionally filtered by their `in_benchmark` field in evaluate()).
n_languages = 40
# Only the first n_models entries of models["id"] are evaluated.
n_models = 25
# ===== run evaluation and aggregate results =====
async def evaluate():
    """Create one awaitable per (task, index, language, model) and await them all.

    Combinations are generated in the order: task, then index in
    ``range(n_sentences)``, then language (first ``n_languages`` rows of
    ``languages`` whose ``in_benchmark`` field is truthy), then model id
    (first ``n_models`` entries of ``models["id"]``).

    Returns:
        The value of ``tqdm_asyncio.gather`` over all created awaitables,
        i.e. one result per ``task(...)`` call.
    """
    print("running evaluations")
    pending = []
    for task in tasks:
        for sentence_idx in range(n_sentences):
            for lang in languages.iloc[:n_languages].itertuples():
                # Skip languages that are not flagged for the benchmark.
                if not lang.in_benchmark:  # TODO
                    continue
                for model in models["id"].iloc[:n_models]:
                    pending.append(task(model, lang.bcp_47, sentence_idx))
    # miniters=1 makes the progress bar refresh on every completed awaitable.
    return await tqdm_asyncio.gather(*pending, miniters=1)
def serialize(df):
    """Return the rows of ``df`` as a list of dicts with missing values as ``None``.

    ``np.nan`` and ``pd.NA`` are replaced by ``None`` before conversion, so the
    resulting records contain ``None`` wherever the frame had a missing value.

    Args:
        df: Object providing ``replace`` and ``to_dict(orient="records")``
            (a pandas DataFrame in this file's usage).

    Returns:
        One dict per row, keyed by column name.
    """
    missing_to_none = {np.nan: None, pd.NA: None}
    cleaned = df.replace(missing_to_none)
    return cleaned.to_dict(orient="records")
async def main():
    """Run every evaluation and write languages, models and scores to results.json.

    Side effects:
        * Overwrites ``models["creation_date"]`` in place with the result of
          calling ``.isoformat()`` on each value, so the column can be dumped
          as JSON. NOTE(review): because the shared ``models`` object is
          mutated, calling ``main()`` twice in one process would call
          ``.isoformat()`` on the already-converted values -- confirm this is
          only ever run once.
        * Creates/overwrites ``results.json`` in the current working directory.
    """
    models["creation_date"] = models["creation_date"].apply(lambda x: x.isoformat())
    results = await evaluate()
    # Each gathered result is itself iterable (presumably a list of score
    # records per task call); flatten them into a single list.
    results = [r for group in results for r in group]
    results = {
        "languages": serialize(languages),
        "models": serialize(models),
        "scores": results,
    }
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding must be pinned to UTF-8; the platform default (e.g. cp1252 on
    # Windows) could otherwise raise UnicodeEncodeError or produce invalid JSON.
    with open("results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
if __name__ == "__main__":
    # Script entry point: drive the async pipeline to completion.
    asyncio.run(main())
|