David Pomerenke committed
Commit eb1696c · 1 Parent(s): 566c57e

Fix and refactor backend filtering

Files changed (2)
  1. evals/backend.py +73 -5
  2. evals/tables.py +0 -87
evals/backend.py CHANGED
@@ -11,7 +11,74 @@ from fastapi.staticfiles import StaticFiles
 
 from languages import languages
 from models import models
-from tables import aggregate, make_country_table, make_language_table, make_model_table
+from countries import make_country_table
+
+def mean(lst):
+    return sum(lst) / len(lst) if lst else None
+
+
+def make_model_table(df, models):
+    df = (
+        df.groupby(["model", "task", "metric"])
+        .agg({"score": "mean", "bcp_47": "nunique"})
+        .reset_index()
+    )
+    df["task_metric"] = df["task"] + "_" + df["metric"]
+    df = df.drop(columns=["task", "metric"])
+    task_metrics = df["task_metric"].unique()
+    df = df.pivot(index="model", columns="task_metric", values="score").fillna(0)
+    df["average"] = df[task_metrics].mean(axis=1)
+    df = df.sort_values(by="average", ascending=False).reset_index()
+    df = pd.merge(df, models, left_on="model", right_on="id", how="left")
+    df["creation_date"] = df["creation_date"].dt.strftime("%Y-%m-%d")
+    df["rank"] = df.index + 1
+    df = df[
+        [
+            "rank",
+            "model",
+            "hf_id",
+            "creation_date",
+            "size",
+            "type",
+            "license",
+            "average",
+            *task_metrics,
+        ]
+    ]
+    return df
+
+
+def make_language_table(df, languages):
+    df = (
+        df.groupby(["bcp_47", "task", "metric"])
+        .agg({"score": "mean", "model": "nunique"})
+        .reset_index()
+    )
+    df["task_metric"] = df["task"] + "_" + df["metric"]
+    df = df.drop(columns=["task", "metric"])
+    task_metrics = df["task_metric"].unique()
+    df = (
+        df.pivot(index="bcp_47", columns="task_metric", values="score")
+        .fillna(0)
+        .reset_index()
+    )
+    df["average"] = df[task_metrics].mean(axis=1)
+    df = pd.merge(languages, df, on="bcp_47", how="outer")
+    df = df.sort_values(by="speakers", ascending=False)
+    df = df[
+        [
+            "bcp_47",
+            "language_name",
+            "autonym",
+            "speakers",
+            "family",
+            "average",
+            "in_benchmark",
+            *task_metrics,
+        ]
+    ]
+    return df
+
 
 app = FastAPI()
 
@@ -31,16 +98,17 @@ async def data(request: Request):
     body = await request.body()
     data = json.loads(body)
     selected_languages = data.get("selectedLanguages", {})
-    df = results
-    _, lang_results, model_results, task_results = aggregate(df)
+    df = (
+        results.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
+    )
     # lang_results = pd.merge(languages, lang_results, on="bcp_47", how="outer")
-    language_table = make_language_table(lang_results, languages)
+    language_table = make_language_table(df, languages)
     datasets_df = pd.read_json("data/datasets.json")
     countries = make_country_table(language_table)
     if selected_languages:
         # the filtering is only applied for the model table
         df = df[df["bcp_47"].isin(lang["bcp_47"] for lang in selected_languages)]
-    model_table = make_model_table(model_results, models)
+    model_table = make_model_table(df, models)
     all_tables = {
         "model_table": serialize(model_table),
         "language_table": serialize(language_table),
evals/tables.py DELETED
@@ -1,87 +0,0 @@
-import pandas as pd
-from countries import make_country_table
-
-make_country_table = make_country_table
-
-
-def aggregate(results):
-    results = (
-        results.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
-    )
-    lang_results = (
-        results.groupby(["bcp_47", "task", "metric"])
-        .agg({"score": "mean", "model": "nunique"})
-        .reset_index()
-    )
-    model_results = (
-        results.groupby(["model", "task", "metric"])
-        .agg({"score": "mean", "bcp_47": "nunique"})
-        .reset_index()
-    )
-    task_results = (
-        results.groupby(["task", "metric"])
-        .agg({"score": "mean", "bcp_47": "nunique", "model": "nunique"})
-        .reset_index()
-    )
-    return results, lang_results, model_results, task_results
-
-
-def mean(lst):
-    return sum(lst) / len(lst) if lst else None
-
-
-def make_model_table(df, models):
-    df["task_metric"] = df["task"] + "_" + df["metric"]
-    df = df.drop(columns=["task", "metric"])
-    task_metrics = df["task_metric"].unique()
-    df = df.pivot(index="model", columns="task_metric", values="score").fillna(0)
-    df["average"] = df[task_metrics].mean(axis=1)
-    df = df.sort_values(by="average", ascending=False).reset_index()
-    for row in [*task_metrics, "average"]:
-        df[row] = df[row].round(2)
-    df = pd.merge(df, models, left_on="model", right_on="id", how="left")
-    df["creation_date"] = df["creation_date"].dt.strftime("%Y-%m-%d")
-    df["rank"] = df.index + 1
-    df = df[
-        [
-            "rank",
-            "model",
-            "hf_id",
-            "creation_date",
-            "size",
-            "type",
-            "license",
-            "average",
-            *task_metrics,
-        ]
-    ]
-    return df
-
-
-def make_language_table(df, languages):
-    df["task_metric"] = df["task"] + "_" + df["metric"]
-    df = df.drop(columns=["task", "metric"])
-    task_metrics = df["task_metric"].unique()
-    df = (
-        df.pivot(index="bcp_47", columns="task_metric", values="score")
-        .fillna(0)
-        .reset_index()
-    )
-    df["average"] = df[task_metrics].mean(axis=1)
-    for row in [*task_metrics, "average"]:
-        df[row] = df[row].round(2)
-    df = pd.merge(languages, df, on="bcp_47", how="outer")
-    df = df.sort_values(by="speakers", ascending=False)
-    df = df[
-        [
-            "bcp_47",
-            "language_name",
-            "autonym",
-            "speakers",
-            "family",
-            "average",
-            "in_benchmark",
-            *task_metrics,
-        ]
-    ]
-    return df
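For context on the deleted module: aggregate() collapsed the results over languages up front and handed the already-aggregated model_results to make_model_table, so the later filter on df was a no-op for the model ranking (the make_country_table = make_country_table line merely re-exported the import for from tables import ...). A sketch of that old behaviour on the same hypothetical toy data as above:

# Sketch only: reproduces the old, buggy order of operations on toy data.
import pandas as pd

results = pd.DataFrame(
    {
        "model": ["m1", "m1", "m2", "m2"],
        "bcp_47": ["en", "sw", "en", "sw"],
        "task": ["translation"] * 4,
        "metric": ["bleu"] * 4,
        "score": [0.9, 0.2, 0.4, 0.6],
    }
)

df = results.groupby(["model", "bcp_47", "task", "metric"]).mean().reset_index()
model_results = (  # collapsed over all languages *before* any filtering
    df.groupby(["model", "task", "metric"])
    .agg({"score": "mean", "bcp_47": "nunique"})
    .reset_index()
)

df = df[df["bcp_47"].isin(["sw"])]  # the old filter touched df only
print(model_results)  # still the en + sw averages: the selection had no effect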