David Pomerenke
committed on
Commit
·
92b2164
1
Parent(s):
00e463a
Better map tooltip
Browse files
- evals/countries.py +8 -4
- frontend/public/world.geo.json +0 -0
- frontend/src/components/WorldMap.js +17 -4
- results.json +5 -5
evals/countries.py
CHANGED
@@ -2,6 +2,7 @@ import re
|
|
2 |
import xml.etree.ElementTree as ET
|
3 |
from collections import defaultdict
|
4 |
|
|
|
5 |
from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
|
6 |
from language_data.util import data_filename
|
7 |
|
@@ -31,18 +32,21 @@ def population(bcp_47):
|
|
31 |
def make_country_table(language_table):
|
32 |
countries = defaultdict(list)
|
33 |
for lang in language_table.itertuples():
|
34 |
-
for country,
|
35 |
countries[country].append(
|
36 |
{
|
37 |
"name": lang.language_name,
|
38 |
"bcp_47": lang.bcp_47,
|
39 |
-
"population":
|
40 |
"score": lang.average,
|
41 |
}
|
42 |
)
|
43 |
for country, languages in countries.items():
|
44 |
-
|
45 |
-
score =
|
|
|
|
|
|
|
46 |
countries[country] = {
|
47 |
"score": score,
|
48 |
"languages": languages,
|
|
|
2 |
import xml.etree.ElementTree as ET
|
3 |
from collections import defaultdict
|
4 |
|
5 |
+
import pycountry
|
6 |
from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
|
7 |
from language_data.util import data_filename
|
8 |
|
|
|
32 |
def make_country_table(language_table):
|
33 |
countries = defaultdict(list)
|
34 |
for lang in language_table.itertuples():
|
35 |
+
for country, speaker_pop in population(lang.bcp_47).items():
|
36 |
countries[country].append(
|
37 |
{
|
38 |
"name": lang.language_name,
|
39 |
"bcp_47": lang.bcp_47,
|
40 |
+
"population": speaker_pop,
|
41 |
"score": lang.average,
|
42 |
}
|
43 |
)
|
44 |
for country, languages in countries.items():
|
45 |
+
speaker_pop = sum(entry["population"] for entry in languages)
|
46 |
+
score = (
|
47 |
+
sum(entry["score"] * entry["population"] for entry in languages)
|
48 |
+
/ speaker_pop
|
49 |
+
)
|
50 |
countries[country] = {
|
51 |
"score": score,
|
52 |
"languages": languages,
|
frontend/public/world.geo.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
frontend/src/components/WorldMap.js
CHANGED
@@ -2,6 +2,16 @@ import { useRef, useEffect, useState } from 'react'
|
|
2 |
import * as topojson from 'topojson-client'
|
3 |
import * as Plot from '@observablehq/plot'
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
const WorldMap = ({ data }) => {
|
6 |
const containerRef = useRef()
|
7 |
const [mapData, setMapData] = useState()
|
@@ -29,17 +39,20 @@ const WorldMap = ({ data }) => {
|
|
29 |
const score = data.countries[d.properties?.ISO_A2_EH]?.score
|
30 |
return score
|
31 |
},
|
32 |
-
title: d =>
|
|
|
|
|
|
|
|
|
|
|
33 |
tip: true
|
34 |
})
|
35 |
],
|
36 |
color: {
|
37 |
-
|
38 |
unknown: 'gray',
|
39 |
-
// type: 'linear',
|
40 |
label: 'Score',
|
41 |
legend: true,
|
42 |
-
// percent: true,
|
43 |
domain: [0, 0.5]
|
44 |
}
|
45 |
})
|
|
|
2 |
import * as topojson from 'topojson-client'
|
3 |
import * as Plot from '@observablehq/plot'
|
4 |
|
5 |
+
function smoothProgressBar(fraction, { color = false } = {}) {
|
6 |
+
const blocks = ['▏','▎','▍','▌','▋','▊','▉','█'];
|
7 |
+
const width = 10;
|
8 |
+
const totalUnits = width * 8;
|
9 |
+
const filledUnits = Math.round(fraction * totalUnits);
|
10 |
+
const fullBlocks = Math.floor(filledUnits / 8);
|
11 |
+
const remainder = filledUnits % 8;
|
12 |
+
return '█'.repeat(fullBlocks) + (remainder > 0 ? blocks[remainder - 1] : '')
|
13 |
+
}
|
14 |
+
|
15 |
const WorldMap = ({ data }) => {
|
16 |
const containerRef = useRef()
|
17 |
const [mapData, setMapData] = useState()
|
|
|
39 |
const score = data.countries[d.properties?.ISO_A2_EH]?.score
|
40 |
return score
|
41 |
},
|
42 |
+
title: d => {
|
43 |
+
const languages = data.countries[d.properties?.ISO_A2_EH]?.languages.toSorted((a, b) => b.population - a.population)
|
44 |
+
const pop = languages?.map(a => a.population).reduce((prev, a) => prev + a, 0)
|
45 |
+
const langstring = languages?.slice(0, 10).map(a=> `${smoothProgressBar(a.population / pop)} ${a.name}`).join('\n\n') + (languages?.length > 10 ? `\n\n...` : '')
|
46 |
+
return `${d.properties.ADMIN}\n\n${langstring}`
|
47 |
+
},
|
48 |
tip: true
|
49 |
})
|
50 |
],
|
51 |
color: {
|
52 |
+
scheme: 'Blues',
|
53 |
unknown: 'gray',
|
|
|
54 |
label: 'Score',
|
55 |
legend: true,
|
|
|
56 |
domain: [0, 0.5]
|
57 |
}
|
58 |
})
|
results.json
CHANGED
@@ -3048,7 +3048,7 @@
|
|
3048 |
"family": "Eskimo-Aleut",
|
3049 |
"flores_path": null,
|
3050 |
"fleurs_tag": null,
|
3051 |
-
"commonvoice_hours":
|
3052 |
"commonvoice_locale": "esu",
|
3053 |
"in_benchmark": false,
|
3054 |
"task": null,
|
@@ -4584,7 +4584,7 @@
|
|
4584 |
"family": "Eskimo-Aleut",
|
4585 |
"flores_path": null,
|
4586 |
"fleurs_tag": null,
|
4587 |
-
"commonvoice_hours":
|
4588 |
"commonvoice_locale": "ipk",
|
4589 |
"in_benchmark": false,
|
4590 |
"task": null,
|
@@ -5752,7 +5752,7 @@
|
|
5752 |
"family": "Indo-European",
|
5753 |
"flores_path": null,
|
5754 |
"fleurs_tag": null,
|
5755 |
-
"commonvoice_hours": 3
|
5756 |
"commonvoice_locale": "kw",
|
5757 |
"in_benchmark": false,
|
5758 |
"task": null,
|
@@ -9224,7 +9224,7 @@
|
|
9224 |
"family": null,
|
9225 |
"flores_path": null,
|
9226 |
"fleurs_tag": null,
|
9227 |
-
"commonvoice_hours": 1.
|
9228 |
"commonvoice_locale": "sei",
|
9229 |
"in_benchmark": false,
|
9230 |
"task": null,
|
@@ -10952,7 +10952,7 @@
|
|
10952 |
"family": "Austroasiatic",
|
10953 |
"flores_path": "vie_Latn",
|
10954 |
"fleurs_tag": "vi_vn",
|
10955 |
-
"commonvoice_hours":
|
10956 |
"commonvoice_locale": "vi",
|
10957 |
"in_benchmark": true,
|
10958 |
"task": null,
|
|
|
3048 |
"family": "Eskimo-Aleut",
|
3049 |
"flores_path": null,
|
3050 |
"fleurs_tag": null,
|
3051 |
+
"commonvoice_hours": 7.6,
|
3052 |
"commonvoice_locale": "esu",
|
3053 |
"in_benchmark": false,
|
3054 |
"task": null,
|
|
|
4584 |
"family": "Eskimo-Aleut",
|
4585 |
"flores_path": null,
|
4586 |
"fleurs_tag": null,
|
4587 |
+
"commonvoice_hours": 7.2,
|
4588 |
"commonvoice_locale": "ipk",
|
4589 |
"in_benchmark": false,
|
4590 |
"task": null,
|
|
|
5752 |
"family": "Indo-European",
|
5753 |
"flores_path": null,
|
5754 |
"fleurs_tag": null,
|
5755 |
+
"commonvoice_hours": 5.3,
|
5756 |
"commonvoice_locale": "kw",
|
5757 |
"in_benchmark": false,
|
5758 |
"task": null,
|
|
|
9224 |
"family": null,
|
9225 |
"flores_path": null,
|
9226 |
"fleurs_tag": null,
|
9227 |
+
"commonvoice_hours": 1.8,
|
9228 |
"commonvoice_locale": "sei",
|
9229 |
"in_benchmark": false,
|
9230 |
"task": null,
|
|
|
10952 |
"family": "Austroasiatic",
|
10953 |
"flores_path": "vie_Latn",
|
10954 |
"fleurs_tag": "vi_vn",
|
10955 |
+
"commonvoice_hours": 6.0,
|
10956 |
"commonvoice_locale": "vi",
|
10957 |
"in_benchmark": true,
|
10958 |
"task": null,
|