David Pomerenke committed on
Commit
003fe33
·
1 Parent(s): 4e09406

Re-add dataset logos

Browse files
Files changed (1) hide show
  1. datasets.json +480 -407
datasets.json CHANGED
@@ -1,411 +1,484 @@
1
  [
2
  {
3
- "name": "FLORES+",
4
- "author": "Meta",
5
- "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
6
- "n_languages": 185,
7
- "tasks": [
8
- "translation",
9
- "classification",
10
- "language_modeling"
11
- ],
12
- "parallel": true,
13
- "base": "FLORES",
14
- "implemented": true
15
- },
16
- {
17
- "name": "FLEURS",
18
- "author": "Meta",
19
- "url": "https://huggingface.co/datasets/google/fleurs",
20
- "n_languages": 102,
21
- "tasks": [
22
- "speech_recognition"
23
- ],
24
- "parallel": true,
25
- "base": "FLORES",
26
- "implemented": true
27
- },
28
- {
29
- "name": "CommonVoice",
30
- "author": "Mozilla",
31
- "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_1_0",
32
- "n_languages": 231,
33
- "tasks": [
34
- "speech_recognition"
35
- ],
36
- "parallel": null
37
- },
38
- {
39
- "name": "MMMLU",
40
- "author": "OpenAI",
41
- "url": "https://huggingface.co/datasets/openai/MMMLU",
42
- "n_languages": "14",
43
- "tasks": [
44
- "question_answering"
45
- ],
46
- "parallel": true,
47
- "base": "MMLU"
48
- },
49
- {
50
- "name": "AfriMMLU",
51
- "author": "Masakhane",
52
- "url": "https://huggingface.co/datasets/masakhane/afrimmlu",
53
- "n_languages": "17",
54
- "tasks": [
55
- "question_answering"
56
- ],
57
- "parallel": true,
58
- "base": "MMLU"
59
- },
60
- {
61
- "name": "Okapi MMLU",
62
- "author": "Okapi",
63
- "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
64
- "n_languages": 16,
65
- "tasks": [
66
- "question_answering"
67
- ],
68
- "parallel": true,
69
- "base": "MMLU"
70
- },
71
- {
72
- "name": "Global MMLU",
73
- "author": "Cohere",
74
- "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
75
- "n_languages": 42,
76
- "tasks": [
77
- "question_answering"
78
- ],
79
- "parallel": true,
80
- "base": "MMLU"
81
- },
82
- {
83
- "name": "MGSM",
84
- "author": "Google",
85
- "url": "https://huggingface.co/datasets/juletxara/mgsm",
86
- "n_languages": 10,
87
- "tasks": [
88
- "math"
89
- ],
90
- "parallel": true,
91
- "base": "MGSM"
92
- },
93
- {
94
- "name": "AfriMGSM",
95
- "author": "Masakhane",
96
- "url": "https://huggingface.co/datasets/masakhane/afrimgsm",
97
- "n_languages": 18,
98
- "tasks": [
99
- "math"
100
- ],
101
- "parallel": true,
102
- "base": "MGSM"
103
- },
104
- {
105
- "name": "Okapi ARC Challenge",
106
- "author": "Okapi",
107
- "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
108
- "n_languages": 31,
109
- "tasks": [
110
- "question_answering"
111
- ],
112
- "parallel": true,
113
- "base": "AI2 ARC"
114
- },
115
- {
116
- "name": "Uhuru ARC Easy",
117
- "author": "Masakhane",
118
- "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
119
- "n_languages": 6,
120
- "tasks": [
121
- "question_answering"
122
- ],
123
- "parallel": true,
124
- "base": "AI2 ARC"
125
- },
126
- {
127
- "name": "Okapi TruthfulQA",
128
- "author": "Okapi",
129
- "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
130
- "n_languages": 31,
131
- "tasks": [
132
- "question_answering"
133
- ],
134
- "parallel": true,
135
- "base": "TruthfulQA"
136
- },
137
- {
138
- "name": "Uhura TruthfulQA",
139
- "author": "Masakhane",
140
- "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
141
- "n_languages": 6,
142
- "tasks": [
143
- "question_answering"
144
- ],
145
- "parallel": true,
146
- "base": "TruthfulQA"
147
- },
148
- {
149
- "name": "XNLI",
150
- "author": "Meta",
151
- "url": "https://huggingface.co/datasets/facebook/xnli",
152
- "n_languages": 14,
153
- "tasks": [
154
- "classification"
155
- ],
156
- "parallel": true,
157
- "base": "XNLI"
158
- },
159
- {
160
- "name": "AfriXNLI",
161
- "author": "Masakhane",
162
- "url": "https://huggingface.co/datasets/masakhane/afrixnli",
163
- "n_languages": 18,
164
- "tasks": [
165
- "classification"
166
- ],
167
- "parallel": true,
168
- "base": "XNLI"
169
- },
170
- {
171
- "name": "IndicXNLI",
172
- "author": "AI4Bharat",
173
- "url": "https://huggingface.co/datasets/Divyanshu/indicxnli",
174
- "n_languages": 11,
175
- "tasks": [
176
- "classification"
177
- ],
178
- "parallel": true,
179
- "base": "XNLI"
180
- },
181
- {
182
- "name": "Okapi HellaSwag",
183
- "author": "Okapi",
184
- "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
185
- "n_languages": 31,
186
- "tasks": [
187
- "question_answering"
188
- ],
189
- "parallel": true,
190
- "base": "HellaSwag"
191
- },
192
- {
193
- "name": "XTREME",
194
- "author": "Google",
195
- "url": "https://huggingface.co/datasets/google/xtreme",
196
- "n_languages": 40,
197
- "tasks": [
198
- "translation",
199
- "classification",
200
- "question_answering",
201
- "ner"
202
- ],
203
- "parallel": null
204
- },
205
- {
206
- "name": "IndicXTREME",
207
- "author": "AI4Bharat",
208
- "url": "https://huggingface.co/collections/ai4bharat/indicxtreme-66c59f576386ba2955650030",
209
- "n_languages": 20,
210
- "tasks": [
211
- "translation",
212
- "classification",
213
- "question_answering",
214
- "ner"
215
- ],
216
- "parallel": null
217
- },
218
- {
219
- "name": "XGLUE",
220
- "author": "Microsoft",
221
- "url": "https://huggingface.co/datasets/microsoft/xglue",
222
- "n_languages": 18,
223
- "tasks": [
224
- "pos"
225
- ],
226
- "parallel": null,
227
- "base": "GLUE"
228
- },
229
- {
230
- "name": "IndicGLUE",
231
- "author": "AI4Bharat",
232
- "url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
233
- "n_languages": 11,
234
- "tasks": [
235
- "question_answering"
236
- ],
237
- "parallel": null,
238
- "base": "GLUE"
239
- },
240
- {
241
- "name": "CCAligned",
242
- "author": "Meta",
243
- "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
244
- "n_languages": 137,
245
- "tasks": [
246
- "translation"
247
- ],
248
- "parallel": false
249
- },
250
- {
251
- "name": "Opus Gnome",
252
- "author": "Helsinki NLP",
253
- "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
254
- "n_languages": 187,
255
- "tasks": [
256
- "translation"
257
- ],
258
- "parallel": true
259
- },
260
- {
261
- "name": "Opus Paracrawl",
262
- "author": "Helsinki NLP",
263
- "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
264
- "n_languages": 43,
265
- "tasks": [
266
- "translation"
267
- ],
268
- "parallel": false
269
- },
270
- {
271
- "name": "OPUS Collection",
272
- "author": "Helsinki NLP",
273
- "url": "https://opus.nlpl.eu/",
274
- "n_languages": 747,
275
- "tasks": [
276
- "translation"
277
- ],
278
- "parallel": false
279
- },
280
- {
281
- "name": "WikiANN / PAN-X",
282
- "author": "Academic",
283
- "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
284
- "n_languages": 176,
285
- "tasks": [
286
- "ner"
287
- ],
288
- "parallel": false
289
- },
290
- {
291
- "name": "MSVAMP",
292
- "author": "Microsoft",
293
- "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
294
- "n_languages": 10,
295
- "tasks": [
296
- "math"
297
- ],
298
- "parallel": true
299
- },
300
- {
301
- "name": "XLSUM",
302
- "author": "Academic",
303
- "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
304
- "n_languages": 45,
305
- "tasks": [
306
- "summarization"
307
- ],
308
- "parallel": true
309
- },
310
- {
311
- "name": "SEA-IFEVAL",
312
- "author": "AI Singapore",
313
- "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
314
- "n_languages": 7,
315
- "tasks": [
316
- "instruction_following"
317
- ],
318
- "parallel": true,
319
- "base": "IFEVAL"
320
- },
321
- {
322
- "name": "MasakhaNER",
323
- "author": "Masakhane",
324
- "url": "https://huggingface.co/datasets/masakhane/masakhaner",
325
- "n_languages": 10,
326
- "tasks": [
327
- "ner"
328
- ],
329
- "parallel": null
330
- },
331
- {
332
- "name": "Multilingual Sentiments",
333
- "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
334
- "n_languages": 12,
335
- "tasks": [
336
- "sentiment_analysis"
337
- ],
338
- "parallel": null
339
- },
340
- {
341
- "name": "CulturaX",
342
- "author": "Academic",
343
- "url": "https://huggingface.co/datasets/uonlp/CulturaX",
344
- "n_languages": 167,
345
- "tasks": [
346
- "language_modeling"
347
- ],
348
- "parallel": false
349
- },
350
- {
351
- "name": "T\u00fclu 3 SFT Mixture",
352
- "author": "AllenAI",
353
- "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
354
- "n_languages": 70,
355
- "tasks": [
356
- "instruction_following"
357
- ],
358
- "parallel": false
359
- },
360
- {
361
- "name": "xP3",
362
- "author": "BigScience",
363
- "url": "https://huggingface.co/datasets/bigscience/xP3",
364
- "n_languages": 46,
365
- "tasks": [
366
- "instruction_following"
367
- ],
368
- "parallel": false
369
- },
370
- {
371
- "name": "Aya",
372
- "author": "Cohere",
373
- "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
374
- "n_languages": 65,
375
- "tasks": [
376
- "instruction_following"
377
- ],
378
- "parallel": null
379
- },
380
- {
381
- "name": "Lanfrica",
382
- "author": "Lanfrica",
383
- "url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
384
- "n_languages": 2200,
385
- "tasks": [
386
- "datasets"
387
- ],
388
- "parallel": null
389
- },
390
- {
391
- "name": "HuggingFace Languages",
392
- "author": "HuggingFace",
393
- "url": "https://huggingface.co/languages",
394
- "n_languages": 4680,
395
- "tasks": [
396
- "datasets",
397
- "models"
398
- ],
399
- "parallel": null
400
- },
401
- {
402
- "name": "HuggingFace Multilingual Datasets",
403
- "author": "HuggingFace",
404
- "url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
405
- "n_languages": null,
406
- "tasks": [
407
- "datasets"
408
- ],
409
- "parallel": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  }
411
  ]
 
1
  [
2
  {
3
+ "name": "FLORES+",
4
+ "author": "Meta",
5
+ "author_url": "https://ai.meta.com",
6
+ "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus",
7
+ "n_languages": 200,
8
+ "tasks": [
9
+ "translation",
10
+ "classification",
11
+ "language_modeling"
12
+ ],
13
+ "parallel": true,
14
+ "base": "FLORES",
15
+ "implemented": true
16
+ },
17
+ {
18
+ "name": "FLEURS",
19
+ "author": "Google",
20
+ "author_url": "https://google.com",
21
+ "url": "https://huggingface.co/datasets/google/fleurs",
22
+ "n_languages": 102,
23
+ "tasks": [
24
+ "speech_recognition"
25
+ ],
26
+ "parallel": true,
27
+ "base": "FLORES",
28
+ "implemented": true
29
+ },
30
+ {
31
+ "name": "CommonVoice",
32
+ "author": "Mozilla",
33
+ "author_url": "https://blog.mozilla.ai",
34
+ "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0",
35
+ "n_languages": 124,
36
+ "tasks": [
37
+ "speech_recognition"
38
+ ],
39
+ "parallel": null
40
+ },
41
+ {
42
+ "name": "MMMLU",
43
+ "author": "OpenAI",
44
+ "author_url": "https://openai.com",
45
+ "url": "https://huggingface.co/datasets/openai/MMMLU",
46
+ "n_languages": 14,
47
+ "tasks": [
48
+ "question_answering"
49
+ ],
50
+ "parallel": true,
51
+ "base": "MMLU"
52
+ },
53
+ {
54
+ "name": "AfriMMLU",
55
+ "author": "Masakhane",
56
+ "author_url": "https://www.masakhane.io",
57
+ "url": "https://huggingface.co/datasets/masakhane/afrimmlu",
58
+ "n_languages": 17,
59
+ "tasks": [
60
+ "question_answering"
61
+ ],
62
+ "parallel": true,
63
+ "base": "MMLU"
64
+ },
65
+ {
66
+ "name": "Okapi MMLU",
67
+ "author": "Academic",
68
+ "author_url": null,
69
+ "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu",
70
+ "n_languages": 16,
71
+ "tasks": [
72
+ "question_answering"
73
+ ],
74
+ "parallel": true,
75
+ "base": "MMLU"
76
+ },
77
+ {
78
+ "name": "MMLU-X",
79
+ "author": "OpenGPT-X",
80
+ "author_url": null,
81
+ "url": "https://huggingface.co/datasets/openGPT-X/mmlux",
82
+ "n_languages": 20,
83
+ "tasks": [
84
+ "question_answering"
85
+ ],
86
+ "parallel": true,
87
+ "base": "MMLU"
88
+ },
89
+ {
90
+ "name": "Global MMLU",
91
+ "author": "Cohere",
92
+ "author_url": "https://cohere.com",
93
+ "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU",
94
+ "n_languages": 42,
95
+ "tasks": [
96
+ "question_answering"
97
+ ],
98
+ "parallel": true,
99
+ "base": "MMLU"
100
+ },
101
+ {
102
+ "name": "MGSM",
103
+ "author": "Google",
104
+ "author_url": "https://google.com",
105
+ "url": "https://huggingface.co/datasets/juletxara/mgsm",
106
+ "n_languages": 10,
107
+ "tasks": [
108
+ "math"
109
+ ],
110
+ "parallel": true,
111
+ "base": "MGSM"
112
+ },
113
+ {
114
+ "name": "AfriMGSM",
115
+ "author": "Masakhane",
116
+ "author_url": "https://www.masakhane.io",
117
+ "url": "https://huggingface.co/datasets/masakhane/afrimgsm",
118
+ "n_languages": 18,
119
+ "tasks": [
120
+ "math"
121
+ ],
122
+ "parallel": true,
123
+ "base": "MGSM"
124
+ },
125
+ {
126
+ "name": "GSM8K-X",
127
+ "author": "OpenGPT-X",
128
+ "author_url": null,
129
+ "url": "https://huggingface.co/datasets/openGPT-X/gsm8kx",
130
+ "n_languages": 20,
131
+ "tasks": [
132
+ "math"
133
+ ],
134
+ "parallel": true,
135
+ "base": "MGSM"
136
+ },
137
+ {
138
+ "name": "Okapi ARC Challenge",
139
+ "author": "Academic",
140
+ "author_url": null,
141
+ "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge",
142
+ "n_languages": 31,
143
+ "tasks": [
144
+ "question_answering"
145
+ ],
146
+ "parallel": true,
147
+ "base": "AI2 ARC"
148
+ },
149
+ {
150
+ "name": "Uhura ARC Easy",
151
+ "author": "Masakhane",
152
+ "author_url": "https://www.masakhane.io",
153
+ "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy",
154
+ "n_languages": 6,
155
+ "tasks": [
156
+ "question_answering"
157
+ ],
158
+ "parallel": true,
159
+ "base": "AI2 ARC"
160
+ },
161
+ {
162
+ "name": "Arc-X",
163
+ "author": "OpenGPT-X",
164
+ "author_url": null,
165
+ "url": "https://huggingface.co/datasets/openGPT-X/arcx",
166
+ "n_languages": 20,
167
+ "tasks": [
168
+ "question_answering"
169
+ ],
170
+ "parallel": true,
171
+ "base": "AI2 ARC"
172
+ },
173
+ {
174
+ "name": "Okapi TruthfulQA",
175
+ "author": "Academic",
176
+ "author_url": null,
177
+ "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data",
178
+ "n_languages": 31,
179
+ "tasks": [
180
+ "question_answering"
181
+ ],
182
+ "parallel": true,
183
+ "base": "TruthfulQA"
184
+ },
185
+ {
186
+ "name": "Uhura TruthfulQA",
187
+ "author": "Masakhane",
188
+ "author_url": "https://www.masakhane.io",
189
+ "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa",
190
+ "n_languages": 6,
191
+ "tasks": [
192
+ "question_answering"
193
+ ],
194
+ "parallel": true,
195
+ "base": "TruthfulQA"
196
+ },
197
+ {
198
+ "name": "TruthfulQA-X",
199
+ "author": "OpenGPT-X",
200
+ "author_url": null,
201
+ "url": "https://huggingface.co/datasets/openGPT-X/truthfulqax",
202
+ "n_languages": 20,
203
+ "tasks": [
204
+ "question_answering"
205
+ ],
206
+ "parallel": true,
207
+ "base": "TruthfulQA"
208
+ },
209
+ {
210
+ "name": "XNLI",
211
+ "author": "Meta",
212
+ "author_url": "https://ai.meta.com",
213
+ "url": "https://huggingface.co/datasets/facebook/xnli",
214
+ "n_languages": 14,
215
+ "tasks": [
216
+ "classification"
217
+ ],
218
+ "parallel": true,
219
+ "base": "XNLI"
220
+ },
221
+ {
222
+ "name": "AfriXNLI",
223
+ "author": "Masakhane",
224
+ "author_url": "https://www.masakhane.io",
225
+ "url": "https://huggingface.co/datasets/masakhane/afrixnli",
226
+ "n_languages": 18,
227
+ "tasks": [
228
+ "classification"
229
+ ],
230
+ "parallel": true,
231
+ "base": "XNLI"
232
+ },
233
+ {
234
+ "name": "Okapi HellaSwag",
235
+ "author": "Academic",
236
+ "author_url": null,
237
+ "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag",
238
+ "n_languages": 31,
239
+ "tasks": [
240
+ "question_answering"
241
+ ],
242
+ "parallel": true,
243
+ "base": "HellaSwag"
244
+ },
245
+ {
246
+ "name": "HellaSwag-X",
247
+ "author": "OpenGPT-X",
248
+ "author_url": null,
249
+ "url": "https://huggingface.co/datasets/openGPT-X/hellaswagx",
250
+ "n_languages": 20,
251
+ "tasks": [
252
+ "question_answering"
253
+ ],
254
+ "parallel": true,
255
+ "base": "HellaSwag"
256
+ },
257
+ {
258
+ "name": "WikiANN / PAN-X",
259
+ "author": "Academic",
260
+ "author_url": null,
261
+ "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann",
262
+ "n_languages": 176,
263
+ "tasks": [
264
+ "ner"
265
+ ],
266
+ "parallel": false
267
+ },
268
+ {
269
+ "name": "MSVAMP",
270
+ "author": "Microsoft",
271
+ "author_url": "https://microsoft.ai",
272
+ "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP",
273
+ "n_languages": 10,
274
+ "tasks": [
275
+ "math"
276
+ ],
277
+ "parallel": true
278
+ },
279
+ {
280
+ "name": "XLSUM",
281
+ "author": "Academic",
282
+ "author_url": null,
283
+ "url": "https://huggingface.co/datasets/csebuetnlp/xlsum",
284
+ "n_languages": 45,
285
+ "tasks": [
286
+ "summarization"
287
+ ],
288
+ "parallel": true
289
+ },
290
+ {
291
+ "name": "SEA-IFEVAL",
292
+ "author": "AI Singapore",
293
+ "author_url": "https://aisingapore.org",
294
+ "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval",
295
+ "n_languages": 7,
296
+ "tasks": [
297
+ "instruction_following"
298
+ ],
299
+ "parallel": true,
300
+ "base": "IFEVAL"
301
+ },
302
+ {
303
+ "name": "XTREME",
304
+ "author": "Google",
305
+ "author_url": "https://google.com",
306
+ "url": "https://huggingface.co/datasets/google/xtreme",
307
+ "n_languages": 40,
308
+ "tasks": [
309
+ "translation",
310
+ "classification",
311
+ "question_answering",
312
+ "ner"
313
+ ],
314
+ "parallel": null
315
+ },
316
+ {
317
+ "name": "XGLUE",
318
+ "author": "Microsoft",
319
+ "author_url": "https://microsoft.ai",
320
+ "url": "https://huggingface.co/datasets/microsoft/xglue",
321
+ "n_languages": 18,
322
+ "tasks": [
323
+ "pos"
324
+ ],
325
+ "parallel": null,
326
+ "base": "GLUE"
327
+ },
328
+ {
329
+ "name": "IndicGLUE",
330
+ "author": "AI4Bharat",
331
+ "author_url": "https://models.ai4bharat.org",
332
+ "url": "https://huggingface.co/datasets/ai4bharat/indic_glue",
333
+ "n_languages": 11,
334
+ "tasks": [
335
+ "question_answering"
336
+ ],
337
+ "parallel": null,
338
+ "base": "GLUE"
339
+ },
340
+ {
341
+ "name": "Opus Gnome",
342
+ "author": "Helsinki NLP",
343
+ "author_url": null,
344
+ "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome",
345
+ "n_languages": 187,
346
+ "tasks": [
347
+ "translation"
348
+ ],
349
+ "parallel": true
350
+ },
351
+ {
352
+ "name": "Opus Paracrawl",
353
+ "author": "Helsinki NLP",
354
+ "author_url": null,
355
+ "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl",
356
+ "n_languages": 43,
357
+ "tasks": [
358
+ "translation"
359
+ ],
360
+ "parallel": false
361
+ },
362
+ {
363
+ "name": "CCAligned",
364
+ "author": "Meta",
365
+ "author_url": "https://ai.meta.com",
366
+ "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual",
367
+ "n_languages": 137,
368
+ "tasks": [
369
+ "translation"
370
+ ],
371
+ "parallel": false
372
+ },
373
+ {
374
+ "name": "OPUS Collection",
375
+ "author": "Helsinki NLP",
376
+ "author_url": null,
377
+ "url": "https://opus.nlpl.eu",
378
+ "n_languages": 747,
379
+ "tasks": [
380
+ "translation"
381
+ ],
382
+ "parallel": false
383
+ },
384
+ {
385
+ "name": "MasakhaNER",
386
+ "author": "Masakhane",
387
+ "author_url": "https://www.masakhane.io",
388
+ "url": "https://huggingface.co/datasets/masakhane/masakhaner",
389
+ "n_languages": 10,
390
+ "tasks": [
391
+ "ner"
392
+ ],
393
+ "parallel": null
394
+ },
395
+ {
396
+ "name": "Multilingual Sentiments",
397
+ "author": "Academic",
398
+ "author_url": null,
399
+ "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments",
400
+ "n_languages": 12,
401
+ "tasks": [
402
+ "sentiment_analysis"
403
+ ],
404
+ "parallel": null
405
+ },
406
+ {
407
+ "name": "CulturaX",
408
+ "author": "Academic",
409
+ "author_url": null,
410
+ "url": "https://huggingface.co/datasets/uonlp/CulturaX",
411
+ "n_languages": 167,
412
+ "tasks": [
413
+ "language_modeling"
414
+ ],
415
+ "parallel": false
416
+ },
417
+ {
418
+ "name": "Tülu 3 SFT Mixture",
419
+ "author": "AllenAI",
420
+ "author_url": "https://allenai.org",
421
+ "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture",
422
+ "n_languages": 70,
423
+ "tasks": [
424
+ "instruction_following"
425
+ ],
426
+ "parallel": false
427
+ },
428
+ {
429
+ "name": "xP3",
430
+ "author": "BigScience",
431
+ "author_url": "https://bigscience.huggingface.co",
432
+ "url": "https://huggingface.co/datasets/bigscience/xP3",
433
+ "n_languages": 46,
434
+ "tasks": [
435
+ "instruction_following"
436
+ ],
437
+ "parallel": false
438
+ },
439
+ {
440
+ "name": "Aya",
441
+ "author": "Cohere",
442
+ "author_url": "https://cohere.com",
443
+ "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset",
444
+ "n_languages": 65,
445
+ "tasks": [
446
+ "instruction_following"
447
+ ],
448
+ "parallel": null
449
+ },
450
+ {
451
+ "name": "Lanfrica",
452
+ "author": "Lanfrica",
453
+ "author_url": "https://lanfrica.com",
454
+ "url": "https://lanfrica.com/records?language=yor&task=machine%20translation",
455
+ "n_languages": 2200,
456
+ "tasks": [
457
+ "datasets"
458
+ ],
459
+ "parallel": null
460
+ },
461
+ {
462
+ "name": "HuggingFace Languages",
463
+ "author": "HuggingFace",
464
+ "author_url": "https://huggingface.co",
465
+ "url": "https://huggingface.co/languages",
466
+ "n_languages": 4680,
467
+ "tasks": [
468
+ "datasets",
469
+ "models"
470
+ ],
471
+ "parallel": null
472
+ },
473
+ {
474
+ "name": "HuggingFace Multilingual Datasets",
475
+ "author": "HuggingFace",
476
+ "author_url": "https://huggingface.co",
477
+ "url": "https://huggingface.co/datasets?other=multilinguality:multilingual",
478
+ "n_languages": 2012,
479
+ "tasks": [
480
+ "datasets"
481
+ ],
482
+ "parallel": false
483
  }
484
  ]