static-test / dist /index.html
hynky's picture
hynky HF Staff
iframe
07b0c58
<!DOCTYPE html>
<html>
<head>
<script src="distill.bundle.js" type="module" fetchpriority="high" blocking></script>
<script src="main.bundle.js" type="module" fetchpriority="low" defer></script>
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta charset="utf8">
<base target="_blank">
<title>Scaling FineWeb to 1000+ languages: Step 1: finding signal in 100s of evaluation tasks</title>
<link rel="stylesheet" href="style.css">
</head>
<body>
<d-front-matter>
<script id='distill-front-matter' type="text/json">{
"title": "📝 Scaling FineWeb to 1000+ languages: Step 1: finding signal in 100s of evaluation tasks",
"description": "This blog covers a discussion on multilingual evaluation and task signal, the processes for selecting existing evaluation tasks based on signal resulting in FineTasks, and comparisson of open and closed sourced on the FineTasks.",
"published": "Oct 23, 2024",
"affiliation": {"name": "HuggingFace"},
"authors": [
{
"author":"Hynek Kydlíček",
"authorURL":"https://huggingface.co/hynky"
},
{
"author":"Guilherme Penedo",
"authorURL":"https://huggingface.co/guipenedo"
},
{
"author":"Clémentine Fourier",
"authorURL":"https://huggingface.co/clefourrier"
},
{
"author":"Nathan Habib",
"authorURL":"https://huggingface.co/SaylorTwift"
},
{
"author":"Thomas Wolf",
"authorURL":"https://huggingface.co/thomwolf"
}
]
}</script>
</d-front-matter>
<d-byline></d-byline>
<d-article>
<d-contents>
</d-contents>
<p>We're looking forward to revisiting this analysis in the future, not with just 9 languages, but at least 50—thanks to community contributions! Let's level the playing field between English and other languages together! 🤗</p>
<d-math> 1+1=2 </d-math>
</d-article>
<d-appendix>
<d-bibliography src="bibliography.bib"></d-bibliography>
<style>
d-appendix .citation {
font-size: 11px;
line-height: 15px;
border-left: 1px solid rgba(0, 0, 0, 0.1);
padding-left: 18px;
border: 1px solid rgba(0,0,0,0.1);
background: rgba(0, 0, 0, 0.02);
padding: 10px 18px;
border-radius: 3px;
color: rgba(150, 150, 150, 1);
overflow: hidden;
margin-top: -12px;
white-space: pre-wrap;
word-wrap: break-word;
}
</style>
<h3 id="citation">Citation</h3>
<p>For attribution in academic contexts, please cite this work as</p>
<pre class="citation short">Kydlicek, et al., "FineTasks: Finding signal in a haystack of 200+ multilingual tasks", 2024.</pre>
<p>BibTeX citation</p>
<pre class="citation long">@misc{kydlicek2024finetasksmultilingualtasks,
title={FineTasks: Finding signal in a haystack of 200+ multilingual tasks},
author={Hynek Kydlíček and Guilherme Penedo and Clémentine Fourier and Nathan Habib and Thomas Wolf},
url={https://huggingface.co/spaces/HuggingFaceFW/blogpost-fine-tasks},
}</pre>
</d-appendix>
<script>
const article = document.querySelector('d-article');
const toc = document.querySelector('d-contents');
if (toc) {
const headings = article.querySelectorAll('h2, h3, h4');
let ToC = `<nav role="navigation" class="l-text figcaption"><h3>Table of contents</h3>`;
let prevLevel = 0;
for (const el of headings) {
// should element be included in TOC?
const isInTitle = el.parentElement.tagName == 'D-TITLE';
const isException = el.getAttribute('no-toc');
if (isInTitle || isException) continue;
el.setAttribute('id', el.textContent.toLowerCase().replaceAll(" ", "_"))
const link = '<a target="_self" href="' + '#' + el.getAttribute('id') + '">' + el.textContent + '</a>';
const level = el.tagName === 'H2' ? 0 : (el.tagName === 'H3' ? 1 : 2);
while (prevLevel < level) {
ToC += '<ul>'
prevLevel++;
}
while (prevLevel > level) {
ToC += '</ul>'
prevLevel--;
}
if (level === 0)
ToC += '<div>' + link + '</div>';
else
ToC += '<li>' + link + '</li>';
}
while (prevLevel > 0) {
ToC += '</ul>'
prevLevel--;
}
ToC += '</nav>';
toc.innerHTML = ToC;
toc.setAttribute('prerendered', 'true');
const toc_links = document.querySelectorAll('d-contents > nav a');
window.addEventListener('scroll', (_event) => {
if (typeof (headings) != 'undefined' && headings != null && typeof (toc_links) != 'undefined' && toc_links != null) {
// Then iterate forwards, on the first match highlight it and break
find_active: {
for (let i = headings.length - 1; i >= 0; i--) {
if (headings[i].getBoundingClientRect().top - 50 <= 0) {
if (!toc_links[i].classList.contains("active")) {
toc_links.forEach((link, _index) => {
link.classList.remove("active");
});
toc_links[i].classList.add('active');
}
break find_active;
}
}
toc_links.forEach((link, _index) => {
link.classList.remove("active");
});
}
}
});
}
</script>
</body>
</html>