Jimin Huang
feature: add logoes
04f06ea
import { Box, Typography } from "@mui/material";
const createTooltipContent = (title, items) => (
<Box sx={{ maxWidth: 400 }}>
<Typography variant="body2" paragraph sx={{ mb: 1, color: "inherit" }}>
{title}
</Typography>
<Box component="ul" sx={{ m: 0, pl: 2 }}>
{items.map(({ label, description, subItems }, index) => (
<li key={index}>
<Typography variant="body2" sx={{ mb: 0.5, color: "inherit" }}>
<b>{label}</b>: {description}
{subItems && (
<Box component="ul" sx={{ mt: 0.5, mb: 1 }}>
{subItems.map((item, subIndex) => (
<li key={subIndex}>
<Typography variant="body2" sx={{ color: "inherit" }}>
{item}
</Typography>
</li>
))}
</Box>
)}
</Typography>
</li>
))}
</Box>
</Box>
);
export const COLUMN_TOOLTIPS = {
AVERAGE: createTooltipContent("Average score across all benchmarks:", [
{
label: "Calculation",
description: "Weighted average of normalized scores from all benchmarks",
subItems: [
"Each benchmark is normalized to a 0-100 scale",
"All normalised benchmarks are then averaged together",
],
},
]),
MULTIFIN: createTooltipContent("Multilingual Financial NLP (MultiFin):", [
{
label: "Purpose",
description:
"Tests model's ability to understand real-world financial article headlines",
subItems: ["Language Understanding", "Topic Classification"],
},
{
label: "Scoring: ACC Norm",
description: "Was the correct choice selected among the options.",
},
]),
QA: createTooltipContent("Greek Financial Question Answering (QA):", [
{
label: "Purpose",
description:
"Tests model's ability to understand real-world Greek financial exam questions",
subItems: ["Language Understanding", "Question Answering"],
},
{
label: "Scoring: ACC Norm",
description: "Was the correct choice selected among the options.",
},
]),
FNS: createTooltipContent("Financial Narrative Summarization (FNS):", [
{
label: "Purpose",
description:
"Tests model's ability to summarize real-world financial annual reports",
subItems: ["Language Understanding", "Abstractive Summarization"],
},
{
label: "Scoring: Rouge1",
description: "Was the overlap of unigrams (each word) between the predicted and reference summaries.",
},
]),
FinNum: createTooltipContent("Financial Number Entity Recognition (FinNum):", [
{
label: "Purpose",
description:
"Tests model's ability to recognize numbers from financial texts",
subItems: ["Language Understanding", "Named Entity Recognition"],
},
{
label: "Scoring: Rouge1",
description: "Was the overlap of unigrams (each word) between the predicted and reference summaries.",
},
]),
FinText: createTooltipContent("Financial Named Entity Recognition (FinText):", [
{
label: "Purpose",
description:
"Tests model's ability to recognize financial entities, i.e, person, location, and organization",
subItems: ["Language Understanding", "Named Entity Recognition"],
},
{
label: "Scoring: Rouge1",
description: "Was the overlap of unigrams (each word) between the predicted and reference summaries.",
},
]),
ARCHITECTURE: createTooltipContent("Model Architecture Information:", [
{
label: "Definition",
description: "The fundamental structure and design of the model",
subItems: [
"Pretrained: Foundational models, initially trained on large datasets without task-specific tuning, serving as a versatile base for further development.",
"Continuously Pretrained: Base models trained with a data mix evolving as the model is trained, with the addition of specialized data during the last training steps.",
"Fine-tuned: Base models, fine-tuned on specialised domain data (legal, medical, ...), and optimized for particular tasks.",
"Chat: Models fine-tuned with IFT, RLHF, DPO, and other techniques, to handle conversational contexts effectively.",
"Merged: Combining multiple models through weights averaging or similar methods.",
"Multimodal: Models which can handle several modalities (text & image/audio/video/...). We only evaluate the text capabilities.",
],
},
{
label: "Impact",
description: "How architecture affects model capabilities",
subItems: [
"Base models are expected to perform less well on instruction following evaluations, like IFEval.",
"Fine-tuned and chat models can be more verbose and more chatty than base models.",
"Merged models tend to exhibit good performance on benchmarks, which do not translate to real-world situations.",
],
},
]),
PRECISION: createTooltipContent("Numerical Precision Format:", [
{
label: "Overview",
description:
"Data format used to store model weights and perform computations",
subItems: [
"bfloat16: Half precision (Brain Float format), good for stability",
"float16: Half precision",
"8bit/4bit: Quantized formats, for efficiency",
"GPTQ/AWQ: Quantized methods",
],
},
{
label: "Impact",
description: "How precision affects model deployment",
subItems: [
"Higher precision = better accuracy but more memory usage",
"Lower precision = faster inference and smaller size",
"Trade-off between model quality and resource usage",
],
},
]),
FLAGS: createTooltipContent("Model Flags and Special Features:", [
{
label: "Filters",
subItems: [
"Mixture of Expert: Uses a MoE architecture",
"Merged models: Created by averaging other models",
"Contaminated: Flagged by users from the community for (possibly accidental) cheating",
"Unavailable: No longer on the hub (private, deleted) or missing a license tag",
],
},
{
label: "Purpose",
description: "Why do people want to hide these models?",
subItems: [
"Mixture of Experts: These models can be too parameter heavy",
"Merged models: Performance on benchmarks tend to be inflated compared to real life usage",
"Contaminated: Performance on benchmarks is inflated and not reflecting real life usage",
],
},
]),
PARAMETERS: createTooltipContent("Model Parameters:", [
{
label: "Measurement",
description: "Total number of trainable parameters in billions",
subItems: [
"Indicates model capacity and complexity",
"Correlates with computational requirements",
"Influences memory usage and inference speed",
],
},
]),
LICENSE: createTooltipContent("Model License Information:", [
{
label: "Importance",
description: "Legal terms governing model usage and distribution",
subItems: [
"Commercial vs non-commercial use",
"Attribution requirements",
"Modification and redistribution rights",
"Liability and warranty terms",
],
},
]),
CO2_COST: createTooltipContent("Carbon Dioxide Emissions:", [
{
label: "What is it?",
description: "CO₂ emissions of the model evaluation ",
subItems: [
"Only focuses on model inference for our specific setup",
"Considers data center location and energy mix",
"Allows equivalent comparision of models on our use case",
],
},
{
label: "Why it matters",
description: "Environmental impact of AI model training",
subItems: [
"Large models can have significant carbon footprints",
"Helps make informed choices about model selection",
],
},
{
label: "Learn more",
description:
"For detailed information about our CO₂ calculation methodology, visit:",
subItems: [
<a
href="https://huggingface.co/docs/leaderboards/open_llm_leaderboard/emissions"
target="_blank"
rel="noopener noreferrer"
style={{ color: "#90caf9" }}
>
Carbon Emissions Documentation ↗
</a>,
],
},
]),
};
/**
 * Tooltip content for UI controls. Values come in three shapes:
 * a plain string (COLUMN_SELECTOR), an element built by
 * `createTooltipContent` (DISPLAY_OPTIONS, SEARCH_BAR, QUICK_FILTERS), or a
 * `{ title, description }` object (ROW_SIZE, SCORE_DISPLAY, RANKING_MODE,
 * AVERAGE_SCORE).
 */
export const UI_TOOLTIPS = {
  COLUMN_SELECTOR: "Choose which columns to display in the table",
  DISPLAY_OPTIONS: createTooltipContent("Table Display Options", [
    {
      label: "Overview",
      description: "Configure how the table displays data and information",
      subItems: [
        "Row size and layout",
        "Score display format",
        "Ranking calculation",
        "Average score computation",
      ],
    },
  ]),
  SEARCH_BAR: createTooltipContent("Advanced Model Search", [
    {
      label: "Name Search",
      description: "Search directly by model name",
      subItems: [
        "Supports regular expressions (e.g., ^mistral.*7b)",
        "Case sensitive",
      ],
    },
    {
      label: "Field Search",
      description: "Use @field:value syntax for precise filtering",
      subItems: [
        "@architecture:llama - Filter by architecture",
        "@license:mit - Filter by license",
        "@precision:float16 - Filter by precision",
        "@type:chat - Filter by model type",
      ],
    },
    {
      label: "Multiple Searches",
      description: "Combine multiple criteria using semicolons",
      subItems: [
        "meta @license:mit; @architecture:llama",
        "^mistral.*7b; @precision:float16",
      ],
    },
  ]),
  QUICK_FILTERS: createTooltipContent(
    "Filter models based on their size and applicable hardware:",
    [
      {
        label: "Edge devices (Up to 3B)",
        description:
          "Efficient models for edge devices, optimized for blazing fast inference.",
      },
      {
        label: "Smol Models (3B-7B)",
        description:
          "Efficient models for consumer hardware, optimized for fast inference.",
      },
      {
        label: "Mid-range models (7B-65B)",
        description:
          "A bit of everything here, with overall balanced performance and resource usage around 30B.",
      },
      {
        label: "GPU-rich models (65B+)",
        description:
          "State-of-the-art performance for complex tasks, requires significant computing power.",
      },
      {
        label: "Official Providers",
        description:
          "Models directly maintained by their original creators, ensuring reliability and up-to-date performance.",
      },
    ]
  ),
  ROW_SIZE: {
    title: "Row Size",
    description:
      "Adjust the height of table rows. Compact is ideal for viewing more data at once, while Large provides better readability and touch targets.",
  },
  SCORE_DISPLAY: {
    title: "Score Display",
    description:
      "Choose between normalized scores (0-100% scale for easy comparison) or raw scores (actual benchmark results). Normalized scores help compare performance across different benchmarks, while raw scores show actual benchmark outputs.",
  },
  RANKING_MODE: {
    title: "Ranking Mode",
    description:
      "Choose between static ranking (original position in the full leaderboard) or dynamic ranking (position based on current filters and sorting).",
  },
  AVERAGE_SCORE: {
    title: "Average Score Calculation",
    description:
      "Define how the average score is calculated. 'All Scores' uses all benchmarks, while 'Visible Only' calculates the average using only the visible benchmark columns.",
  },
};
// Exported empty object. NOTE(review): despite the `get` prefix this is a plain
// object, not a function — confirm how importers use it before changing it.
export const getTooltipStyle = {};
/**
 * Tooltip text builders for table cells. Every entry is a function that
 * returns the string to display.
 */
export const TABLE_TOOLTIPS = {
  /** @param {string} modelName @returns {string} */
  HUB_LINK: (modelName) => `View ${modelName} on Hugging Face Hub`,
  /** @param {string} modelName @returns {string} */
  EVAL_RESULTS: (modelName) =>
    `View detailed evaluation results for ${modelName}`,
  /**
   * Describe a rank movement, e.g. "1 position up" or "3 positions down".
   *
   * @param {number} change - Positive for a move up, negative for a move down.
   * @returns {string} Human-readable description; "No change" for 0.
   */
  POSITION_CHANGE: (change) => {
    // Zero is neither up nor down; without this guard it would read
    // "0 position down".
    if (change === 0) {
      return "No change";
    }
    const distance = Math.abs(change);
    const plural = distance > 1 ? "s" : "";
    const direction = change > 0 ? "up" : "down";
    return `${distance} position${plural} ${direction}`;
  },
  // Each formatter falls back to "-" for any falsy value. `||` (not `??`) is
  // deliberate: an empty string should also be shown as "-".
  METADATA: {
    TYPE: (type) => type || "-",
    ARCHITECTURE: (arch) => arch || "-",
    PRECISION: (precision) => precision || "-",
    LICENSE: (license) => license || "-",
    UPLOAD_DATE: (date) => date || "-",
    SUBMISSION_DATE: (date) => date || "-",
    BASE_MODEL: (model) => model || "-",
  },
};