import { Box, Typography } from "@mui/material"; const createTooltipContent = (title, items) => ( {title} {items.map(({ label, description, subItems }, index) => (
  • {label}: {description} {subItems && ( {subItems.map((item, subIndex) => (
  • {item}
  • ))}
    )} ))}
    ); export const COLUMN_TOOLTIPS = { AVERAGE: createTooltipContent("Average score across all benchmarks:", [ { label: "Calculation", description: "Weighted average of normalized scores from all benchmarks", subItems: [ "Each benchmark is normalized to a 0-100 scale", "All normalised benchmarks are then averaged together", ], }, ]), MULTIFIN: createTooltipContent("Multilingual Financial NLP (MultiFin):", [ { label: "Purpose", description: "Tests model's ability to understand real-world financial article headlines", subItems: ["Language Understanding", "Topic Classification"], }, { label: "Scoring: ACC Norm", description: "Was the correct choice selected among the options.", }, ]), QA: createTooltipContent("Greek Financial Question Answering (QA):", [ { label: "Purpose", description: "Tests model's ability to understand real-world Greek financial exam questions", subItems: ["Language Understanding", "Question Answering"], }, { label: "Scoring: ACC Norm", description: "Was the correct choice selected among the options.", }, ]), FNS: createTooltipContent("Financial Narrative Summarization (FNS):", [ { label: "Purpose", description: "Tests model's ability to summarize real-world financial annual reports", subItems: ["Language Understanding", "Abstractive Summarization"], }, { label: "Scoring: Rouge1", description: "Was the overlap of unigrams (each word) between the predicted and reference summaries.", }, ]), FinNum: createTooltipContent("Financial Number Entity Recognition (FinNum):", [ { label: "Purpose", description: "Tests model's ability to recognize numbers from financial texts", subItems: ["Language Understanding", "Named Entity Recognition"], }, { label: "Scoring: Rouge1", description: "Was the overlap of unigrams (each word) between the predicted and reference summaries.", }, ]), FinText: createTooltipContent("Financial Named Entity Recognition (FinText):", [ { label: "Purpose", description: "Tests model's ability to recognize financial entities, i.e, person, location, and organization", subItems: ["Language Understanding", "Named Entity Recognition"], }, { label: "Scoring: Rouge1", description: "Was the overlap of unigrams (each word) between the predicted and reference summaries.", }, ]), ARCHITECTURE: createTooltipContent("Model Architecture Information:", [ { label: "Definition", description: "The fundamental structure and design of the model", subItems: [ "Pretrained: Foundational models, initially trained on large datasets without task-specific tuning, serving as a versatile base for further development.", "Continuously Pretrained: Base models trained with a data mix evolving as the model is trained, with the addition of specialized data during the last training steps.", "Fine-tuned: Base models, fine-tuned on specialised domain data (legal, medical, ...), and optimized for particular tasks.", "Chat: Models fine-tuned with IFT, RLHF, DPO, and other techniques, to handle conversational contexts effectively.", "Merged: Combining multiple models through weights averaging or similar methods.", "Multimodal: Models which can handle several modalities (text & image/audio/video/...). We only evaluate the text capabilities.", ], }, { label: "Impact", description: "How architecture affects model capabilities", subItems: [ "Base models are expected to perform less well on instruction following evaluations, like IFEval.", "Fine-tuned and chat models can be more verbose and more chatty than base models.", "Merged models tend to exhibit good performance on benchmarks, which do not translate to real-world situations.", ], }, ]), PRECISION: createTooltipContent("Numerical Precision Format:", [ { label: "Overview", description: "Data format used to store model weights and perform computations", subItems: [ "bfloat16: Half precision (Brain Float format), good for stability", "float16: Half precision", "8bit/4bit: Quantized formats, for efficiency", "GPTQ/AWQ: Quantized methods", ], }, { label: "Impact", description: "How precision affects model deployment", subItems: [ "Higher precision = better accuracy but more memory usage", "Lower precision = faster inference and smaller size", "Trade-off between model quality and resource usage", ], }, ]), FLAGS: createTooltipContent("Model Flags and Special Features:", [ { label: "Filters", subItems: [ "Mixture of Expert: Uses a MoE architecture", "Merged models: Created by averaging other models", "Contaminated: Flagged by users from the community for (possibly accidental) cheating", "Unavailable: No longer on the hub (private, deleted) or missing a license tag", ], }, { label: "Purpose", description: "Why do people want to hide these models?", subItems: [ "Mixture of Experts: These models can be too parameter heavy", "Merged models: Performance on benchmarks tend to be inflated compared to real life usage", "Contaminated: Performance on benchmarks is inflated and not reflecting real life usage", ], }, ]), PARAMETERS: createTooltipContent("Model Parameters:", [ { label: "Measurement", description: "Total number of trainable parameters in billions", subItems: [ "Indicates model capacity and complexity", "Correlates with computational requirements", "Influences memory usage and inference speed", ], }, ]), LICENSE: createTooltipContent("Model License Information:", [ { label: "Importance", description: "Legal terms governing model usage and distribution", subItems: [ "Commercial vs non-commercial use", "Attribution requirements", "Modification and redistribution rights", "Liability and warranty terms", ], }, ]), CO2_COST: createTooltipContent("Carbon Dioxide Emissions:", [ { label: "What is it?", description: "CO₂ emissions of the model evaluation ", subItems: [ "Only focuses on model inference for our specific setup", "Considers data center location and energy mix", "Allows equivalent comparision of models on our use case", ], }, { label: "Why it matters", description: "Environmental impact of AI model training", subItems: [ "Large models can have significant carbon footprints", "Helps make informed choices about model selection", ], }, { label: "Learn more", description: "For detailed information about our CO₂ calculation methodology, visit:", subItems: [ Carbon Emissions Documentation ↗ , ], }, ]), }; export const UI_TOOLTIPS = { COLUMN_SELECTOR: "Choose which columns to display in the table", DISPLAY_OPTIONS: createTooltipContent("Table Display Options", [ { label: "Overview", description: "Configure how the table displays data and information", subItems: [ "Row size and layout", "Score display format", "Ranking calculation", "Average score computation", ], }, ]), SEARCH_BAR: createTooltipContent("Advanced Model Search", [ { label: "Name Search", description: "Search directly by model name", subItems: [ "Supports regular expressions (e.g., ^mistral.*7b)", "Case sensitive", ], }, { label: "Field Search", description: "Use @field:value syntax for precise filtering", subItems: [ "@architecture:llama - Filter by architecture", "@license:mit - Filter by license", "@precision:float16 - Filter by precision", "@type:chat - Filter by model type", ], }, { label: "Multiple Searches", description: "Combine multiple criteria using semicolons", subItems: [ "meta @license:mit; @architecture:llama", "^mistral.*7b; @precision:float16", ], }, ]), QUICK_FILTERS: createTooltipContent( "Filter models based on their size and applicable hardware:", [ { label: "Edge devices (Up to 3BB)", description: "Efficient models for edge devices, optimized for blazing fast inference.", }, { label: "Smol Models (3B-7B)", description: "Efficient models for consumer hardware, optimized for fast inference.", }, { label: "Mid-range models (7B-65B)", description: "A bit of everything here, with overall balanced performance and resource usage around 30B.", }, { label: "GPU-rich models (65B+)", description: "State-of-the-art performance for complex tasks, requires significant computing power.", }, { label: "Official Providers", description: "Models directly maintained by their original creators, ensuring reliability and up-to-date performance.", }, ] ), ROW_SIZE: { title: "Row Size", description: "Adjust the height of table rows. Compact is ideal for viewing more data at once, while Large provides better readability and touch targets.", }, SCORE_DISPLAY: { title: "Score Display", description: "Choose between normalized scores (0-100% scale for easy comparison) or raw scores (actual benchmark results). Normalized scores help compare performance across different benchmarks, while raw scores show actual benchmark outputs.", }, RANKING_MODE: { title: "Ranking Mode", description: "Choose between static ranking (original position in the full leaderboard) or dynamic ranking (position based on current filters and sorting).", }, AVERAGE_SCORE: { title: "Average Score Calculation", description: "Define how the average score is calculated. 'All Scores' uses all benchmarks, while 'Visible Only' calculates the average using only the visible benchmark columns.", }, }; export const getTooltipStyle = {}; export const TABLE_TOOLTIPS = { HUB_LINK: (modelName) => `View ${modelName} on Hugging Face Hub`, EVAL_RESULTS: (modelName) => `View detailed evaluation results for ${modelName}`, POSITION_CHANGE: (change) => `${Math.abs(change)} position${Math.abs(change) > 1 ? "s" : ""} ${ change > 0 ? "up" : "down" }`, METADATA: { TYPE: (type) => type || "-", ARCHITECTURE: (arch) => arch || "-", PRECISION: (precision) => precision || "-", LICENSE: (license) => license || "-", UPLOAD_DATE: (date) => date || "-", SUBMISSION_DATE: (date) => date || "-", BASE_MODEL: (model) => model || "-", }, };