Small Changes
src/tasks.py +1 -1
src/tasks.py
CHANGED
@@ -20,7 +20,7 @@ Evalita-LLM is a benchmark designed to evaluate Large Language Models (LLMs) on
 """
 
 #MEASURE_DESCRIPTION = "Combined Performance = (1 - (Best_Prompt - Prompt_Average) / 100) * Best_Prompt. Prompt Average = accuracy averaged over the six prompts. Best Prompt = accuracy of the best prompt. Prompt ID = ID of the best prompt (see legend above)"
-MEASURE_DESCRIPTION = "
+MEASURE_DESCRIPTION = "<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = accuracy averaged over the assessed prompts. **Best Prompt** = accuracy of the best prompt. **Prompt ID** = ID of the best prompt (see legend above).</small>"
 
 # Tasks Descriptions
 TE_DESCRIPTION = """### Textual Entailment (TE) --- *Multiple-choice task*
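For context, the formula in the new MEASURE_DESCRIPTION can be sketched as a small Python helper. This is an illustrative sketch only, not code from the commit: the function name `combined_performance` and the example accuracies are hypothetical, and it assumes prompt accuracies are expressed on a 0-100 scale, as the "/ 100" term in the description suggests.

```python
# Illustrative sketch only (not part of src/tasks.py); names and example
# values are hypothetical. Assumes accuracies are on a 0-100 scale,
# matching the "/ 100" term in MEASURE_DESCRIPTION.
def combined_performance(prompt_accuracies: list[float]) -> float:
    """Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt."""
    best_prompt = max(prompt_accuracies)                               # accuracy of the best prompt
    prompt_average = sum(prompt_accuracies) / len(prompt_accuracies)   # mean accuracy over the assessed prompts
    return (1 - (best_prompt - prompt_average) / 100) * best_prompt


if __name__ == "__main__":
    # Example with six prompt accuracies (percent): the score rewards the best
    # prompt but penalises a large gap between it and the average prompt.
    print(round(combined_performance([70.0, 65.0, 72.0, 68.0, 74.0, 66.0]), 2))
```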