Small Changes
src/tasks.py +1 -1
src/tasks.py
CHANGED
@@ -20,7 +20,7 @@ Evalita-LLM is a benchmark designed to evaluate Large Language Models (LLMs) on
 """
 
 #MEASURE_DESCRIPTION = "Combined Performance = (1 - (Best_Prompt - Prompt_Average) / 100) * Best_Prompt. Prompt Average = accuracy averaged over the six prompts. Best Prompt = accuracy of the best prompt. Prompt ID = ID of the best prompt (see legend above)"
-MEASURE_DESCRIPTION = "
+MEASURE_DESCRIPTION = "<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = accuracy averaged over the assessed prompts. **Best Prompt** = accuracy of the best prompt. **Prompt ID** = ID of the best prompt (see legend above).</small>"
 
 # Tasks Descriptions
 TE_DESCRIPTION = """### Textual Entailment (TE) --- *Multiple-choice task*
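For context, the formula in the new MEASURE_DESCRIPTION can be sketched as a small Python helper. This is an illustrative sketch only, not code from the commit: the function name `combined_performance` and the example accuracies are hypothetical, and it assumes prompt accuracies are expressed on a 0-100 scale, as the "/ 100" term in the description suggests.

```python
# Illustrative sketch only (not part of src/tasks.py); names and example
# values are hypothetical. Assumes accuracies are on a 0-100 scale,
# matching the "/ 100" term in MEASURE_DESCRIPTION.
def combined_performance(prompt_accuracies: list[float]) -> float:
    """Combined Performance = (1 - (Best Prompt - Prompt Average) / 100) * Best Prompt."""
    best_prompt = max(prompt_accuracies)                               # accuracy of the best prompt
    prompt_average = sum(prompt_accuracies) / len(prompt_accuracies)   # mean accuracy over the assessed prompts
    return (1 - (best_prompt - prompt_average) / 100) * best_prompt


if __name__ == "__main__":
    # Example with six prompt accuracies (percent): the score rewards the best
    # prompt but penalises a large gap between it and the average prompt.
    print(round(combined_performance([70.0, 65.0, 72.0, 68.0, 74.0, 66.0]), 2))
```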