Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,8 @@ st.set_page_config(layout="wide", page_title="LLM's Scores Evaluation: ICC Compu
|
|
7 |
|
8 |
# Title and instructions
|
9 |
st.title("📊 LLM's Scores Evaluation: ICC Computation")
|
10 |
-
st.markdown("
|
|
|
11 |
|
12 |
# **Two Side-by-Side Containers**
|
13 |
container_left, container_right = st.columns([1, 2]) # Left (Filters) | Right (ICC Results + Heatmaps)
|
|
|
7 |
|
8 |
# Title and instructions
|
9 |
st.title("📊 LLM's Scores Evaluation: ICC Computation")
|
10 |
+
st.markdown("This app supports a scientific study on peer review among Large Language Models (LLMs) (https://arxiv.org/abs/2412.09385). Before computing inter-rater agreement (ICC), responses from multiple LLMs are collected on a forecasting task, and each model is then asked to evaluate all responses using predefined criteria. This tool allows "
|
11 |
+
"researchers to upload those evaluation scores, filter the data, and analyze model agreement through ICC metrics and heatmaps.")
|
12 |
|
13 |
# **Two Side-by-Side Containers**
|
14 |
container_left, container_right = st.columns([1, 2]) # Left (Filters) | Right (ICC Results + Heatmaps)
|