LeonardoErcolani committed on
Commit
9fcf6ea
·
verified ·
1 Parent(s): 7e44c6f

Create icc.py

Browse files
Files changed (1) hide show
  1. icc.py +86 -0
icc.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import pingouin as pg
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+
7
# Folder that receives the generated ICC heatmap images; it is created at
# import time (no error if it already exists) so later saves have a target.
HEATMAPS_FOLDER = "icc_heatmaps/"
os.makedirs(name=HEATMAPS_FOLDER, exist_ok=True)
10
+
11
+
12
def preprocess_data(df, selected_assessors, selected_respondents, selected_criteria):
    """
    Filter the dataset to the selected assessors, respondents and criteria.

    Args:
        df: DataFrame with "assessor" and "respondent" columns plus one
            column per criterion.
        selected_assessors: Values of "assessor" to keep.
        selected_respondents: Values of "respondent" to keep.
        selected_criteria: Names of the criterion columns to keep.

    Returns:
        A new DataFrame (the input is not modified) containing only the
        selected rows and the columns "assessor", "respondent" and the
        selected criteria. Criterion columns are floats; "assessor" and
        "respondent" are strings.

    Raises:
        ValueError: If a criterion value cannot be converted to a float.
    """
    criteria = list(selected_criteria)
    keep_rows = df["assessor"].isin(selected_assessors) & df["respondent"].isin(selected_respondents)

    # Work on an explicit copy: assigning columns on a filtered slice is
    # ambiguous in pandas (SettingWithCopy) and could touch the caller's data.
    df = df.loc[keep_rows, ["assessor", "respondent"] + criteria].copy()

    # Convert all criterion columns to float (handling comma decimals).
    for col in criteria:
        values = df[col]
        if not pd.api.types.is_numeric_dtype(values):
            # Only string cells need the comma -> dot fix. The `.str` accessor
            # would raise on numeric columns and blank out non-string cells in
            # mixed columns, so replace cell by cell instead.
            values = values.map(
                lambda cell: cell.replace(",", ".") if isinstance(cell, str) else cell
            )
        df[col] = values.astype(float)

    # Store the identifiers as strings so they act as labels, not numbers.
    df["assessor"] = df["assessor"].astype(str)
    df["respondent"] = df["respondent"].astype(str)

    return df
29
+
30
+
31
def compute_icc(df):
    """
    Compute the overall ICC (Intraclass Correlation Coefficient) table.

    Every column other than "assessor" and "respondent" is stacked into a
    single "Score" column before the ICC is computed, with respondents as
    targets and assessors as raters.

    Returns:
        The output of ``pg.intraclass_corr`` rounded to 3 decimals, or
        ``None`` when the data holds fewer than five distinct respondents.
    """
    long_scores = df.melt(
        id_vars=["assessor", "respondent"],
        var_name="Criterion",
        value_name="Score",
    )

    # Too few respondents: skip the computation entirely.
    respondent_count = long_scores["respondent"].nunique()
    if respondent_count < 5:
        return None

    icc_table = pg.intraclass_corr(
        data=long_scores,
        targets="respondent",
        raters="assessor",
        ratings="Score",
    )
    return icc_table.round(3)
42
+
43
+
44
def compute_assessor_icc(df):
    """
    Compute pairwise ICC matrices between assessors.

    For every pair of distinct assessors, the rows of both assessors are
    passed to ``pg.intraclass_corr`` (respondents as targets, assessors as
    raters) and the "ICC" values of types ICC1, ICC2 and ICC3 are stored in
    the matching matrices.

    Returns:
        A dict mapping "ICC1", "ICC2" and "ICC3" to a square float DataFrame
        indexed by assessor on both axes. The diagonal stays NaN, as does
        any pair whose combined rows hold fewer than five distinct
        respondents.
    """
    icc_types = ("ICC1", "ICC2", "ICC3")
    melted_df = df.melt(id_vars=["assessor", "respondent"], var_name="Criterion", value_name="Score")
    assessors = df["assessor"].unique()
    icc_matrix_types = {
        icc_type: pd.DataFrame(index=assessors, columns=assessors, dtype=float)
        for icc_type in icc_types
    }

    # (a, b) and (b, a) select exactly the same rows, so each unordered pair
    # is evaluated once and the result is mirrored into both cells.
    for position, assessor1 in enumerate(assessors):
        for assessor2 in assessors[position + 1:]:
            subset = melted_df[melted_df["assessor"].isin([assessor1, assessor2])]

            # Too few respondents for this pair: leave the cells as NaN.
            if subset["respondent"].nunique() < 5:
                continue

            icc_results = pg.intraclass_corr(
                data=subset, targets="respondent", raters="assessor", ratings="Score"
            ).round(3)

            # Build the Type -> ICC lookup once per pair, not once per type.
            icc_by_type = icc_results.set_index("Type")["ICC"]
            for icc_type in icc_types:
                value = icc_by_type.loc[icc_type]
                matrix = icc_matrix_types[icc_type]
                matrix.loc[assessor1, assessor2] = value
                matrix.loc[assessor2, assessor1] = value

    return icc_matrix_types
66
+
67
+
68
def generate_heatmaps(icc_matrix_types):
    """
    Render one heatmap image per ICC matrix and save it as a PNG.

    Args:
        icc_matrix_types: Mapping from ICC type name to its assessor-by-assessor
            matrix.

    Returns:
        A dict mapping each ICC type name to the path of the PNG written
        under ``HEATMAPS_FOLDER``.
    """
    saved_paths = {}

    for kind, matrix in icc_matrix_types.items():
        target_path = os.path.join(HEATMAPS_FOLDER, f"icc_matrix_{kind}.png")

        # A fresh figure per matrix, closed again once it has been saved.
        plt.figure(figsize=(8, 6))
        numeric_matrix = matrix.astype(float)
        sns.heatmap(
            numeric_matrix,
            annot=True,
            cmap="coolwarm",
            linewidths=0.5,
            fmt=".2f",
        )
        plt.title(f"Assessor ICC Matrix ({kind})")
        plt.xlabel("Assessor (LLM)")
        plt.ylabel("Assessor (LLM)")
        plt.xticks(rotation=45)
        plt.yticks(rotation=0)
        plt.savefig(target_path)
        plt.close()

        saved_paths[kind] = target_path

    return saved_paths