Spaces:
Sleeping
Sleeping
Update evaluation.py
Browse files
- evaluation.py +8 -2
evaluation.py
CHANGED
@@ -96,18 +96,24 @@ def compute_rmse(predicted_values, ground_truth_values):
|
|
96 |
else:
|
97 |
print("Invalid input for RMSE calculation. Ensure all values are numeric.")
|
98 |
return None
|
99 |
-
|
|
|
|
|
|
|
|
|
|
|
100 |
def retrieve_ground_truths(question, dataset):
    """Retrieve the ground truth answer for a given question from the dataset.

    Every split in ``dataset`` is scanned in iteration order and the first
    instance whose ``'question'`` field equals ``question`` wins.

    Args:
        question: The question to look up (compared with ``==``).
        dataset: A mapping of split name to an iterable of instances, where
            each instance supports key access for ``'question'``,
            ``'response'`` and the ``*_score`` fields read below.

    Returns:
        A ``(response, metrics)`` tuple for the first match, where ``metrics``
        maps ``"context_relevance"``, ``"context_utilization"``,
        ``"completeness"`` and ``"adherence"`` to the instance's scores, or
        ``(None, None)`` when no instance matches.
    """
    for split_name, instances in dataset.items():
        for instance in instances:
            if instance['question'] != question:
                continue
            instance_response = instance['response']
            ground_truth_metrics = {
                "context_relevance": instance['relevance_score'],
                "context_utilization": instance['utilization_score'],
                "completeness": instance['completeness_score'],
                # NOTE(review): this entry had no value (a syntax error);
                # 'adherence_score' is assumed to be the matching dataset
                # field -- confirm against the dataset schema.
                "adherence": instance['adherence_score'],
            }
            # Return the ground truth response immediately
            return instance_response, ground_truth_metrics
    return None, None  # Return None if no match is found
|
|
|
96 |
else:
|
97 |
print("Invalid input for RMSE calculation. Ensure all values are numeric.")
|
98 |
return None
|
99 |
+
def convert_adherence_to_numerical(adherence_score):
    """Map an adherence label to ``1`` (adherent) or ``0`` (not adherent).

    Non-string values are converted by ordinary truthiness, so ``True`` and
    non-zero numbers become ``1`` while ``False``, ``0`` and ``None`` become
    ``0``.  String labels are handled explicitly: after trimming whitespace
    and ignoring case, ``""``, ``"false"``, ``"0"`` and ``"no"`` become ``0``
    and any other string becomes ``1``.

    Args:
        adherence_score: The adherence label to convert.

    Returns:
        The integer ``1`` or ``0``.
    """
    if isinstance(adherence_score, str):
        # Any non-empty string is truthy, so a textual "False" would otherwise
        # be counted as adherent.
        false_labels = ("", "false", "0", "no")
        return 0 if adherence_score.strip().lower() in false_labels else 1
    return 1 if adherence_score else 0
|
104 |
+
|
105 |
def retrieve_ground_truths(question, dataset):
    """Look up the ground-truth response and metric scores for a question.

    The splits of ``dataset`` are walked in iteration order; the first record
    whose ``'question'`` field equals ``question`` is used and the search
    stops there.

    Args:
        question: The question to search for (compared with ``==``).
        dataset: A mapping of split name to an iterable of records, each
            supporting key access for the fields read below.

    Returns:
        ``(response, metrics)`` for the first matching record, where
        ``metrics`` holds ``"context_relevance"``, ``"context_utilization"``,
        ``"completeness"`` and ``"adherence"`` (the record's
        ``'adherence_score'`` passed through
        ``convert_adherence_to_numerical``); ``(None, None)`` if nothing
        matches.
    """
    for _split, records in dataset.items():
        for record in records:
            if record['question'] != question:
                continue
            # Same field-access order as before: response, adherence, then
            # the remaining scores.
            response = record['response']
            adherence = convert_adherence_to_numerical(record['adherence_score'])
            return response, {
                "context_relevance": record['relevance_score'],
                "context_utilization": record['utilization_score'],
                "completeness": record['completeness_score'],
                "adherence": adherence,
            }
    # No split contained the question.
    return None, None
|