Update evaluation.py

evaluation.py  CHANGED  +20 -2
@@ -194,13 +194,31 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
     for metric_name in ground_truth_metrics:
         ground_truth_value = ground_truth_metrics[metric_name]
         print(f"RMSE for {metric_name}: {ground_truth_value}")
-
+    rmse_values = []
+    ground_truth_values = []
+    for metric_name in predicted_metrics_rmse:
+        predicted_value = predicted_metrics_rmse[metric_name]
+        ground_truth_value = ground_truth_metrics.get(metric_name, None)
+
+        # Ensure both predicted and ground truth values are numeric
+        if isinstance(predicted_value, (int, float)) and isinstance(ground_truth_value, (int, float)):
+            rmse_values.append(predicted_value)
+            ground_truth_values.append(ground_truth_value)
+        else:
+            print(f"Skipping RMSE for {metric_name}: One or both values are non-numeric")
+
+    if rmse_values and ground_truth_values:
+        overall_rmse = compute_rmse(rmse_values, ground_truth_values)
+        print(f"Overall RMSE: {overall_rmse}")
+    else:
+        print("Invalid RMSE calculation due to non-numeric values.")
+    '''
     if isinstance(predicted_metrics_rmse, (int, float)) and isinstance(ground_truth_metrics, (int, float)):
         rmse_value = compute_rmse(predicted_metrics_rmse.values(), ground_truth_metrics.values())
         predicted_metrics_rmse["rmse"] = rmse_value  # Adding RMSE to metrics
     else:
         predicted_metrics_rmse["rmse"] = "Invalid RMSE calculation"
-
+    '''
     return predicted_metrics

 ''' def retrieve_ground_truths(question, dataset):
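The replaced block (now wrapped in a triple-quoted string) checked isinstance(predicted_metrics_rmse, (int, float)) on what the surrounding code treats as a dict, a test that is always False, so "rmse" was unconditionally set to "Invalid RMSE calculation". The new loop instead pairs predicted and ground-truth values by metric name, skips non-numeric entries, and computes a single overall RMSE. The actual compute_rmse helper is defined elsewhere in evaluation.py and is not shown in this hunk; a minimal sketch of what such a helper could look like, assuming it takes two equal-length numeric sequences (the real signature in this repo may differ):

    import math

    def compute_rmse(predicted_values, ground_truth_values):
        # Hypothetical helper matching the call sites in this diff:
        # root-mean-square error over paired numeric sequences.
        pairs = list(zip(predicted_values, ground_truth_values))
        if not pairs:
            raise ValueError("compute_rmse requires at least one value pair")
        squared_errors = [(p - g) ** 2 for p, g in pairs]
        return math.sqrt(sum(squared_errors) / len(pairs))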