Update evaluation.py

evaluation.py  CHANGED  (+11 -9)
@@ -203,15 +203,8 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
         print(f"Overall RMSE: {overall_rmse}")
     else:
         print("Invalid RMSE calculation due to non-numeric values.")
-
-
-        rmse_value = compute_rmse(predicted_metrics_rmse.values(), ground_truth_metrics.values())
-        predicted_metrics_rmse["rmse"] = rmse_value  # Adding RMSE to metrics
-    else:
-        predicted_metrics_rmse["rmse"] = "Invalid RMSE calculation"
-
-    '''
-    predicted_metrics = {
+
+    predicted_metrics = {
         "RAG_model_response": response,
         "ground_truth": ground_truth_answer,
         "context_relevance": context_relevance(question, docs),
@@ -222,6 +215,15 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
         "ground truth completeness": ground_truth_completeness,
         "rmse": overall_rmse
     }
+    '''
+    if isinstance(predicted_metrics_rmse, (int, float)) and isinstance(ground_truth_metrics, (int, float)):
+        rmse_value = compute_rmse(predicted_metrics_rmse.values(), ground_truth_metrics.values())
+        predicted_metrics_rmse["rmse"] = rmse_value  # Adding RMSE to metrics
+    else:
+        predicted_metrics_rmse["rmse"] = "Invalid RMSE calculation"
+
+    '''
+
     return predicted_metrics
 
 ''' def retrieve_ground_truths(question, dataset):
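A note on the block this commit fences off between the triple-quoted strings: it guards with isinstance(predicted_metrics_rmse, (int, float)) but then calls .values() on the same objects, which only exists on dicts, so the guard and the call can never both succeed; commenting the block out and reporting overall_rmse directly sidesteps that. If the per-metric RMSE is revived later, the following is a minimal sketch of a consistent version, assuming predicted_metrics_rmse and ground_truth_metrics are dicts of numeric scores keyed by metric name; this compute_rmse body is an illustration only, since the diff does not show the repo's actual helper.

import math

def compute_rmse(predicted_values, ground_truth_values):
    # Hypothetical stand-in for the repo's compute_rmse (not shown in this diff).
    pairs = [
        (p, g)
        for p, g in zip(predicted_values, ground_truth_values)
        if isinstance(p, (int, float)) and isinstance(g, (int, float))
    ]
    if not pairs:
        return None  # no numeric pairs to compare
    return math.sqrt(sum((p - g) ** 2 for p, g in pairs) / len(pairs))

# Example metric dicts; the real ones are built inside calculate_metrics().
predicted_metrics_rmse = {"context_relevance": 0.82, "answer_similarity": 0.74}
ground_truth_metrics = {"context_relevance": 1.0, "answer_similarity": 1.0}

# Align values by shared key instead of relying on dict insertion order.
shared = sorted(predicted_metrics_rmse.keys() & ground_truth_metrics.keys())
rmse_value = compute_rmse(
    [predicted_metrics_rmse[k] for k in shared],
    [ground_truth_metrics[k] for k in shared],
)
predicted_metrics_rmse["rmse"] = (
    rmse_value if rmse_value is not None else "Invalid RMSE calculation"
)

Keying the comparison on the shared metric names keeps the pairing correct even if the two dicts were built in different orders.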