vamseelatha2002 committed
Commit 09d15e8 · verified · 1 Parent(s): 95df31f

Update evaluation.py

Files changed (1):
  1. evaluation.py (+18 -2)
evaluation.py CHANGED
@@ -113,19 +113,28 @@ def retrieve_ground_truths(question, dataset):
     return None,None # Return None if no match is found
 
 
-# Store RMSE for each metric in the global rmse_scores dictionary
+
 def store_rmse(question, predicted_metrics, ground_truth_metrics):
+    """Calculate and store RMSE for each metric."""
     for metric_name in predicted_metrics:
-        # Ensure both predicted value and ground truth value are numeric before calculating RMSE
         predicted_value = predicted_metrics[metric_name]
+
+        # Get the corresponding ground truth value from ground_truth_metrics
         ground_truth_value = ground_truth_metrics.get(metric_name, None)
 
+        # Debugging: Check the values being compared
+        print(f"Comparing {metric_name}: Predicted = {predicted_value}, Ground Truth = {ground_truth_value}")
+
+        # Ensure both predicted value and ground truth value are numeric before calculating RMSE
         if isinstance(predicted_value, (int, float)) and isinstance(ground_truth_value, (int, float)):
             rmse_value = compute_rmse([predicted_value], [ground_truth_value])
             if rmse_value is not None:
+                print(f"RMSE for {metric_name}: {rmse_value}")
                 if question not in rmse_scores:
                     rmse_scores[question] = {}
                 rmse_scores[question][metric_name] = rmse_value
+        else:
+            print(f"Skipping RMSE for {metric_name}: One or both values are non-numeric")
 
 def calculate_metrics(question, q_dataset, response, docs, time_taken):
     data = load_query_dataset(q_dataset)
@@ -151,6 +160,13 @@ def calculate_metrics(question, q_dataset, response, docs, time_taken):
         "ground truth completeness": ground_truth_completeness
     }
 
+    # Predicted metrics
+    predicted_metrics_rmse = {
+        "context_relevance": context_relevance(question, docs),
+        "context_utilization": context_utilization(response, docs),
+        "completeness": compute_cosine_similarity(response, ground_truth_answer), #completeness(response, ground_truth_answer),
+        "adherence": adherence(response, docs)
+    }
 
     store_rmse(question, predicted_metrics, ground_truth_metrics)
     # Now, make sure the values passed to RMSE calculation are numeric
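
For context: compute_rmse and the module-level rmse_scores dictionary are used by the hunks above but not defined in them. Below is a minimal sketch of how they could look, assuming a NumPy-based RMSE helper and a global dict keyed first by question and then by metric name; the names come from the diff, but the bodies are assumptions, not the repository's actual implementation.

import numpy as np

# Assumed module-level store: {question: {metric_name: rmse_value}}
rmse_scores = {}

def compute_rmse(predicted_values, ground_truth_values):
    """Root-mean-square error over two equal-length numeric lists; None if the input is unusable."""
    if not predicted_values or len(predicted_values) != len(ground_truth_values):
        return None
    predicted = np.asarray(predicted_values, dtype=float)
    actual = np.asarray(ground_truth_values, dtype=float)
    return float(np.sqrt(np.mean((predicted - actual) ** 2)))

Because store_rmse passes single-element lists, the RMSE for each metric reduces to the absolute difference between the predicted and ground-truth values. For example, under the sketch above, store_rmse("sample question", {"context_relevance": 0.82}, {"context_relevance": 0.90}) would record roughly 0.08 in rmse_scores["sample question"]["context_relevance"], while any non-numeric pair is skipped with the new debug message.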