eli02 committed on
Commit
ee42b39
·
1 Parent(s): 23d605d

update: Add debug checks and improve data handling in save_reactions_to_dataset function

Browse files
Files changed (1) hide show
  1. app.py +43 -8
app.py CHANGED
@@ -32,7 +32,22 @@ def load_credentials():
32
  def authenticate(username, password, credentials):
33
  return credentials.get(username) == password
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  def save_reactions_to_dataset(user_type, username, query, results_mpnet, results_openai):
 
36
  data = {
37
  "user_type": [],
38
  "username": [],
@@ -61,22 +76,42 @@ def save_reactions_to_dataset(user_type, username, query, results_mpnet, results
61
  data["reaction"].append(result["reaction"])
62
 
63
  try:
64
- # Try to load existing dataset
65
  dataset = load_dataset("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation", split="train")
66
  existing_data = dataset.to_dict()
67
 
68
- # Handle missing columns in existing data
 
 
 
 
 
69
  for key in data:
70
  if key not in existing_data:
71
- # Add default values for existing rows
72
- existing_data[key] = ["" if key in ["username", "model_type"] else None] * len(existing_data.get(next(iter(existing_data)), []))
 
 
 
73
  existing_data[key].extend(data[key])
74
- except Exception:
75
- # If dataset doesn't exist, use the new data structure
 
 
 
 
 
 
 
 
 
76
  existing_data = data
77
 
78
- updated_dataset = Dataset.from_dict(existing_data)
79
- updated_dataset.push_to_hub("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation")
 
 
 
80
 
81
  def update_reaction(model_type, idx):
82
  st.session_state.reactions[f"reaction_{model_type}_{idx}"] = st.session_state[f"reaction_{model_type}_{idx}"]
 
32
  def authenticate(username, password, credentials):
33
  return credentials.get(username) == password
34
 
35
def debug_check_before_save(data_dict):
    """Print a column-length summary and report whether all columns agree.

    Args:
        data_dict: Mapping of column name to a sized, sliceable sequence
            of values (the caller passes lists).

    Returns:
        bool: True if every column has the same length. An empty mapping
        has no disagreeing columns and is therefore reported as consistent.
    """
    lengths = {name: len(column) for name, column in data_dict.items()}
    print("\nDebug Check Results:")
    print(f"All column lengths: {lengths}")

    # Show the tail of each column so misaligned rows are easy to spot.
    print("\nLast 4 entries of each column:")
    for key, values in data_dict.items():
        print(f"\n{key}:")
        print(values[-4:])

    # "<= 1" rather than "== 1": zero columns means zero distinct lengths,
    # which must not be reported as a mismatch.
    return len(set(lengths.values())) <= 1
48
+
49
  def save_reactions_to_dataset(user_type, username, query, results_mpnet, results_openai):
50
+ # First prepare the new data
51
  data = {
52
  "user_type": [],
53
  "username": [],
 
76
  data["reaction"].append(result["reaction"])
77
 
78
  try:
79
+ # Load existing dataset
80
  dataset = load_dataset("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation", split="train")
81
  existing_data = dataset.to_dict()
82
 
83
+ # Calculate the exact existing length once
84
+ existing_length = len(next(iter(existing_data.values())))
85
+ print(f"Existing dataset length: {existing_length}")
86
+ print(f"New entries to add: {len(data['user_type'])}") # Debug print
87
+
88
+ # Handle missing columns
89
  for key in data:
90
  if key not in existing_data:
91
+ # Initialize missing columns with exactly existing_length entries
92
+ existing_data[key] = ["" if key in ["username", "model_type"] else None] * existing_length
93
+
94
+ # Now extend with new data
95
+ for key in data:
96
  existing_data[key].extend(data[key])
97
+
98
+ # Verify final lengths
99
+ final_lengths = {k: len(v) for k, v in existing_data.items()}
100
+ print(f"Final lengths of all columns: {final_lengths}") # Debug print
101
+
102
+ if len(set(final_lengths.values())) > 1:
103
+ raise ValueError(f"Column length mismatch after merging: {final_lengths}")
104
+
105
+ except Exception as e:
106
+ print(f"Error occurred: {str(e)}")
107
+ # If loading fails, start fresh with just the new data
108
  existing_data = data
109
 
110
+ if debug_check_before_save(existing_data):
111
+ updated_dataset = Dataset.from_dict(existing_data)
112
+ updated_dataset.push_to_hub("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation")
113
+ else:
114
+ raise ValueError("Length mismatch detected in final check")
115
 
116
  def update_reaction(model_type, idx):
117
  st.session_state.reactions[f"reaction_{model_type}_{idx}"] = st.session_state[f"reaction_{model_type}_{idx}"]