update: Add debug checks and improve data handling in save_reactions_to_dataset function
app.py CHANGED
@@ -32,7 +32,22 @@ def load_credentials():
 def authenticate(username, password, credentials):
     return credentials.get(username) == password
 
+def debug_check_before_save(data_dict):
+    # Check lengths
+    lengths = {k: len(v) for k, v in data_dict.items()}
+    print("\nDebug Check Results:")
+    print(f"All column lengths: {lengths}")
+
+    # Check last few entries
+    print("\nLast 4 entries of each column:")
+    for key, values in data_dict.items():
+        print(f"\n{key}:")
+        print(values[-4:])
+
+    return len(set(lengths.values())) == 1  # Returns True if all lengths match
+
 def save_reactions_to_dataset(user_type, username, query, results_mpnet, results_openai):
+    # First prepare the new data
     data = {
         "user_type": [],
         "username": [],
@@ -61,22 +76,42 @@ def save_reactions_to_dataset(user_type, username, query, results_mpnet, results_openai):
         data["reaction"].append(result["reaction"])
 
     try:
-        #
+        # Load existing dataset
         dataset = load_dataset("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation", split="train")
         existing_data = dataset.to_dict()
 
-        #
+        # Calculate the exact existing length once
+        existing_length = len(next(iter(existing_data.values())))
+        print(f"Existing dataset length: {existing_length}")
+        print(f"New entries to add: {len(data['user_type'])}")  # Debug print
+
+        # Handle missing columns
         for key in data:
             if key not in existing_data:
-                #
-                existing_data[key] = ["" if key in ["username", "model_type"] else None] *
+                # Initialize missing columns with exactly existing_length entries
+                existing_data[key] = ["" if key in ["username", "model_type"] else None] * existing_length
+
+        # Now extend with new data
+        for key in data:
             existing_data[key].extend(data[key])
-
-        #
+
+        # Verify final lengths
+        final_lengths = {k: len(v) for k, v in existing_data.items()}
+        print(f"Final lengths of all columns: {final_lengths}")  # Debug print
+
+        if len(set(final_lengths.values())) > 1:
+            raise ValueError(f"Column length mismatch after merging: {final_lengths}")
+
+    except Exception as e:
+        print(f"Error occurred: {str(e)}")
+        # If loading fails, start fresh with just the new data
         existing_data = data
 
-
-
+    if debug_check_before_save(existing_data):
+        updated_dataset = Dataset.from_dict(existing_data)
+        updated_dataset.push_to_hub("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation")
+    else:
+        raise ValueError("Length mismatch detected in final check")
 
 def update_reaction(model_type, idx):
     st.session_state.reactions[f"reaction_{model_type}_{idx}"] = st.session_state[f"reaction_{model_type}_{idx}"]
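For reference, the merge-and-validate pattern this commit introduces (pad missing columns to the current row count, append the new rows column by column, refuse to build the dataset unless every column ends up the same length) can be exercised in isolation. The sketch below is illustrative only: the column names and toy values are made up rather than taken from the app, and the push_to_hub call is left as a comment since it requires Hub credentials.

from datasets import Dataset

# Toy stand-ins for the Hub data and the new reactions (hypothetical values).
existing_data = {"user_type": ["expert"], "query": ["What is ihsan?"]}
new_data = {"user_type": ["novice"], "query": ["Define tawakkul"], "reaction": ["like"]}

existing_length = len(next(iter(existing_data.values())))

# Pad columns that only exist in the new data so every column starts at the same length.
for key in new_data:
    if key not in existing_data:
        existing_data[key] = [None] * existing_length

# Append the new rows column by column.
for key in new_data:
    existing_data[key].extend(new_data[key])

# Final guard: refuse to build the dataset if any column length drifted.
lengths = {k: len(v) for k, v in existing_data.items()}
if len(set(lengths.values())) != 1:
    raise ValueError(f"Column length mismatch: {lengths}")

merged = Dataset.from_dict(existing_data)
print(merged)
# merged.push_to_hub("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation") would follow here in the Space, as in the diff.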