Spaces:
Running
Running
update: Enhance dataset loading to handle missing columns and streamline data integration
Browse files
app.py
CHANGED
@@ -61,20 +61,19 @@ def save_reactions_to_dataset(user_type, username, query, results_mpnet, results
|
|
61 |
data["reaction"].append(result["reaction"])
|
62 |
|
63 |
try:
|
|
|
64 |
dataset = load_dataset("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation", split="train")
|
65 |
existing_data = dataset.to_dict()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
except Exception:
|
67 |
-
|
68 |
-
|
69 |
-
"username": [],
|
70 |
-
"query": [],
|
71 |
-
"retrieved_text": [],
|
72 |
-
"model_type": [],
|
73 |
-
"reaction": []
|
74 |
-
}
|
75 |
-
|
76 |
-
for key in data:
|
77 |
-
existing_data[key].extend(data[key])
|
78 |
|
79 |
updated_dataset = Dataset.from_dict(existing_data)
|
80 |
updated_dataset.push_to_hub("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation")
|
|
|
61 |
data["reaction"].append(result["reaction"])
|
62 |
|
63 |
try:
|
64 |
+
# Try to load existing dataset
|
65 |
dataset = load_dataset("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation", split="train")
|
66 |
existing_data = dataset.to_dict()
|
67 |
+
|
68 |
+
# Handle missing columns in existing data
|
69 |
+
for key in data:
|
70 |
+
if key not in existing_data:
|
71 |
+
# Backfill rows already in the dataset: "" for user_type/username/model_type, None for any other column
|
72 |
+
existing_data[key] = ["" if key in ["user_type", "username", "model_type"] else None] * len(existing_data.get(next(iter(existing_data)), []))
|
73 |
+
existing_data[key].extend(data[key])
|
74 |
except Exception:
|
75 |
+
# If loading or merging fails for any reason (not only a missing dataset), fall back to the new data alone
|
76 |
+
existing_data = data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
updated_dataset = Dataset.from_dict(existing_data)
|
79 |
updated_dataset.push_to_hub("HumbleBeeAI/al-ghazali-rag-retrieval-evaluation")
|