Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -26,31 +26,31 @@ def load_dataset():
|
|
26 |
tempo_filtered=[]
|
27 |
dataset=load_dataset("gaia-benchmark/GAIA","2023_level1",trust_remote_code=True)
|
28 |
for question in dataset['validation']:
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
print("------------------------------------------------------------------")
|
47 |
-
tempo_filtered.append(question) # Add to the filtered list
|
48 |
-
# else: # Optional: Handle items that don't match the filter
|
49 |
-
# print(f"Skipping Task ID: {question.get('task_id', 'N/A')} - Tools: {num_tools}, Steps: {num_steps}")
|
50 |
-
except ValueError:
|
51 |
-
# Handle cases where 'Number of tools' or 'Number of steps' is not a valid integer
|
52 |
-
print(f"Skipping Task ID: {question.get('task_id', 'N/A')} - Could not convert tool/step count to integer.")
|
53 |
-
print("------------------------------------------------------------------")
|
54 |
filtered_data=tempo_filtered
|
55 |
|
56 |
# Prepare data structures for the API
|
|
|
26 |
tempo_filtered=[]
|
27 |
dataset=load_dataset("gaia-benchmark/GAIA","2023_level1",trust_remote_code=True)
|
28 |
for question in dataset['validation']:
|
29 |
+
metadata = question.get('Annotator Metadata') # Use .get() for safety
|
30 |
+
|
31 |
+
if metadata: # Check if 'Annotator Metadata' exists
|
32 |
+
num_tools_str = metadata.get('Number of tools')
|
33 |
+
num_steps_str = metadata.get('Number of steps')
|
34 |
+
|
35 |
+
# Check if both numbers exist before trying to convert
|
36 |
+
if num_tools_str is not None and num_steps_str is not None:
|
37 |
+
try:
|
38 |
+
# Convert values to integers for comparison
|
39 |
+
num_tools = int(num_tools_str)
|
40 |
+
num_steps = int(num_steps_str)
|
41 |
+
|
42 |
+
# Apply the filter conditions
|
43 |
+
if num_tools < tool_threshold and num_steps < step_threshold:
|
44 |
+
print(f"MATCH FOUND (Task ID: {question.get('task_id', 'N/A')}) - Tools: {num_tools}, Steps: {num_steps}")
|
45 |
+
print(question) # Print the matching question dictionary
|
46 |
+
print("------------------------------------------------------------------")
|
47 |
+
tempo_filtered.append(question) # Add to the filtered list
|
48 |
+
# else: # Optional: Handle items that don't match the filter
|
49 |
+
# print(f"Skipping Task ID: {question.get('task_id', 'N/A')} - Tools: {num_tools}, Steps: {num_steps}")
|
50 |
+
except ValueError:
|
51 |
+
# Handle cases where 'Number of tools' or 'Number of steps' is not a valid integer
|
52 |
+
print(f"Skipping Task ID: {question.get('task_id', 'N/A')} - Could not convert tool/step count to integer.")
|
53 |
print("------------------------------------------------------------------")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
filtered_data=tempo_filtered
|
55 |
|
56 |
# Prepare data structures for the API
|