Jofthomas committed (verified)
Commit 9f53a53 · Parent(s): 571c3b5

Update main.py

Files changed (1): main.py (+38 −21)
main.py CHANGED
@@ -9,6 +9,9 @@ from datetime import datetime, timezone
 import logging
 import uvicorn # To run the app
 
+tool_threshold = 3
+step_threshold = 5
+
 # --- Configuration ---
 HF_DATASET_ID = "agents-course/unit4-students-scores"
 # Ensure you have write access to this dataset repository on Hugging Face
@@ -17,26 +20,38 @@ HF_DATASET_ID = "agents-course/unit4-students-scores"
 # --- Logging Setup ---
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-
-# --- Load and Prepare Filtered Questions ---
-# Placeholder: Replace this with your actual filtered data loading logic
-# This data MUST contain 'task_id', 'Question', and 'Final answer'
-# Example structure:
-# filtered_data = [
-#     {'task_id': 'e1fc63a2-da7a-432f-be78-7c4a95598703', 'Question': 'If Eliud Kipchoge...', 'Final answer': '17', ... other keys ...},
-#     {'task_id': 'example_pass', 'Question': 'Another question', 'Final answer': '42', ... other keys ...},
-#     # ... more filtered questions
-# ]
-
-# Let's simulate loading your filtered data (replace with your actual loading)
-# Assuming you have the 'filtered_questions' list from the previous step's code
-# Example data if you don't have it handy:
-filtered_data = [
-    {'task_id': 'q1', 'Question': 'What is 2+2?', 'Level': '1', 'Final answer': '4', 'Annotator Metadata': {'Number of steps': '1', 'Number of tools': '1'}},
-    {'task_id': 'q2', 'Question': 'Capital of France?', 'Level': '1', 'Final answer': 'Paris', 'Annotator Metadata': {'Number of steps': '1', 'Number of tools': '1'}},
-    {'task_id': 'q3', 'Question': '10 / 2 ?', 'Level': '1', 'Final answer': '5', 'Annotator Metadata': {'Number of steps': '1', 'Number of tools': '1'}}
-]
-# filtered_data = filtered_questions # Uncomment this if you have the list from previous step
+filtered_dataset = None
+def load_questions():  # Distinct name so it does not shadow datasets.load_dataset, which it calls below
+    global filtered_dataset
+    tempo_filtered = []
+    dataset = load_dataset("gaia-benchmark/GAIA", "2023_level1", trust_remote_code=True)
+    for question in dataset['validation']:
+        metadata = question.get('Annotator Metadata')  # Use .get() for safety
+
+        if metadata:  # Check if 'Annotator Metadata' exists
+            num_tools_str = metadata.get('Number of tools')
+            num_steps_str = metadata.get('Number of steps')
+
+            # Check if both numbers exist before trying to convert
+            if num_tools_str is not None and num_steps_str is not None:
+                try:
+                    # Convert values to integers for comparison
+                    num_tools = int(num_tools_str)
+                    num_steps = int(num_steps_str)
+
+                    # Apply the filter conditions
+                    if num_tools < tool_threshold and num_steps < step_threshold:
+                        print(f"MATCH FOUND (Task ID: {question.get('task_id', 'N/A')}) - Tools: {num_tools}, Steps: {num_steps}")
+                        print(question)  # Print the matching question dictionary
+                        print("------------------------------------------------------------------")
+                        tempo_filtered.append(question)  # Add to the filtered list
+                    # else:  # Optional: handle items that don't match the filter
+                    #     print(f"Skipping Task ID: {question.get('task_id', 'N/A')} - Tools: {num_tools}, Steps: {num_steps}")
+                except ValueError:
+                    # Handle cases where 'Number of tools' or 'Number of steps' is not a valid integer
+                    print(f"Skipping Task ID: {question.get('task_id', 'N/A')} - Could not convert tool/step count to integer.")
+                    print("------------------------------------------------------------------")
+    filtered_dataset = tempo_filtered  # Publish the result via the module-level global
 
 # Prepare data structures for the API
 questions_for_api: List[Dict[str, str]] = []
@@ -169,7 +184,8 @@ def update_huggingface_dataset(username: str, score: float):
         # Ensure the schema matches if columns were added/modified.
         # Use 'train' split convention.
         updated_ds = DatasetDict({'train': Dataset.from_pandas(df)})
-        updated_ds.push_to_hub(HF_DATASET_ID) # Token should be picked up from env or login
+        print(updated_ds)
+        #updated_ds.push_to_hub(HF_DATASET_ID) # Token should be picked up from env or login
         logger.info("Dataset push successful.")
         return True
     else:
@@ -311,4 +327,5 @@ if __name__ == "__main__":
     local_port = int(os.getenv("PORT", "8000"))
     logger.info(f"Running Uvicorn locally on port: {local_port}")
     # Note: host='127.0.0.1' is usually fine for local runs outside docker
+    load_questions()  # Build the filtered question set before serving
     uvicorn.run(app, host="127.0.0.1", port=local_port, log_level="info")
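The new load_questions routine is easiest to sanity-check outside the FastAPI app. Below is a minimal, standalone sketch of the same GAIA level-1 filter, assuming the datasets library is installed and your Hugging Face token grants access to the gated gaia-benchmark/GAIA repo; the helper name filter_level1_questions is illustrative, not part of main.py.

import datasets

TOOL_THRESHOLD = 3   # mirrors tool_threshold in main.py
STEP_THRESHOLD = 5   # mirrors step_threshold in main.py

def filter_level1_questions():
    # Load the validation split and keep questions annotated with
    # fewer than TOOL_THRESHOLD tools and fewer than STEP_THRESHOLD steps.
    ds = datasets.load_dataset("gaia-benchmark/GAIA", "2023_level1", trust_remote_code=True)
    kept = []
    for question in ds["validation"]:
        metadata = question.get("Annotator Metadata") or {}
        try:
            num_tools = int(metadata.get("Number of tools"))
            num_steps = int(metadata.get("Number of steps"))
        except (TypeError, ValueError):
            continue  # missing or non-numeric annotation: skip the question
        if num_tools < TOOL_THRESHOLD and num_steps < STEP_THRESHOLD:
            kept.append(question)
    return kept

if __name__ == "__main__":
    questions = filter_level1_questions()
    print(f"{len(questions)} questions passed the tool/step filter")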
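The middle hunk neutralizes the Hub upload by printing the DatasetDict and commenting out push_to_hub. If the push is re-enabled later, one option is to gate it behind an explicit environment flag so local test runs never write to agents-course/unit4-students-scores. A sketch under that assumption; the SUBMIT_RESULTS flag and push_scores helper are hypothetical, not part of main.py.

import os
import pandas as pd
from datasets import Dataset, DatasetDict

def push_scores(df: pd.DataFrame, repo_id: str = "agents-course/unit4-students-scores") -> None:
    # Wrap the scores table in the 'train' split convention used by main.py.
    updated_ds = DatasetDict({"train": Dataset.from_pandas(df)})
    if os.getenv("SUBMIT_RESULTS") == "1":
        # Token is read from HF_TOKEN or a prior `huggingface-cli login`.
        updated_ds.push_to_hub(repo_id)
    else:
        print(updated_ds)  # Dry run: show what would have been pushed

# Example dry run (no upload unless SUBMIT_RESULTS=1 is set):
# push_scores(pd.DataFrame([{"username": "alice", "score": 0.8}]))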