Update main.py
main.py CHANGED
@@ -9,6 +9,9 @@ from datetime import datetime, timezone
 import logging
 import uvicorn # To run the app
 
+tool_threshold = 3
+step_threshold = 5
+
 # --- Configuration ---
 HF_DATASET_ID = "agents-course/unit4-students-scores"
 # Ensure you have write access to this dataset repository on Hugging Face
@@ -17,26 +20,38 @@ HF_DATASET_ID = "agents-course/unit4-students-scores"
 # --- Logging Setup ---
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-
-
-
-
-
-
-
-
-#
-
-
-
-#
-
-
-
-
-
-
-#
+filtered_dataset = None
+def load_filtered_dataset():  # renamed: the original 'def load_dataset()' shadowed datasets.load_dataset and would call itself
+    global filtered_dataset
+    tempo_filtered = []
+    dataset = load_dataset("gaia-benchmark/GAIA", "2023_level1", trust_remote_code=True)
+    for question in dataset['validation']:
+        metadata = question.get('Annotator Metadata')  # Use .get() for safety
+
+        if metadata:  # Check if 'Annotator Metadata' exists
+            num_tools_str = metadata.get('Number of tools')
+            num_steps_str = metadata.get('Number of steps')
+
+            # Check if both numbers exist before trying to convert
+            if num_tools_str is not None and num_steps_str is not None:
+                try:
+                    # Convert values to integers for comparison
+                    num_tools = int(num_tools_str)
+                    num_steps = int(num_steps_str)
+
+                    # Apply the filter conditions
+                    if num_tools < tool_threshold and num_steps < step_threshold:
+                        print(f"MATCH FOUND (Task ID: {question.get('task_id', 'N/A')}) - Tools: {num_tools}, Steps: {num_steps}")
+                        print(question)  # Print the matching question dictionary
+                        print("------------------------------------------------------------------")
+                        tempo_filtered.append(question)  # Add to the filtered list
+                    # else: # Optional: Handle items that don't match the filter
+                    #     print(f"Skipping Task ID: {question.get('task_id', 'N/A')} - Tools: {num_tools}, Steps: {num_steps}")
+                except ValueError:
+                    # Handle cases where the tool/step count is not a valid integer
+                    print(f"Skipping Task ID: {question.get('task_id', 'N/A')} - Could not convert tool/step count to integer.")
+                    print("------------------------------------------------------------------")
+    filtered_dataset = tempo_filtered  # assign the global (original wrote to 'filtered_data', a dead local)
 
 # Prepare data structures for the API
 questions_for_api: List[Dict[str, str]] = []
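Note: the hunk above loads GAIA level-1 questions eagerly and keeps only those whose annotator metadata reports fewer than tool_threshold tools and step_threshold steps. A minimal standalone sketch of the same filter, assuming the datasets library is installed and access to the gated gaia-benchmark/GAIA repo has already been granted (function and variable names below are illustrative, not from main.py):

    from datasets import load_dataset

    def filter_gaia(tool_threshold: int = 3, step_threshold: int = 5) -> list:
        # Gated dataset: requires accepting the terms on the Hub and a cached login
        ds = load_dataset("gaia-benchmark/GAIA", "2023_level1", trust_remote_code=True)
        filtered = []
        for q in ds["validation"]:
            meta = q.get("Annotator Metadata") or {}
            try:
                tools = int(meta.get("Number of tools", ""))
                steps = int(meta.get("Number of steps", ""))
            except ValueError:
                continue  # missing or non-numeric counts: skip the row
            if tools < tool_threshold and steps < step_threshold:
                filtered.append(q)
        return filtered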
@@ -169,7 +184,8 @@ def update_huggingface_dataset(username: str, score: float):
 # Ensure the schema matches if columns were added/modified.
 # Use 'train' split convention.
 updated_ds = DatasetDict({'train': Dataset.from_pandas(df)})
-updated_ds.push_to_hub(HF_DATASET_ID)
+print(updated_ds)  # fixed: the original commit had 'pritn'
+#updated_ds.push_to_hub(HF_DATASET_ID) # Token should be picked up from env or login
 logger.info("Dataset push successful.")
 return True
 else:
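Note: this hunk swaps the actual upload for a debug print and comments out push_to_hub, so the "Dataset push successful." log line no longer reflects a real push. For reference, a sketch of the pattern the disabled line would perform, assuming a write token is exposed as the HF_TOKEN environment variable (the sample row is invented for illustration):

    import os
    import pandas as pd
    from datasets import Dataset, DatasetDict

    df = pd.DataFrame([{"username": "example-user", "score": 0.5}])  # illustrative data
    updated_ds = DatasetDict({"train": Dataset.from_pandas(df)})
    # Without an explicit token, push_to_hub falls back to the cached CLI login
    updated_ds.push_to_hub("agents-course/unit4-students-scores", token=os.getenv("HF_TOKEN"))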
@@ -311,4 +327,5 @@ if __name__ == "__main__":
 local_port = int(os.getenv("PORT", "8000"))
 logger.info(f"Running Uvicorn locally on port: {local_port}")
 # Note: host='127.0.0.1' is usually fine for local runs outside docker
+load_filtered_dataset()  # call site renamed to match the loader above
 uvicorn.run(app, host="127.0.0.1", port=local_port, log_level="info")
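Note: calling the loader only inside the __main__ block means it runs for local launches but not when uvicorn is started externally (e.g. by a Docker CMD on Spaces). One alternative, sketched here on the assumption that app is the FastAPI instance defined earlier in main.py, is a startup hook:

    @app.on_event("startup")
    def _init_data() -> None:
        # Runs once per server start, however uvicorn is launched
        load_filtered_dataset()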