Final_Assignment_Template

Sleeping

App Files Files Community

mobrobro committed 5 days ago

Commit

74eacfa

verified ·

1 Parent(s): 4705610

Update app.py

Browse files

This implementation now fully separates the processing and submission steps with these key enhancements:

Two separate buttons:

"Process Questions" - Runs the agent on all questions and saves answers to cache
"Submit Answers" - Submits the previously saved answers to the evaluation server

Three persistent files:

cached_questions.json - Holds the questions; process_questions() only reads this file and asks you to create it if it is missing
cached_answers.json - Stores the agent's responses and extracted answers
submission_ready.json - Stores the formatted data ready for submission

Two separate functions:

process_questions() - Runs the agent on the cached questions, saves the answers to cached_answers.json, and writes submission_ready.json; it never submits anything
submit_answers() - Loads submission_ready.json and submits its contents to the evaluation server

This approach provides several advantages:

You can process questions over time without worrying about submission rate limits
If the submission fails, you don't have to reprocess all questions
You can examine and, if necessary, correct the answers in submission_ready.json before submission
It's easier to debug issues since processing and submission are completely separate

Files changed (1) hide show

app.py +127 -156

app.py CHANGED Viewed

@@ -221,66 +221,17 @@ def extract_final_answer(agent_response):
     return "Unable to determine"
-# Simple rate-limited request function with retry
-def make_rate_limited_request(url, method="GET", max_retries=5, initial_wait=5, **kwargs):
-    """
-    Makes HTTP requests with automatic handling of rate limits (429)
-    Args:
-        url: The URL to request
-        method: HTTP method (GET, POST, etc.)
-        max_retries: Maximum number of retries for rate limit errors
-        initial_wait: Initial wait time in seconds, doubled on each retry
-        **kwargs: Additional arguments to pass to requests.request
-    Returns:
-        requests.Response object on success
-    Raises:
-        Exception if max_retries is exceeded
-    """
-    wait_time = initial_wait
-    for attempt in range(max_retries):
-        try:
-            response = requests.request(method, url, **kwargs)
-            # If not rate limited, return the response
-            if response.status_code != 429:
-                return response
-            # Handle rate limiting
-            retry_after = response.headers.get('Retry-After')
-            if retry_after:
-                # If server specified wait time, use that
-                wait_seconds = int(retry_after)
-                print(f"Rate limited. Server requested wait of {wait_seconds} seconds.")
-            else:
-                # Otherwise use exponential backoff
-                wait_seconds = wait_time
-                wait_time *= 2  # Double the wait time for next attempt
-                print(f"Rate limited. Using exponential backoff: waiting {wait_seconds} seconds.")
-            # Sleep and retry
-            time.sleep(wait_seconds)
-        except requests.exceptions.RequestException as e:
-            print(f"Request error: {e}")
-            # For connection errors, wait and retry
-            time.sleep(wait_time)
-            wait_time *= 2
-    # If we get here, we've exceeded max_retries
-    raise Exception(f"Failed to get a valid response after {max_retries} attempts")
-def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the SmolaAgent on them, submits all answers,
-    and displays the results. Uses caching and handles rate limits.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
     if profile:
         username = f"{profile.username}"
         print(f"User logged in: {username}")
@@ -288,94 +239,35 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
-    api_url = DEFAULT_API_URL
-    questions_url = f"{api_url}/questions"
-    submit_url = f"{api_url}/submit"
     # 1. Instantiate Agent
     try:
         agent = SmolaAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    print(agent_code)
-    # 2. Use cached questions or fetch with rate limiting
-    cache_file = "cached_questions.json"
-    # Try to load from cache first
-    if os.path.exists(cache_file) and os.path.getsize(cache_file) > 10:
-        print(f"Loading cached questions from {cache_file}")
         try:
-            with open(cache_file, 'r') as f:
                 questions_data = json.load(f)
             print(f"Loaded {len(questions_data)} questions from cache")
         except Exception as e:
             print(f"Error loading cached questions: {e}")
-            questions_data = None
     else:
-        questions_data = None
-    # Fetch if not cached
-    if not questions_data:
-        print("Fetching questions with rate limit handling...")
-        try:
-            # Manually implement a retry with long waits
-            max_attempts = 5
-            base_wait = 20  # Start with a long wait time
-            for attempt in range(max_attempts):
-                print(f"Attempt {attempt+1}/{max_attempts} to fetch questions")
-                try:
-                    response = requests.get(questions_url, timeout=15)
-                    if response.status_code == 200:
-                        questions_data = response.json()
-                        print(f"Successfully fetched {len(questions_data)} questions")
-                        # Cache for future use
-                        try:
-                            with open(cache_file, 'w') as f:
-                                json.dump(questions_data, f)
-                            print(f"Cached {len(questions_data)} questions to {cache_file}")
-                        except Exception as e:
-                            print(f"Warning: Failed to cache questions: {e}")
-                        break  # Success, exit retry loop
-                    elif response.status_code == 429:
-                        wait_time = base_wait * (2 ** attempt)
-                        print(f"Rate limited (429). Waiting {wait_time} seconds before retry...")
-                        time.sleep(wait_time)
-                    else:
-                        print(f"Unexpected status code: {response.status_code}")
-                        time.sleep(base_wait)
-                except requests.exceptions.RequestException as e:
-                    print(f"Request error: {e}")
-                    time.sleep(base_wait)
-            if not questions_data:
-                return "Failed to fetch questions after multiple attempts. Please try again later.", None
-        except Exception as e:
-            print(f"Error fetching questions: {e}")
-            return f"Error fetching questions: {e}", None
     # 3. Run your Agent
     results_log = []
-    answers_payload = []
-    answers_cache_file = "cached_answers.json"
     # Try to load cached answers
     cached_answers = {}
-    if os.path.exists(answers_cache_file):
         try:
-            with open(answers_cache_file, 'r') as f:
                 cached_answers = json.load(f)
             print(f"Loaded {len(cached_answers)} cached answers")
         except Exception as e:
@@ -395,10 +287,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             print(f"Using cached answer for task {task_id}")
             full_response = cached_answers[task_id]['full_response']
             submitted_answer = cached_answers[task_id]['submitted_answer']
         else:
             try:
                 # Check for associated files with manual retry
                 try:
                     files_url = f"{api_url}/files/{task_id}"
                     files_response = requests.get(files_url, timeout=15)
                     if files_response.status_code == 200:
@@ -421,23 +315,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
                 # Save to cache after each question to avoid losing progress
                 try:
-                    with open(answers_cache_file, 'w') as f:
                         json.dump(cached_answers, f)
                 except Exception as e:
                     print(f"Warning: Failed to save answer cache: {e}")
             except Exception as e:
                 print(f"Error running agent on task {task_id}: {e}")
                 full_response = f"AGENT ERROR: {e}"
                 submitted_answer = "Unable to determine"
-        # Add to submission payload
-        answers_payload.append({
-            "task_id": task_id,
-            "submitted_answer": submitted_answer,
-            "reasoning_trace": full_response
-        })
         # Log for display
         results_log.append({
             "Task ID": task_id,
@@ -447,18 +336,70 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         })
         print(f"Processed task {task_id}, answer: {submitted_answer}")
-    if not answers_payload:
-        print("Agent did not produce any answers to submit.")
-        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
-    # 5. Submit with robust retry mechanism
-    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
         # Use manual retry for submission
         max_attempts = 5
@@ -480,8 +421,31 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
                         f"Message: {result_data.get('message', 'No message received.')}"
                     )
                     print("Submission successful.")
-                    results_df = pd.DataFrame(results_log)
-                    return final_status, results_df
                 elif response.status_code == 429:
                     wait_time = base_wait * (2 ** attempt)
@@ -499,8 +463,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
                     # For non-429 errors, don't retry
                     status_message = f"Submission Failed: {error_detail}"
                     print(status_message)
-                    results_df = pd.DataFrame(results_log)
-                    return status_message, results_df
             except requests.exceptions.RequestException as e:
                 print(f"Request error during submission: {e}")
@@ -509,14 +472,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         # If we get here, all attempts failed
         status_message = f"Submission Failed: Maximum retry attempts exceeded."
         print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
     except Exception as e:
         status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
-        results_df = pd.DataFrame(results_log)
-        return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
@@ -525,25 +486,35 @@ with gr.Blocks() as demo:
         """
         **Instructions:**
         1. Log in to your Hugging Face account using the button below.
-        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
-        **Note:** This process will take some time as the agent processes each question. The agent is specifically configured to
         format answers according to the GAIA benchmark requirements:
         - Numbers: No commas, no units
         - Strings: No articles, no abbreviations
         - Lists: Comma-separated values following the above rules
         """
     )
     gr.LoginButton()
-    run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(
-        fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )

     return "Unable to determine"
+# Constants for file paths
+QUESTIONS_CACHE_FILE = "cached_questions.json"
+ANSWERS_CACHE_FILE = "cached_answers.json"
+SUBMISSION_READY_FILE = "submission_ready.json"
+def process_questions(profile: gr.OAuthProfile | None):
     """
+    Processes all questions using the agent and saves the answers to cache.
+    Does not submit the answers.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
     if profile:
         username = f"{profile.username}"
         print(f"User logged in: {username}")
         print("User not logged in.")
         return "Please Login to Hugging Face with the button.", None
     # 1. Instantiate Agent
     try:
         agent = SmolaAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    # 2. Use cached questions only
+    if os.path.exists(QUESTIONS_CACHE_FILE) and os.path.getsize(QUESTIONS_CACHE_FILE) > 10:
+        print(f"Loading cached questions from {QUESTIONS_CACHE_FILE}")
         try:
+            with open(QUESTIONS_CACHE_FILE, 'r') as f:
                 questions_data = json.load(f)
             print(f"Loaded {len(questions_data)} questions from cache")
         except Exception as e:
             print(f"Error loading cached questions: {e}")
+            return f"Error loading cached questions: {e}", None
     else:
+        return "No cached questions found. Please create a cached_questions.json file.", None
     # 3. Run your Agent
     results_log = []
+    processed_count = 0
     # Try to load cached answers
     cached_answers = {}
+    if os.path.exists(ANSWERS_CACHE_FILE):
         try:
+            with open(ANSWERS_CACHE_FILE, 'r') as f:
                 cached_answers = json.load(f)
             print(f"Loaded {len(cached_answers)} cached answers")
         except Exception as e:
             print(f"Using cached answer for task {task_id}")
             full_response = cached_answers[task_id]['full_response']
             submitted_answer = cached_answers[task_id]['submitted_answer']
+            processed_count += 1
         else:
             try:
                 # Check for associated files with manual retry
                 try:
+                    api_url = DEFAULT_API_URL
                     files_url = f"{api_url}/files/{task_id}"
                     files_response = requests.get(files_url, timeout=15)
                     if files_response.status_code == 200:
                 # Save to cache after each question to avoid losing progress
                 try:
+                    with open(ANSWERS_CACHE_FILE, 'w') as f:
                         json.dump(cached_answers, f)
                 except Exception as e:
                     print(f"Warning: Failed to save answer cache: {e}")
+                processed_count += 1
             except Exception as e:
                 print(f"Error running agent on task {task_id}: {e}")
                 full_response = f"AGENT ERROR: {e}"
                 submitted_answer = "Unable to determine"
         # Log for display
         results_log.append({
             "Task ID": task_id,
         })
         print(f"Processed task {task_id}, answer: {submitted_answer}")
+    # Prepare submission data and save for later submission
+    space_id = os.getenv("SPACE_ID")
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": [
+            {
+                "task_id": task_id,
+                "submitted_answer": cached_answers[task_id]["submitted_answer"],
+                "reasoning_trace": cached_answers[task_id]["full_response"]
+            }
+            for task_id in cached_answers
+        ]
+    }
+    # Save submission data for later use
+    try:
+        with open(SUBMISSION_READY_FILE, 'w') as f:
+            json.dump(submission_data, f)
+        print(f"Saved submission data to {SUBMISSION_READY_FILE}")
+    except Exception as e:
+        print(f"Warning: Failed to save submission data: {e}")
+    status_message = f"Processing complete. Processed {processed_count} questions. Ready for submission."
+    print(status_message)
+    results_df = pd.DataFrame(results_log)
+    return status_message, results_df
+def submit_answers(profile: gr.OAuthProfile | None):
+    """
+    Submits previously processed answers to the evaluation server.
+    """
+    if profile:
+        username = f"{profile.username}"
+        print(f"User logged in: {username}")
+    else:
+        print("User not logged in.")
+        return "Please Login to Hugging Face with the button.", None
+    # Check if submission data exists
+    if not os.path.exists(SUBMISSION_READY_FILE):
+        return "No submission data found. Please process questions first.", None
+    # Load submission data
+    try:
+        with open(SUBMISSION_READY_FILE, 'r') as f:
+            submission_data = json.load(f)
+        print(f"Loaded submission data with {len(submission_data['answers'])} answers")
+    except Exception as e:
+        print(f"Error loading submission data: {e}")
+        return f"Error loading submission data: {e}", None
+    # Update username in case it's different
+    submission_data["username"] = username.strip()
+    # Submit with robust retry mechanism
+    api_url = DEFAULT_API_URL
+    submit_url = f"{api_url}/submit"
+    print(f"Submitting {len(submission_data['answers'])} answers to: {submit_url}")
     try:
         # Use manual retry for submission
         max_attempts = 5
                         f"Message: {result_data.get('message', 'No message received.')}"
                     )
                     print("Submission successful.")
+                    # Load and return results for display
+                    try:
+                        with open(ANSWERS_CACHE_FILE, 'r') as f:
+                            cached_answers = json.load(f)
+                        # Load questions to display alongside answers
+                        with open(QUESTIONS_CACHE_FILE, 'r') as f:
+                            questions_data = json.load(f)
+                        question_map = {q["task_id"]: q["question"] for q in questions_data}
+                        results_log = [
+                            {
+                                "Task ID": task_id,
+                                "Question": question_map.get(task_id, "Unknown"),
+                                "Submitted Answer": cached_answers[task_id]["submitted_answer"]
+                            }
+                            for task_id in cached_answers
+                        ]
+                        return final_status, pd.DataFrame(results_log)
+                    except Exception as e:
+                        print(f"Error preparing results display: {e}")
+                        return final_status, None
                 elif response.status_code == 429:
                     wait_time = base_wait * (2 ** attempt)
                     # For non-429 errors, don't retry
                     status_message = f"Submission Failed: {error_detail}"
                     print(status_message)
+                    return status_message, None
             except requests.exceptions.RequestException as e:
                 print(f"Request error during submission: {e}")
         # If we get here, all attempts failed
         status_message = f"Submission Failed: Maximum retry attempts exceeded."
         print(status_message)
+        return status_message, None
     except Exception as e:
         status_message = f"An unexpected error occurred during submission: {e}"
         print(status_message)
+        return status_message, None
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
         """
         **Instructions:**
         1. Log in to your Hugging Face account using the button below.
+        2. Click 'Process Questions' to run the agent on all questions and save answers.
+        3. After processing is complete, click 'Submit Answers' to submit the answers to the evaluation server.
         ---
+        **Note:** Processing questions will take time as the agent processes each question. The agent is specifically configured to
         format answers according to the GAIA benchmark requirements:
         - Numbers: No commas, no units
         - Strings: No articles, no abbreviations
         - Lists: Comma-separated values following the above rules
+        Separating processing and submission helps avoid losing work due to rate limiting or other errors.
         """
     )
     gr.LoginButton()
+    with gr.Row():
+        process_button = gr.Button("Process Questions")
+        submit_button = gr.Button("Submit Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    process_button.click(
+        fn=process_questions,
+        outputs=[status_output, results_table]
+    )
+    submit_button.click(
+        fn=submit_answers,
         outputs=[status_output, results_table]
     )