mobrobro committed on
Commit
74eacfa
·
verified ·
1 Parent(s): 4705610

Update app.py

Browse files

This implementation now fully separates the processing and submission steps with these key enhancements:

Two separate buttons:

"Process Questions" - Runs the agent on all questions and saves answers to cache
"Submit Answers" - Submits the previously saved answers to the evaluation server


Three persistent files:

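cached_questions.json - Stores the questions (must already exist; processing now only reads this file and no longer fetches questions from the server)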
cached_answers.json - Stores the agent's responses and extracted answers
submission_ready.json - Stores the formatted data ready for submission


Two separate functions:

process_questions() - Focuses only on processing questions and saving answers
submit_answers() - Focuses only on submitting the saved answers



This approach provides several advantages:

You can process questions over time without worrying about submission rate limits
If the submission fails, you don't have to reprocess all questions
You can examine and potentially fix answers before submission
It's easier to debug issues since processing and submission are completely separate

Files changed (1) hide show
  1. app.py +127 -156
app.py CHANGED
@@ -221,66 +221,17 @@ def extract_final_answer(agent_response):
221
 
222
  return "Unable to determine"
223
 
224
- # Simple rate-limited request function with retry
225
- def make_rate_limited_request(url, method="GET", max_retries=5, initial_wait=5, **kwargs):
226
- """
227
- Makes HTTP requests with automatic handling of rate limits (429)
228
-
229
- Args:
230
- url: The URL to request
231
- method: HTTP method (GET, POST, etc.)
232
- max_retries: Maximum number of retries for rate limit errors
233
- initial_wait: Initial wait time in seconds, doubled on each retry
234
- **kwargs: Additional arguments to pass to requests.request
235
-
236
- Returns:
237
- requests.Response object on success
238
-
239
- Raises:
240
- Exception if max_retries is exceeded
241
- """
242
- wait_time = initial_wait
243
-
244
- for attempt in range(max_retries):
245
- try:
246
- response = requests.request(method, url, **kwargs)
247
-
248
- # If not rate limited, return the response
249
- if response.status_code != 429:
250
- return response
251
-
252
- # Handle rate limiting
253
- retry_after = response.headers.get('Retry-After')
254
- if retry_after:
255
- # If server specified wait time, use that
256
- wait_seconds = int(retry_after)
257
- print(f"Rate limited. Server requested wait of {wait_seconds} seconds.")
258
- else:
259
- # Otherwise use exponential backoff
260
- wait_seconds = wait_time
261
- wait_time *= 2 # Double the wait time for next attempt
262
- print(f"Rate limited. Using exponential backoff: waiting {wait_seconds} seconds.")
263
-
264
- # Sleep and retry
265
- time.sleep(wait_seconds)
266
-
267
- except requests.exceptions.RequestException as e:
268
- print(f"Request error: {e}")
269
- # For connection errors, wait and retry
270
- time.sleep(wait_time)
271
- wait_time *= 2
272
-
273
- # If we get here, we've exceeded max_retries
274
- raise Exception(f"Failed to get a valid response after {max_retries} attempts")
275
 
276
- def run_and_submit_all(profile: gr.OAuthProfile | None):
277
  """
278
- Fetches all questions, runs the SmolaAgent on them, submits all answers,
279
- and displays the results. Uses caching and handles rate limits.
280
  """
281
  # --- Determine HF Space Runtime URL and Repo URL ---
282
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
283
-
284
  if profile:
285
  username = f"{profile.username}"
286
  print(f"User logged in: {username}")
@@ -288,94 +239,35 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
288
  print("User not logged in.")
289
  return "Please Login to Hugging Face with the button.", None
290
 
291
- api_url = DEFAULT_API_URL
292
- questions_url = f"{api_url}/questions"
293
- submit_url = f"{api_url}/submit"
294
-
295
  # 1. Instantiate Agent
296
  try:
297
  agent = SmolaAgent()
298
  except Exception as e:
299
  print(f"Error instantiating agent: {e}")
300
  return f"Error initializing agent: {e}", None
301
-
302
- # In the case of an app running as a hugging Face space, this link points toward your codebase
303
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
304
- print(agent_code)
305
 
306
- # 2. Use cached questions or fetch with rate limiting
307
- cache_file = "cached_questions.json"
308
-
309
- # Try to load from cache first
310
- if os.path.exists(cache_file) and os.path.getsize(cache_file) > 10:
311
- print(f"Loading cached questions from {cache_file}")
312
  try:
313
- with open(cache_file, 'r') as f:
314
  questions_data = json.load(f)
315
  print(f"Loaded {len(questions_data)} questions from cache")
316
  except Exception as e:
317
  print(f"Error loading cached questions: {e}")
318
- questions_data = None
319
  else:
320
- questions_data = None
321
-
322
- # Fetch if not cached
323
- if not questions_data:
324
- print("Fetching questions with rate limit handling...")
325
- try:
326
- # Manually implement a retry with long waits
327
- max_attempts = 5
328
- base_wait = 20 # Start with a long wait time
329
-
330
- for attempt in range(max_attempts):
331
- print(f"Attempt {attempt+1}/{max_attempts} to fetch questions")
332
-
333
- try:
334
- response = requests.get(questions_url, timeout=15)
335
-
336
- if response.status_code == 200:
337
- questions_data = response.json()
338
- print(f"Successfully fetched {len(questions_data)} questions")
339
-
340
- # Cache for future use
341
- try:
342
- with open(cache_file, 'w') as f:
343
- json.dump(questions_data, f)
344
- print(f"Cached {len(questions_data)} questions to {cache_file}")
345
- except Exception as e:
346
- print(f"Warning: Failed to cache questions: {e}")
347
-
348
- break # Success, exit retry loop
349
-
350
- elif response.status_code == 429:
351
- wait_time = base_wait * (2 ** attempt)
352
- print(f"Rate limited (429). Waiting {wait_time} seconds before retry...")
353
- time.sleep(wait_time)
354
- else:
355
- print(f"Unexpected status code: {response.status_code}")
356
- time.sleep(base_wait)
357
-
358
- except requests.exceptions.RequestException as e:
359
- print(f"Request error: {e}")
360
- time.sleep(base_wait)
361
-
362
- if not questions_data:
363
- return "Failed to fetch questions after multiple attempts. Please try again later.", None
364
-
365
- except Exception as e:
366
- print(f"Error fetching questions: {e}")
367
- return f"Error fetching questions: {e}", None
368
 
369
  # 3. Run your Agent
370
  results_log = []
371
- answers_payload = []
372
- answers_cache_file = "cached_answers.json"
373
 
374
  # Try to load cached answers
375
  cached_answers = {}
376
- if os.path.exists(answers_cache_file):
377
  try:
378
- with open(answers_cache_file, 'r') as f:
379
  cached_answers = json.load(f)
380
  print(f"Loaded {len(cached_answers)} cached answers")
381
  except Exception as e:
@@ -395,10 +287,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
395
  print(f"Using cached answer for task {task_id}")
396
  full_response = cached_answers[task_id]['full_response']
397
  submitted_answer = cached_answers[task_id]['submitted_answer']
 
398
  else:
399
  try:
400
  # Check for associated files with manual retry
401
  try:
 
402
  files_url = f"{api_url}/files/{task_id}"
403
  files_response = requests.get(files_url, timeout=15)
404
  if files_response.status_code == 200:
@@ -421,23 +315,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
421
 
422
  # Save to cache after each question to avoid losing progress
423
  try:
424
- with open(answers_cache_file, 'w') as f:
425
  json.dump(cached_answers, f)
426
  except Exception as e:
427
  print(f"Warning: Failed to save answer cache: {e}")
428
 
 
 
429
  except Exception as e:
430
  print(f"Error running agent on task {task_id}: {e}")
431
  full_response = f"AGENT ERROR: {e}"
432
  submitted_answer = "Unable to determine"
433
 
434
- # Add to submission payload
435
- answers_payload.append({
436
- "task_id": task_id,
437
- "submitted_answer": submitted_answer,
438
- "reasoning_trace": full_response
439
- })
440
-
441
  # Log for display
442
  results_log.append({
443
  "Task ID": task_id,
@@ -447,18 +336,70 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
447
  })
448
 
449
  print(f"Processed task {task_id}, answer: {submitted_answer}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
- if not answers_payload:
452
- print("Agent did not produce any answers to submit.")
453
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
454
-
455
- # 4. Prepare Submission
456
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
457
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
458
- print(status_update)
459
-
460
- # 5. Submit with robust retry mechanism
461
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  try:
463
  # Use manual retry for submission
464
  max_attempts = 5
@@ -480,8 +421,31 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
480
  f"Message: {result_data.get('message', 'No message received.')}"
481
  )
482
  print("Submission successful.")
483
- results_df = pd.DataFrame(results_log)
484
- return final_status, results_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
 
486
  elif response.status_code == 429:
487
  wait_time = base_wait * (2 ** attempt)
@@ -499,8 +463,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
499
  # For non-429 errors, don't retry
500
  status_message = f"Submission Failed: {error_detail}"
501
  print(status_message)
502
- results_df = pd.DataFrame(results_log)
503
- return status_message, results_df
504
 
505
  except requests.exceptions.RequestException as e:
506
  print(f"Request error during submission: {e}")
@@ -509,14 +472,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
509
  # If we get here, all attempts failed
510
  status_message = f"Submission Failed: Maximum retry attempts exceeded."
511
  print(status_message)
512
- results_df = pd.DataFrame(results_log)
513
- return status_message, results_df
514
 
515
  except Exception as e:
516
  status_message = f"An unexpected error occurred during submission: {e}"
517
  print(status_message)
518
- results_df = pd.DataFrame(results_log)
519
- return status_message, results_df
520
 
521
  # --- Build Gradio Interface using Blocks ---
522
  with gr.Blocks() as demo:
@@ -525,25 +486,35 @@ with gr.Blocks() as demo:
525
  """
526
  **Instructions:**
527
  1. Log in to your Hugging Face account using the button below.
528
- 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
529
  ---
530
- **Note:** This process will take some time as the agent processes each question. The agent is specifically configured to
531
  format answers according to the GAIA benchmark requirements:
532
  - Numbers: No commas, no units
533
  - Strings: No articles, no abbreviations
534
  - Lists: Comma-separated values following the above rules
 
 
535
  """
536
  )
537
 
538
  gr.LoginButton()
539
 
540
- run_button = gr.Button("Run Evaluation & Submit All Answers")
 
 
541
 
542
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
543
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
544
 
545
- run_button.click(
546
- fn=run_and_submit_all,
 
 
 
 
 
547
  outputs=[status_output, results_table]
548
  )
549
 
 
221
 
222
  return "Unable to determine"
223
 
224
+ # Constants for file paths
225
+ QUESTIONS_CACHE_FILE = "cached_questions.json"
226
+ ANSWERS_CACHE_FILE = "cached_answers.json"
227
+ SUBMISSION_READY_FILE = "submission_ready.json"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
+ def process_questions(profile: gr.OAuthProfile | None):
230
  """
231
+ Processes all questions using the agent and saves the answers to cache.
232
+ Does not submit the answers.
233
  """
234
  # --- Determine HF Space Runtime URL and Repo URL ---
 
 
235
  if profile:
236
  username = f"{profile.username}"
237
  print(f"User logged in: {username}")
 
239
  print("User not logged in.")
240
  return "Please Login to Hugging Face with the button.", None
241
 
 
 
 
 
242
  # 1. Instantiate Agent
243
  try:
244
  agent = SmolaAgent()
245
  except Exception as e:
246
  print(f"Error instantiating agent: {e}")
247
  return f"Error initializing agent: {e}", None
 
 
 
 
248
 
249
+ # 2. Use cached questions only
250
+ if os.path.exists(QUESTIONS_CACHE_FILE) and os.path.getsize(QUESTIONS_CACHE_FILE) > 10:
251
+ print(f"Loading cached questions from {QUESTIONS_CACHE_FILE}")
 
 
 
252
  try:
253
+ with open(QUESTIONS_CACHE_FILE, 'r') as f:
254
  questions_data = json.load(f)
255
  print(f"Loaded {len(questions_data)} questions from cache")
256
  except Exception as e:
257
  print(f"Error loading cached questions: {e}")
258
+ return f"Error loading cached questions: {e}", None
259
  else:
260
+ return "No cached questions found. Please create a cached_questions.json file.", None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
  # 3. Run your Agent
263
  results_log = []
264
+ processed_count = 0
 
265
 
266
  # Try to load cached answers
267
  cached_answers = {}
268
+ if os.path.exists(ANSWERS_CACHE_FILE):
269
  try:
270
+ with open(ANSWERS_CACHE_FILE, 'r') as f:
271
  cached_answers = json.load(f)
272
  print(f"Loaded {len(cached_answers)} cached answers")
273
  except Exception as e:
 
287
  print(f"Using cached answer for task {task_id}")
288
  full_response = cached_answers[task_id]['full_response']
289
  submitted_answer = cached_answers[task_id]['submitted_answer']
290
+ processed_count += 1
291
  else:
292
  try:
293
  # Check for associated files with manual retry
294
  try:
295
+ api_url = DEFAULT_API_URL
296
  files_url = f"{api_url}/files/{task_id}"
297
  files_response = requests.get(files_url, timeout=15)
298
  if files_response.status_code == 200:
 
315
 
316
  # Save to cache after each question to avoid losing progress
317
  try:
318
+ with open(ANSWERS_CACHE_FILE, 'w') as f:
319
  json.dump(cached_answers, f)
320
  except Exception as e:
321
  print(f"Warning: Failed to save answer cache: {e}")
322
 
323
+ processed_count += 1
324
+
325
  except Exception as e:
326
  print(f"Error running agent on task {task_id}: {e}")
327
  full_response = f"AGENT ERROR: {e}"
328
  submitted_answer = "Unable to determine"
329
 
 
 
 
 
 
 
 
330
  # Log for display
331
  results_log.append({
332
  "Task ID": task_id,
 
336
  })
337
 
338
  print(f"Processed task {task_id}, answer: {submitted_answer}")
339
+
340
+ # Prepare submission data and save for later submission
341
+ space_id = os.getenv("SPACE_ID")
342
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
343
+
344
+ submission_data = {
345
+ "username": username.strip(),
346
+ "agent_code": agent_code,
347
+ "answers": [
348
+ {
349
+ "task_id": task_id,
350
+ "submitted_answer": cached_answers[task_id]["submitted_answer"],
351
+ "reasoning_trace": cached_answers[task_id]["full_response"]
352
+ }
353
+ for task_id in cached_answers
354
+ ]
355
+ }
356
+
357
+ # Save submission data for later use
358
+ try:
359
+ with open(SUBMISSION_READY_FILE, 'w') as f:
360
+ json.dump(submission_data, f)
361
+ print(f"Saved submission data to {SUBMISSION_READY_FILE}")
362
+ except Exception as e:
363
+ print(f"Warning: Failed to save submission data: {e}")
364
+
365
+ status_message = f"Processing complete. Processed {processed_count} questions. Ready for submission."
366
+ print(status_message)
367
+
368
+ results_df = pd.DataFrame(results_log)
369
+ return status_message, results_df
370
 
371
+ def submit_answers(profile: gr.OAuthProfile | None):
372
+ """
373
+ Submits previously processed answers to the evaluation server.
374
+ """
375
+ if profile:
376
+ username = f"{profile.username}"
377
+ print(f"User logged in: {username}")
378
+ else:
379
+ print("User not logged in.")
380
+ return "Please Login to Hugging Face with the button.", None
381
+
382
+ # Check if submission data exists
383
+ if not os.path.exists(SUBMISSION_READY_FILE):
384
+ return "No submission data found. Please process questions first.", None
385
+
386
+ # Load submission data
387
+ try:
388
+ with open(SUBMISSION_READY_FILE, 'r') as f:
389
+ submission_data = json.load(f)
390
+ print(f"Loaded submission data with {len(submission_data['answers'])} answers")
391
+ except Exception as e:
392
+ print(f"Error loading submission data: {e}")
393
+ return f"Error loading submission data: {e}", None
394
+
395
+ # Update username in case it's different
396
+ submission_data["username"] = username.strip()
397
+
398
+ # Submit with robust retry mechanism
399
+ api_url = DEFAULT_API_URL
400
+ submit_url = f"{api_url}/submit"
401
+ print(f"Submitting {len(submission_data['answers'])} answers to: {submit_url}")
402
+
403
  try:
404
  # Use manual retry for submission
405
  max_attempts = 5
 
421
  f"Message: {result_data.get('message', 'No message received.')}"
422
  )
423
  print("Submission successful.")
424
+
425
+ # Load and return results for display
426
+ try:
427
+ with open(ANSWERS_CACHE_FILE, 'r') as f:
428
+ cached_answers = json.load(f)
429
+
430
+ # Load questions to display alongside answers
431
+ with open(QUESTIONS_CACHE_FILE, 'r') as f:
432
+ questions_data = json.load(f)
433
+
434
+ question_map = {q["task_id"]: q["question"] for q in questions_data}
435
+
436
+ results_log = [
437
+ {
438
+ "Task ID": task_id,
439
+ "Question": question_map.get(task_id, "Unknown"),
440
+ "Submitted Answer": cached_answers[task_id]["submitted_answer"]
441
+ }
442
+ for task_id in cached_answers
443
+ ]
444
+
445
+ return final_status, pd.DataFrame(results_log)
446
+ except Exception as e:
447
+ print(f"Error preparing results display: {e}")
448
+ return final_status, None
449
 
450
  elif response.status_code == 429:
451
  wait_time = base_wait * (2 ** attempt)
 
463
  # For non-429 errors, don't retry
464
  status_message = f"Submission Failed: {error_detail}"
465
  print(status_message)
466
+ return status_message, None
 
467
 
468
  except requests.exceptions.RequestException as e:
469
  print(f"Request error during submission: {e}")
 
472
  # If we get here, all attempts failed
473
  status_message = f"Submission Failed: Maximum retry attempts exceeded."
474
  print(status_message)
475
+ return status_message, None
 
476
 
477
  except Exception as e:
478
  status_message = f"An unexpected error occurred during submission: {e}"
479
  print(status_message)
480
+ return status_message, None
 
481
 
482
  # --- Build Gradio Interface using Blocks ---
483
  with gr.Blocks() as demo:
 
486
  """
487
  **Instructions:**
488
  1. Log in to your Hugging Face account using the button below.
489
+ 2. Click 'Process Questions' to run the agent on all questions and save answers.
490
+ 3. After processing is complete, click 'Submit Answers' to submit the answers to the evaluation server.
491
  ---
492
+ **Note:** Processing questions will take time as the agent processes each question. The agent is specifically configured to
493
  format answers according to the GAIA benchmark requirements:
494
  - Numbers: No commas, no units
495
  - Strings: No articles, no abbreviations
496
  - Lists: Comma-separated values following the above rules
497
+
498
+ Separating processing and submission helps avoid losing work due to rate limiting or other errors.
499
  """
500
  )
501
 
502
  gr.LoginButton()
503
 
504
+ with gr.Row():
505
+ process_button = gr.Button("Process Questions")
506
+ submit_button = gr.Button("Submit Answers")
507
 
508
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
509
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
510
 
511
+ process_button.click(
512
+ fn=process_questions,
513
+ outputs=[status_output, results_table]
514
+ )
515
+
516
+ submit_button.click(
517
+ fn=submit_answers,
518
  outputs=[status_output, results_table]
519
  )
520