drAbreu commited on
Commit
cb2e2ec
·
1 Parent(s): a0bacd9

Improved the interface

Browse files
Files changed (2) hide show
  1. README.md +10 -1
  2. app.py +342 -66
README.md CHANGED
@@ -12,4 +12,13 @@ hf_oauth: true
12
  hf_oauth_expiration_minutes: 480
13
  ---
14
 
15
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
12
  hf_oauth_expiration_minutes: 480
13
  ---
14
 
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
16
+
17
+ ## Tag 1.0.0
18
+
19
+ - Basic agent without any tools obtains 1 / 20 correct answers using claude 3.7
20
+
21
+ ## Tag 1.1.0
22
+
23
+ - Adding web and wikipedia tools to single agent obtains 5 / 20 correct answers using claude 3.7
24
+
app.py CHANGED
@@ -3,15 +3,21 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- from agents.llama_index_agent import GaiaAgent
7
  import asyncio
8
- # (Keep Constants as is)
 
 
 
 
 
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
11
 
12
- # --- Basic Agent Definition ---
13
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
14
-
15
  CLAUDE = {
16
  "model_provider": "anthropic",
17
  "model_name": "claude-3-7-sonnet-latest"
@@ -20,120 +26,362 @@ OPENAI = {
20
  "model_provider": "openai",
21
  "model_name": "gpt-4o"
22
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  class BasicAgent:
 
 
 
24
  def __init__(
25
  self,
26
- model_provider="openai",
27
- model_name="gpt-4o",
28
- api_key=None
 
 
29
  ):
30
  """
31
- Initialize the BasicAgent with configurable model settings.
32
 
33
  Args:
34
- model_provider: LLM provider to use (openai, anthropic, etc.)
35
  model_name: Specific model to use
36
- api_key: Optional API key (defaults to environment variable)
 
 
37
  """
38
- self.agent = GaiaAgent(**CLAUDE)
 
 
 
 
 
 
 
 
 
 
39
  print(f"BasicAgent initialized with {model_provider} {model_name}.")
40
 
 
 
 
 
41
  def __call__(self, question: str) -> str:
42
- """Process a GAIA benchmark question and return the formatted answer."""
 
 
 
43
  print(f"Agent received question (first 50 chars): {question[:50]}...")
44
 
45
  async def agentic_main():
46
- response = await self.agent.run(question)
47
- return response
48
 
49
- response = asyncio.run(agentic_main())
50
- final_answer = response.response.blocks[-1].text
51
  print(f"Agent returning answer: {final_answer}")
52
  return final_answer
53
 
54
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
55
  """
56
- Fetches all questions, runs the BasicAgent on them, submits all answers,
57
- and displays the results.
58
  """
59
  # --- Determine HF Space Runtime URL and Repo URL ---
60
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
61
 
62
- if profile:
63
- username= f"{profile.username}"
64
- print(f"User logged in: {username}")
65
- else:
66
  print("User not logged in.")
67
  return "Please Login to Hugging Face with the button.", None
68
 
 
 
 
69
  api_url = DEFAULT_API_URL
70
  questions_url = f"{api_url}/questions"
71
  submit_url = f"{api_url}/submit"
72
 
73
- # 1. Instantiate Agent ( modify this part to create your agent)
74
  try:
 
75
  agent = BasicAgent()
76
  except Exception as e:
77
  print(f"Error instantiating agent: {e}")
78
  return f"Error initializing agent: {e}", None
79
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
 
80
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
81
  print(agent_code)
82
 
83
  # 2. Fetch Questions
84
  print(f"Fetching questions from: {questions_url}")
 
85
  try:
86
- response = requests.get(questions_url, timeout=15)
 
 
 
 
 
 
 
 
87
  response.raise_for_status()
88
  questions_data = response.json()
 
89
  if not questions_data:
90
- print("Fetched questions list is empty.")
91
- return "Fetched questions list is empty or invalid format.", None
 
92
  print(f"Fetched {len(questions_data)} questions.")
 
 
93
  except requests.exceptions.RequestException as e:
94
  print(f"Error fetching questions: {e}")
95
  return f"Error fetching questions: {e}", None
96
  except requests.exceptions.JSONDecodeError as e:
97
- print(f"Error decoding JSON response from questions endpoint: {e}")
98
- print(f"Response text: {response.text[:500]}")
99
- return f"Error decoding server response for questions: {e}", None
100
  except Exception as e:
101
  print(f"An unexpected error occurred fetching questions: {e}")
102
  return f"An unexpected error occurred fetching questions: {e}", None
103
 
104
- # 3. Run your Agent
105
- results_log = []
106
- answers_payload = []
107
- print(f"Running agent on {len(questions_data)} questions...")
108
- for item in questions_data:
109
- task_id = item.get("task_id")
110
- question_text = item.get("question")
111
- if not task_id or question_text is None:
112
- print(f"Skipping item with missing task_id or question: {item}")
113
- continue
114
- try:
115
- submitted_answer = agent(question_text)
116
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
117
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
118
- except Exception as e:
119
- print(f"Error running agent on task {task_id}: {e}")
120
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
 
 
 
121
 
122
  if not answers_payload:
123
  print("Agent did not produce any answers to submit.")
124
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
125
 
126
  # 4. Prepare Submission
127
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
128
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
129
  print(status_update)
 
130
 
131
  # 5. Submit
132
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
133
  try:
134
- response = requests.post(submit_url, json=submission_data, timeout=60)
 
 
 
 
 
 
 
135
  response.raise_for_status()
136
  result_data = response.json()
 
137
  final_status = (
138
  f"Submission Successful!\n"
139
  f"User: {result_data.get('username')}\n"
@@ -141,9 +389,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
141
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
142
  f"Message: {result_data.get('message', 'No message received.')}"
143
  )
 
144
  print("Submission successful.")
 
145
  results_df = pd.DataFrame(results_log)
146
  return final_status, results_df
 
147
  except requests.exceptions.HTTPError as e:
148
  error_detail = f"Server responded with status {e.response.status_code}."
149
  try:
@@ -172,42 +423,67 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
172
  return status_message, results_df
173
 
174
 
 
 
 
 
 
 
175
  # --- Build Gradio Interface using Blocks ---
176
  with gr.Blocks() as demo:
177
- gr.Markdown("# Basic Agent Evaluation Runner")
178
  gr.Markdown(
179
  """
180
  **Instructions:**
181
 
182
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
183
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
184
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
185
 
186
- ---
187
- **Disclaimers:**
188
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
189
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
190
  """
191
  )
192
 
193
- gr.LoginButton()
 
 
194
 
195
- run_button = gr.Button("Run Evaluation & Submit All Answers")
196
 
197
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
198
- # Removed max_rows=10 from DataFrame constructor
199
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  run_button.click(
202
  fn=run_and_submit_all,
 
203
  outputs=[status_output, results_table]
204
  )
205
 
 
206
  if __name__ == "__main__":
207
  print("\n" + "-"*30 + " App Starting " + "-"*30)
208
  # Check for SPACE_HOST and SPACE_ID at startup for information
209
  space_host_startup = os.getenv("SPACE_HOST")
210
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
211
 
212
  if space_host_startup:
213
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -215,7 +491,7 @@ if __name__ == "__main__":
215
  else:
216
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
217
 
218
- if space_id_startup: # Print repo URLs if SPACE_ID is found
219
  print(f"✅ SPACE_ID found: {space_id_startup}")
220
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
221
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
@@ -224,5 +500,5 @@ if __name__ == "__main__":
224
 
225
  print("-"*(60 + len(" App Starting ")) + "\n")
226
 
227
- print("Launching Gradio Interface for Basic Agent Evaluation...")
228
  demo.launch(debug=True, share=False)
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ import json
7
  import asyncio
8
+ from pathlib import Path
9
+ from datetime import datetime
10
+ from typing import List, Dict, Any, Optional
11
+ from tqdm.asyncio import tqdm as async_tqdm
12
+ from agents.llama_index_agent import GaiaAgent
13
+
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
+ CACHE_DIR = "cache"
17
+ CACHE_FILE = os.path.join(CACHE_DIR, "agent_cache.json")
18
+ MAX_CONCURRENT_REQUESTS = 3 # Limit concurrent API calls
19
 
20
+ # Model configurations
 
 
21
  CLAUDE = {
22
  "model_provider": "anthropic",
23
  "model_name": "claude-3-7-sonnet-latest"
 
26
  "model_provider": "openai",
27
  "model_name": "gpt-4o"
28
  }
29
+
30
# --- Optimized Agent Implementation ---
class OptimizedGaiaAgent:
    """
    Enhanced GAIA agent with caching and asynchronous processing capabilities.

    Wraps a GaiaAgent and adds a persistent JSON answer cache (keyed by the
    normalized question text), a semaphore bounding concurrent model calls,
    and an order-preserving batch runner with progress reporting.
    """
    def __init__(
        self,
        model_config=CLAUDE,
        use_cache=True,
        cache_file=CACHE_FILE,
        max_concurrent=MAX_CONCURRENT_REQUESTS
    ):
        """
        Initialize the optimized agent.

        Args:
            model_config: Dictionary with model_provider and model_name
            use_cache: Whether to use caching
            cache_file: Path to the cache file
            max_concurrent: Maximum number of concurrent requests
        """
        self.agent = GaiaAgent(**model_config)
        self.model_config = model_config
        self.use_cache = use_cache
        self.cache_file = cache_file
        self.cache = self._load_cache() if use_cache else {}
        self.semaphore = asyncio.Semaphore(max_concurrent)
        # Serializes cache mutation + file writes: without it, concurrent
        # tasks could interleave _save_cache calls and corrupt the JSON file.
        self._cache_lock = asyncio.Lock()

        print(f"OptimizedGaiaAgent initialized with {model_config['model_provider']} {model_config['model_name']}")
        if use_cache:
            print(f"Cache loaded with {len(self.cache)} answers")

    def _load_cache(self) -> Dict[str, str]:
        """Load cached answers from file"""
        # BUG FIX: os.path.dirname() returns "" for a bare filename and
        # os.makedirs("") raises FileNotFoundError, so only create the
        # cache directory when the configured path actually contains one.
        cache_dir = os.path.dirname(self.cache_file)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)

        cache_path = Path(self.cache_file)
        if cache_path.exists():
            try:
                with open(cache_path, 'r') as f:
                    return json.load(f)
            except Exception as e:
                print(f"Error loading cache: {e}")
                return {}
        return {}

    def _save_cache(self) -> None:
        """Save cached answers to file"""
        try:
            with open(self.cache_file, 'w') as f:
                json.dump(self.cache, f, indent=2)
        except Exception as e:
            print(f"Error saving cache: {e}")

    def _get_cache_key(self, question: str) -> str:
        """Generate a consistent key for the cache"""
        # Strip whitespace and normalize
        return question.strip()

    async def process_question(self, task_id: str, question: str) -> Dict[str, Any]:
        """
        Process a single question, using cache if available.

        Args:
            task_id: ID of the task/question
            question: The question text

        Returns:
            Dictionary with task_id, question, answer, and metadata
        """
        cache_key = self._get_cache_key(question)

        # Check cache first
        if self.use_cache and cache_key in self.cache:
            print(f"🔄 Cache hit for task {task_id[:8]}...")
            return {
                "task_id": task_id,
                "question": question,
                "submitted_answer": self.cache[cache_key],
                "cached": True,
                "error": False
            }

        # Process the question (with semaphore to limit concurrent requests)
        async with self.semaphore:
            print(f"⚙️ Processing task {task_id[:8]}...")
            try:
                response = await self.agent.run(question)
                answer = response.response.blocks[-1].text

                # Save to cache under the lock so concurrent writers cannot
                # clobber each other's file writes (was previously unguarded).
                if self.use_cache:
                    async with self._cache_lock:
                        self.cache[cache_key] = answer
                        # Use asyncio.to_thread for file I/O to avoid blocking
                        await asyncio.to_thread(self._save_cache)

                return {
                    "task_id": task_id,
                    "question": question,
                    "submitted_answer": answer,
                    "cached": False,
                    "error": False
                }
            except Exception as e:
                error_message = f"ERROR: {str(e)}"
                print(f"❌ Error processing task {task_id[:8]}: {error_message}")
                return {
                    "task_id": task_id,
                    "question": question,
                    "submitted_answer": error_message,
                    "cached": False,
                    "error": True
                }

    async def process_all(
        self,
        questions_data: List[Dict[str, Any]],
        progress_callback=None
    ) -> List[Dict[str, Any]]:
        """
        Process all questions, with progress reporting.

        Args:
            questions_data: List of question dictionaries
            progress_callback: Function to call with progress updates

        Returns:
            List of results with answers and metadata, in input order
        """
        # Filter out invalid questions
        valid_questions = [
            item for item in questions_data
            if item.get("task_id") and item.get("question") is not None
        ]

        if not valid_questions:
            print("No valid questions to process.")
            return []

        total = len(valid_questions)
        print(f"Processing {total} questions with {MAX_CONCURRENT_REQUESTS} concurrent tasks...")

        # Process questions and collect results
        results = []

        # Create async tasks (coroutines; scheduled by as_completed below)
        tasks = [
            self.process_question(item["task_id"], item["question"])
            for item in valid_questions
        ]

        # Process with progress tracking
        if progress_callback:
            progress_callback(0, desc="Starting processing...")

        # Consume results as they finish, updating progress each time
        for i, task in enumerate(asyncio.as_completed(tasks)):
            result = await task
            results.append(result)

            # Update progress
            if progress_callback:
                progress_callback((i + 1) / total, desc=f"Processed {i + 1}/{total} questions")

        # Re-sort completion-ordered results back to the original input order
        id_to_result = {result["task_id"]: result for result in results}
        ordered_results = [
            id_to_result.get(
                item["task_id"],
                {"task_id": item["task_id"], "question": item.get("question"), "submitted_answer": "ERROR: Processing failed", "error": True}
            )
            for item in valid_questions
        ]

        return ordered_results
206
+
207
+
208
+ # --- Main Application Class ---
209
class BasicAgent:
    """
    Optimized agent wrapper for the GAIA benchmark.

    Thin facade over OptimizedGaiaAgent: exposes an async batch entry point
    (process_async) plus a synchronous single-question __call__ kept for
    compatibility with the original interface.
    """
    def __init__(
        self,
        model_provider="anthropic",
        model_name="claude-3-7-sonnet-latest",
        api_key=None,
        use_cache=True,
        max_concurrent=MAX_CONCURRENT_REQUESTS
    ):
        """
        Initialize the BasicAgent with caching and async capabilities.

        Args:
            model_provider: LLM provider to use
            model_name: Specific model to use
            api_key: Optional API key
            use_cache: Whether to use caching
            max_concurrent: Maximum concurrent requests
        """
        self.agent = OptimizedGaiaAgent(
            model_config={
                "model_provider": model_provider,
                "model_name": model_name,
                "api_key": api_key,
            },
            use_cache=use_cache,
            max_concurrent=max_concurrent,
        )
        print(f"BasicAgent initialized with {model_provider} {model_name}.")

    async def process_async(self, questions_data, progress_callback=None):
        """Process questions asynchronously with progress reporting"""
        return await self.agent.process_all(questions_data, progress_callback)

    def __call__(self, question: str) -> str:
        """
        Process a single question (for compatibility with the original interface).
        This method is synchronous for backward compatibility.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        # Drive the async pipeline to completion for this one question.
        outcome = asyncio.run(self.agent.process_question("single", question))
        final_answer = outcome["submitted_answer"]
        print(f"Agent returning answer: {final_answer}")
        return final_answer
262
 
263
+
264
+ # --- Async Run and Submit Function ---
265
+ async def async_run_and_submit_all(
266
+ profile: gr.OAuthProfile | None,
267
+ progress=gr.Progress()
268
+ ) -> tuple:
269
  """
270
+ Asynchronous version of run_and_submit_all.
271
+ Fetches questions, processes them concurrently, and submits answers.
272
  """
273
  # --- Determine HF Space Runtime URL and Repo URL ---
274
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
275
 
276
+ if not profile:
 
 
 
277
  print("User not logged in.")
278
  return "Please Login to Hugging Face with the button.", None
279
 
280
+ username = f"{profile.username}"
281
+ print(f"User logged in: {username}")
282
+
283
  api_url = DEFAULT_API_URL
284
  questions_url = f"{api_url}/questions"
285
  submit_url = f"{api_url}/submit"
286
 
287
+ # 1. Instantiate Agent
288
  try:
289
+ progress(0, desc="Initializing agent...")
290
  agent = BasicAgent()
291
  except Exception as e:
292
  print(f"Error instantiating agent: {e}")
293
  return f"Error initializing agent: {e}", None
294
+
295
+ # In the case of an app running as a Hugging Face space, this link points toward your codebase
296
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
297
  print(agent_code)
298
 
299
  # 2. Fetch Questions
300
  print(f"Fetching questions from: {questions_url}")
301
+ progress(0.1, desc="Fetching questions...")
302
  try:
303
+ # Use asyncio for the HTTP request
304
+ async def fetch_questions():
305
+ loop = asyncio.get_event_loop()
306
+ return await loop.run_in_executor(
307
+ None,
308
+ lambda: requests.get(questions_url, timeout=15)
309
+ )
310
+
311
+ response = await fetch_questions()
312
  response.raise_for_status()
313
  questions_data = response.json()
314
+
315
  if not questions_data:
316
+ print("Fetched questions list is empty.")
317
+ return "Fetched questions list is empty or invalid format.", None
318
+
319
  print(f"Fetched {len(questions_data)} questions.")
320
+ progress(0.2, desc=f"Successfully fetched {len(questions_data)} questions.")
321
+
322
  except requests.exceptions.RequestException as e:
323
  print(f"Error fetching questions: {e}")
324
  return f"Error fetching questions: {e}", None
325
  except requests.exceptions.JSONDecodeError as e:
326
+ print(f"Error decoding JSON response from questions endpoint: {e}")
327
+ print(f"Response text: {response.text[:500]}")
328
+ return f"Error decoding server response for questions: {e}", None
329
  except Exception as e:
330
  print(f"An unexpected error occurred fetching questions: {e}")
331
  return f"An unexpected error occurred fetching questions: {e}", None
332
 
333
+ # 3. Process Questions Asynchronously
334
+ print(f"Processing {len(questions_data)} questions...")
335
+ try:
336
+ # Define progress update function
337
+ def update_progress(value, desc=""):
338
+ # Scale progress from 0.2-0.8 for the processing phase
339
+ progress(0.2 + (value * 0.6), desc=desc)
340
+
341
+ results = await agent.process_async(questions_data, update_progress)
342
+
343
+ # Convert results to the expected format
344
+ answers_payload = [
345
+ {"task_id": result["task_id"], "submitted_answer": result["submitted_answer"]}
346
+ for result in results
347
+ ]
348
+
349
+ # Format for display
350
+ results_log = [
351
+ {"Task ID": result["task_id"], "Question": result["question"], "Submitted Answer": result["submitted_answer"]}
352
+ for result in results
353
+ ]
354
+
355
+ progress(0.8, desc=f"Processed all {len(results)} questions. Preparing submission...")
356
+
357
+ except Exception as e:
358
+ print(f"Error during question processing: {e}")
359
+ return f"Error during question processing: {e}", None
360
 
361
  if not answers_payload:
362
  print("Agent did not produce any answers to submit.")
363
+ return "Agent did not produce any answers to submit.", pd.DataFrame([])
364
 
365
  # 4. Prepare Submission
366
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
367
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
368
  print(status_update)
369
+ progress(0.9, desc="Submitting answers...")
370
 
371
  # 5. Submit
372
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
373
  try:
374
+ async def submit_answers():
375
+ loop = asyncio.get_event_loop()
376
+ return await loop.run_in_executor(
377
+ None,
378
+ lambda: requests.post(submit_url, json=submission_data, timeout=60)
379
+ )
380
+
381
+ response = await submit_answers()
382
  response.raise_for_status()
383
  result_data = response.json()
384
+
385
  final_status = (
386
  f"Submission Successful!\n"
387
  f"User: {result_data.get('username')}\n"
 
389
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
390
  f"Message: {result_data.get('message', 'No message received.')}"
391
  )
392
+
393
  print("Submission successful.")
394
+ progress(1.0, desc="Complete!")
395
  results_df = pd.DataFrame(results_log)
396
  return final_status, results_df
397
+
398
  except requests.exceptions.HTTPError as e:
399
  error_detail = f"Server responded with status {e.response.status_code}."
400
  try:
 
423
  return status_message, results_df
424
 
425
 
426
# Synchronous wrapper for the async function (for Gradio compatibility)
def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress()):
    """Synchronous wrapper for the async function"""
    # Build the coroutine, then let asyncio.run own the event loop lifecycle.
    coroutine = async_run_and_submit_all(profile, progress)
    return asyncio.run(coroutine)
430
+
431
+
432
# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    gr.Markdown("# Optimized GAIA Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, and necessary packages.
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, process them, and see your score.

        This implementation features:
        - **Caching**: Answers are saved to avoid reprocessing the same questions
        - **Asynchronous Processing**: Questions are processed concurrently for better performance
        - **Progress Tracking**: See real-time progress as questions are processed
        """
    )

    with gr.Row():
        gr.LoginButton()
        clear_cache_button = gr.Button("Clear Cache")

    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # Define clear cache function
    def clear_cache():
        """Delete the on-disk answer cache and report the outcome."""
        if os.path.exists(CACHE_FILE):
            try:
                os.remove(CACHE_FILE)
                return f"Cache cleared successfully! ({CACHE_FILE})"
            except Exception as e:
                return f"Error clearing cache: {e}"
        return "No cache file found."

    # Connect the components
    clear_cache_button.click(
        fn=clear_cache,
        outputs=status_output
    )

    # BUG FIX: gr.OAuthProfile is not a Gradio component, so it must not be
    # passed via `inputs` (instantiating it with no arguments raises at
    # interface-build time). Gradio injects the logged-in user's profile
    # automatically because run_and_submit_all annotates its parameter as
    # `gr.OAuthProfile | None`.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
480
 
481
+ # --- App Entry Point ---
482
  if __name__ == "__main__":
483
  print("\n" + "-"*30 + " App Starting " + "-"*30)
484
  # Check for SPACE_HOST and SPACE_ID at startup for information
485
  space_host_startup = os.getenv("SPACE_HOST")
486
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
487
 
488
  if space_host_startup:
489
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
491
  else:
492
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
493
 
494
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
495
  print(f"✅ SPACE_ID found: {space_id_startup}")
496
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
497
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
500
 
501
  print("-"*(60 + len(" App Starting ")) + "\n")
502
 
503
+ print("Launching Gradio Interface for Optimized Agent Evaluation...")
504
  demo.launch(debug=True, share=False)