drAbreu commited on
Commit
ff61745
·
1 Parent(s): cb2e2ec

Back to basic interface and checking with openai

Browse files
Files changed (1) hide show
  1. app.py +66 -342
app.py CHANGED
@@ -3,21 +3,15 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- import json
7
- import asyncio
8
- from pathlib import Path
9
- from datetime import datetime
10
- from typing import List, Dict, Any, Optional
11
- from tqdm.asyncio import tqdm as async_tqdm
12
  from agents.llama_index_agent import GaiaAgent
13
-
 
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
- CACHE_DIR = "cache"
17
- CACHE_FILE = os.path.join(CACHE_DIR, "agent_cache.json")
18
- MAX_CONCURRENT_REQUESTS = 3 # Limit concurrent API calls
19
 
20
- # Model configurations
 
 
21
  CLAUDE = {
22
  "model_provider": "anthropic",
23
  "model_name": "claude-3-7-sonnet-latest"
@@ -26,362 +20,120 @@ OPENAI = {
26
  "model_provider": "openai",
27
  "model_name": "gpt-4o"
28
  }
29
-
30
- # --- Optimized Agent Implementation ---
31
- class OptimizedGaiaAgent:
32
- """
33
- Enhanced GAIA agent with caching and asynchronous processing capabilities.
34
- """
35
- def __init__(
36
- self,
37
- model_config=CLAUDE,
38
- use_cache=True,
39
- cache_file=CACHE_FILE,
40
- max_concurrent=MAX_CONCURRENT_REQUESTS
41
- ):
42
- """
43
- Initialize the optimized agent.
44
-
45
- Args:
46
- model_config: Dictionary with model_provider and model_name
47
- use_cache: Whether to use caching
48
- cache_file: Path to the cache file
49
- max_concurrent: Maximum number of concurrent requests
50
- """
51
- self.agent = GaiaAgent(**model_config)
52
- self.model_config = model_config
53
- self.use_cache = use_cache
54
- self.cache_file = cache_file
55
- self.cache = self._load_cache() if use_cache else {}
56
- self.semaphore = asyncio.Semaphore(max_concurrent)
57
-
58
- print(f"OptimizedGaiaAgent initialized with {model_config['model_provider']} {model_config['model_name']}")
59
- if use_cache:
60
- print(f"Cache loaded with {len(self.cache)} answers")
61
-
62
- def _load_cache(self) -> Dict[str, str]:
63
- """Load cached answers from file"""
64
- # Create cache directory if it doesn't exist
65
- os.makedirs(os.path.dirname(self.cache_file), exist_ok=True)
66
-
67
- cache_path = Path(self.cache_file)
68
- if cache_path.exists():
69
- try:
70
- with open(cache_path, 'r') as f:
71
- return json.load(f)
72
- except Exception as e:
73
- print(f"Error loading cache: {e}")
74
- return {}
75
- return {}
76
-
77
- def _save_cache(self) -> None:
78
- """Save cached answers to file"""
79
- try:
80
- with open(self.cache_file, 'w') as f:
81
- json.dump(self.cache, f, indent=2)
82
- except Exception as e:
83
- print(f"Error saving cache: {e}")
84
-
85
- def _get_cache_key(self, question: str) -> str:
86
- """Generate a consistent key for the cache"""
87
- # Strip whitespace and normalize
88
- return question.strip()
89
-
90
- async def process_question(self, task_id: str, question: str) -> Dict[str, Any]:
91
- """
92
- Process a single question, using cache if available.
93
-
94
- Args:
95
- task_id: ID of the task/question
96
- question: The question text
97
-
98
- Returns:
99
- Dictionary with task_id, question, answer, and metadata
100
- """
101
- cache_key = self._get_cache_key(question)
102
-
103
- # Check cache first
104
- if self.use_cache and cache_key in self.cache:
105
- print(f"🔄 Cache hit for task {task_id[:8]}...")
106
- return {
107
- "task_id": task_id,
108
- "question": question,
109
- "submitted_answer": self.cache[cache_key],
110
- "cached": True,
111
- "error": False
112
- }
113
-
114
- # Process the question (with semaphore to limit concurrent requests)
115
- async with self.semaphore:
116
- print(f"⚙️ Processing task {task_id[:8]}...")
117
- try:
118
- response = await self.agent.run(question)
119
- answer = response.response.blocks[-1].text
120
-
121
- # Save to cache
122
- if self.use_cache:
123
- self.cache[cache_key] = answer
124
- # Use asyncio.to_thread for file I/O to avoid blocking
125
- await asyncio.to_thread(self._save_cache)
126
-
127
- return {
128
- "task_id": task_id,
129
- "question": question,
130
- "submitted_answer": answer,
131
- "cached": False,
132
- "error": False
133
- }
134
- except Exception as e:
135
- error_message = f"ERROR: {str(e)}"
136
- print(f"❌ Error processing task {task_id[:8]}: {error_message}")
137
- return {
138
- "task_id": task_id,
139
- "question": question,
140
- "submitted_answer": error_message,
141
- "cached": False,
142
- "error": True
143
- }
144
-
145
- async def process_all(
146
- self,
147
- questions_data: List[Dict[str, Any]],
148
- progress_callback=None
149
- ) -> List[Dict[str, Any]]:
150
- """
151
- Process all questions, with progress reporting.
152
-
153
- Args:
154
- questions_data: List of question dictionaries
155
- progress_callback: Function to call with progress updates
156
-
157
- Returns:
158
- List of results with answers and metadata
159
- """
160
- # Filter out invalid questions
161
- valid_questions = [
162
- item for item in questions_data
163
- if item.get("task_id") and item.get("question") is not None
164
- ]
165
-
166
- if not valid_questions:
167
- print("No valid questions to process.")
168
- return []
169
-
170
- total = len(valid_questions)
171
- print(f"Processing {total} questions with {MAX_CONCURRENT_REQUESTS} concurrent tasks...")
172
-
173
- # Process questions and collect results
174
- results = []
175
-
176
- # Create async tasks
177
- tasks = [
178
- self.process_question(item["task_id"], item["question"])
179
- for item in valid_questions
180
- ]
181
-
182
- # Process with progress tracking
183
- if progress_callback:
184
- progress_callback(0, desc="Starting processing...")
185
-
186
- # Process tasks one by one with progress updates
187
- for i, task in enumerate(asyncio.as_completed(tasks)):
188
- result = await task
189
- results.append(result)
190
-
191
- # Update progress
192
- if progress_callback:
193
- progress_callback((i + 1) / total, desc=f"Processed {i + 1}/{total} questions")
194
-
195
- # Sort results to match original order
196
- id_to_result = {result["task_id"]: result for result in results}
197
- ordered_results = [
198
- id_to_result.get(
199
- item["task_id"],
200
- {"task_id": item["task_id"], "question": item.get("question"), "submitted_answer": "ERROR: Processing failed", "error": True}
201
- )
202
- for item in valid_questions
203
- ]
204
-
205
- return ordered_results
206
-
207
-
208
- # --- Main Application Class ---
209
  class BasicAgent:
210
- """
211
- Optimized agent wrapper for the GAIA benchmark.
212
- """
213
  def __init__(
214
  self,
215
- model_provider="anthropic",
216
- model_name="claude-3-7-sonnet-latest",
217
- api_key=None,
218
- use_cache=True,
219
- max_concurrent=MAX_CONCURRENT_REQUESTS
220
  ):
221
  """
222
- Initialize the BasicAgent with caching and async capabilities.
223
 
224
  Args:
225
- model_provider: LLM provider to use
226
  model_name: Specific model to use
227
- api_key: Optional API key
228
- use_cache: Whether to use caching
229
- max_concurrent: Maximum concurrent requests
230
  """
231
- model_config = {
232
- "model_provider": model_provider,
233
- "model_name": model_name,
234
- "api_key": api_key
235
- }
236
-
237
- self.agent = OptimizedGaiaAgent(
238
- model_config=model_config,
239
- use_cache=use_cache,
240
- max_concurrent=max_concurrent
241
- )
242
  print(f"BasicAgent initialized with {model_provider} {model_name}.")
243
 
244
- async def process_async(self, questions_data, progress_callback=None):
245
- """Process questions asynchronously with progress reporting"""
246
- return await self.agent.process_all(questions_data, progress_callback)
247
-
248
  def __call__(self, question: str) -> str:
249
- """
250
- Process a single question (for compatibility with the original interface).
251
- This method is synchronous for backward compatibility.
252
- """
253
  print(f"Agent received question (first 50 chars): {question[:50]}...")
254
 
255
  async def agentic_main():
256
- result = await self.agent.process_question("single", question)
257
- return result["submitted_answer"]
258
 
259
- final_answer = asyncio.run(agentic_main())
 
260
  print(f"Agent returning answer: {final_answer}")
261
  return final_answer
262
 
263
-
264
- # --- Async Run and Submit Function ---
265
- async def async_run_and_submit_all(
266
- profile: gr.OAuthProfile | None,
267
- progress=gr.Progress()
268
- ) -> tuple:
269
  """
270
- Asynchronous version of run_and_submit_all.
271
- Fetches questions, processes them concurrently, and submits answers.
272
  """
273
  # --- Determine HF Space Runtime URL and Repo URL ---
274
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
275
 
276
- if not profile:
 
 
 
277
  print("User not logged in.")
278
  return "Please Login to Hugging Face with the button.", None
279
 
280
- username = f"{profile.username}"
281
- print(f"User logged in: {username}")
282
-
283
  api_url = DEFAULT_API_URL
284
  questions_url = f"{api_url}/questions"
285
  submit_url = f"{api_url}/submit"
286
 
287
- # 1. Instantiate Agent
288
  try:
289
- progress(0, desc="Initializing agent...")
290
  agent = BasicAgent()
291
  except Exception as e:
292
  print(f"Error instantiating agent: {e}")
293
  return f"Error initializing agent: {e}", None
294
-
295
- # In the case of an app running as a Hugging Face space, this link points toward your codebase
296
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
297
  print(agent_code)
298
 
299
  # 2. Fetch Questions
300
  print(f"Fetching questions from: {questions_url}")
301
- progress(0.1, desc="Fetching questions...")
302
  try:
303
- # Use asyncio for the HTTP request
304
- async def fetch_questions():
305
- loop = asyncio.get_event_loop()
306
- return await loop.run_in_executor(
307
- None,
308
- lambda: requests.get(questions_url, timeout=15)
309
- )
310
-
311
- response = await fetch_questions()
312
  response.raise_for_status()
313
  questions_data = response.json()
314
-
315
  if not questions_data:
316
- print("Fetched questions list is empty.")
317
- return "Fetched questions list is empty or invalid format.", None
318
-
319
  print(f"Fetched {len(questions_data)} questions.")
320
- progress(0.2, desc=f"Successfully fetched {len(questions_data)} questions.")
321
-
322
  except requests.exceptions.RequestException as e:
323
  print(f"Error fetching questions: {e}")
324
  return f"Error fetching questions: {e}", None
325
  except requests.exceptions.JSONDecodeError as e:
326
- print(f"Error decoding JSON response from questions endpoint: {e}")
327
- print(f"Response text: {response.text[:500]}")
328
- return f"Error decoding server response for questions: {e}", None
329
  except Exception as e:
330
  print(f"An unexpected error occurred fetching questions: {e}")
331
  return f"An unexpected error occurred fetching questions: {e}", None
332
 
333
- # 3. Process Questions Asynchronously
334
- print(f"Processing {len(questions_data)} questions...")
335
- try:
336
- # Define progress update function
337
- def update_progress(value, desc=""):
338
- # Scale progress from 0.2-0.8 for the processing phase
339
- progress(0.2 + (value * 0.6), desc=desc)
340
-
341
- results = await agent.process_async(questions_data, update_progress)
342
-
343
- # Convert results to the expected format
344
- answers_payload = [
345
- {"task_id": result["task_id"], "submitted_answer": result["submitted_answer"]}
346
- for result in results
347
- ]
348
-
349
- # Format for display
350
- results_log = [
351
- {"Task ID": result["task_id"], "Question": result["question"], "Submitted Answer": result["submitted_answer"]}
352
- for result in results
353
- ]
354
-
355
- progress(0.8, desc=f"Processed all {len(results)} questions. Preparing submission...")
356
-
357
- except Exception as e:
358
- print(f"Error during question processing: {e}")
359
- return f"Error during question processing: {e}", None
360
 
361
  if not answers_payload:
362
  print("Agent did not produce any answers to submit.")
363
- return "Agent did not produce any answers to submit.", pd.DataFrame([])
364
 
365
  # 4. Prepare Submission
366
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
367
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
368
  print(status_update)
369
- progress(0.9, desc="Submitting answers...")
370
 
371
  # 5. Submit
372
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
373
  try:
374
- async def submit_answers():
375
- loop = asyncio.get_event_loop()
376
- return await loop.run_in_executor(
377
- None,
378
- lambda: requests.post(submit_url, json=submission_data, timeout=60)
379
- )
380
-
381
- response = await submit_answers()
382
  response.raise_for_status()
383
  result_data = response.json()
384
-
385
  final_status = (
386
  f"Submission Successful!\n"
387
  f"User: {result_data.get('username')}\n"
@@ -389,12 +141,9 @@ async def async_run_and_submit_all(
389
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
390
  f"Message: {result_data.get('message', 'No message received.')}"
391
  )
392
-
393
  print("Submission successful.")
394
- progress(1.0, desc="Complete!")
395
  results_df = pd.DataFrame(results_log)
396
  return final_status, results_df
397
-
398
  except requests.exceptions.HTTPError as e:
399
  error_detail = f"Server responded with status {e.response.status_code}."
400
  try:
@@ -423,67 +172,42 @@ async def async_run_and_submit_all(
423
  return status_message, results_df
424
 
425
 
426
- # Synchronous wrapper for the async function (for Gradio compatibility)
427
- def run_and_submit_all(profile: gr.OAuthProfile | None, progress=gr.Progress()):
428
- """Synchronous wrapper for the async function"""
429
- return asyncio.run(async_run_and_submit_all(profile, progress))
430
-
431
-
432
  # --- Build Gradio Interface using Blocks ---
433
  with gr.Blocks() as demo:
434
- gr.Markdown("# Optimized GAIA Agent Evaluation Runner")
435
  gr.Markdown(
436
  """
437
  **Instructions:**
438
 
439
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, and necessary packages.
440
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
441
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, process them, and see your score.
442
 
443
- This implementation features:
444
- - **Caching**: Answers are saved to avoid reprocessing the same questions
445
- - **Asynchronous Processing**: Questions are processed concurrently for better performance
446
- - **Progress Tracking**: See real-time progress as questions are processed
447
  """
448
  )
449
 
450
- with gr.Row():
451
- gr.LoginButton()
452
- clear_cache_button = gr.Button("Clear Cache")
453
 
454
- run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
455
 
456
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
457
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
458
 
459
- # Define clear cache function
460
- def clear_cache():
461
- if os.path.exists(CACHE_FILE):
462
- try:
463
- os.remove(CACHE_FILE)
464
- return f"Cache cleared successfully! ({CACHE_FILE})"
465
- except Exception as e:
466
- return f"Error clearing cache: {e}"
467
- return "No cache file found."
468
-
469
- # Connect the components
470
- clear_cache_button.click(
471
- fn=clear_cache,
472
- outputs=status_output
473
- )
474
-
475
  run_button.click(
476
  fn=run_and_submit_all,
477
- inputs=[gr.OAuthProfile()],
478
  outputs=[status_output, results_table]
479
  )
480
 
481
- # --- App Entry Point ---
482
  if __name__ == "__main__":
483
  print("\n" + "-"*30 + " App Starting " + "-"*30)
484
  # Check for SPACE_HOST and SPACE_ID at startup for information
485
  space_host_startup = os.getenv("SPACE_HOST")
486
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
487
 
488
  if space_host_startup:
489
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -491,7 +215,7 @@ if __name__ == "__main__":
491
  else:
492
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
493
 
494
- if space_id_startup: # Print repo URLs if SPACE_ID is found
495
  print(f"✅ SPACE_ID found: {space_id_startup}")
496
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
497
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
@@ -500,5 +224,5 @@ if __name__ == "__main__":
500
 
501
  print("-"*(60 + len(" App Starting ")) + "\n")
502
 
503
- print("Launching Gradio Interface for Optimized Agent Evaluation...")
504
  demo.launch(debug=True, share=False)
 
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
 
6
  from agents.llama_index_agent import GaiaAgent
7
+ import asyncio
8
+ # (Keep Constants as is)
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
11
 
12
+ # --- Basic Agent Definition ---
13
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
14
+
15
  CLAUDE = {
16
  "model_provider": "anthropic",
17
  "model_name": "claude-3-7-sonnet-latest"
 
20
  "model_provider": "openai",
21
  "model_name": "gpt-4o"
22
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  class BasicAgent:
 
 
 
24
  def __init__(
25
  self,
26
+ model_provider="openai",
27
+ model_name="gpt-4o",
28
+ api_key=None
 
 
29
  ):
30
  """
31
+ Initialize the BasicAgent with configurable model settings.
32
 
33
  Args:
34
+ model_provider: LLM provider to use (openai, anthropic, etc.)
35
  model_name: Specific model to use
36
+ api_key: Optional API key (defaults to environment variable)
 
 
37
  """
38
+ self.agent = GaiaAgent(**OPENAI)
 
 
 
 
 
 
 
 
 
 
39
  print(f"BasicAgent initialized with {model_provider} {model_name}.")
40
 
 
 
 
 
41
  def __call__(self, question: str) -> str:
42
+ """Process a GAIA benchmark question and return the formatted answer."""
 
 
 
43
  print(f"Agent received question (first 50 chars): {question[:50]}...")
44
 
45
  async def agentic_main():
46
+ response = await self.agent.run(question)
47
+ return response
48
 
49
+ response = asyncio.run(agentic_main())
50
+ final_answer = response.response.blocks[-1].text
51
  print(f"Agent returning answer: {final_answer}")
52
  return final_answer
53
 
54
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
55
  """
56
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
57
+ and displays the results.
58
  """
59
  # --- Determine HF Space Runtime URL and Repo URL ---
60
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
61
 
62
+ if profile:
63
+ username= f"{profile.username}"
64
+ print(f"User logged in: {username}")
65
+ else:
66
  print("User not logged in.")
67
  return "Please Login to Hugging Face with the button.", None
68
 
 
 
 
69
  api_url = DEFAULT_API_URL
70
  questions_url = f"{api_url}/questions"
71
  submit_url = f"{api_url}/submit"
72
 
73
+ # 1. Instantiate Agent ( modify this part to create your agent)
74
  try:
 
75
  agent = BasicAgent()
76
  except Exception as e:
77
  print(f"Error instantiating agent: {e}")
78
  return f"Error initializing agent: {e}", None
79
+ # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others, so please keep it public)
 
80
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
81
  print(agent_code)
82
 
83
  # 2. Fetch Questions
84
  print(f"Fetching questions from: {questions_url}")
 
85
  try:
86
+ response = requests.get(questions_url, timeout=15)
 
 
 
 
 
 
 
 
87
  response.raise_for_status()
88
  questions_data = response.json()
 
89
  if not questions_data:
90
+ print("Fetched questions list is empty.")
91
+ return "Fetched questions list is empty or invalid format.", None
 
92
  print(f"Fetched {len(questions_data)} questions.")
 
 
93
  except requests.exceptions.RequestException as e:
94
  print(f"Error fetching questions: {e}")
95
  return f"Error fetching questions: {e}", None
96
  except requests.exceptions.JSONDecodeError as e:
97
+ print(f"Error decoding JSON response from questions endpoint: {e}")
98
+ print(f"Response text: {response.text[:500]}")
99
+ return f"Error decoding server response for questions: {e}", None
100
  except Exception as e:
101
  print(f"An unexpected error occurred fetching questions: {e}")
102
  return f"An unexpected error occurred fetching questions: {e}", None
103
 
104
+ # 3. Run your Agent
105
+ results_log = []
106
+ answers_payload = []
107
+ print(f"Running agent on {len(questions_data)} questions...")
108
+ for item in questions_data:
109
+ task_id = item.get("task_id")
110
+ question_text = item.get("question")
111
+ if not task_id or question_text is None:
112
+ print(f"Skipping item with missing task_id or question: {item}")
113
+ continue
114
+ try:
115
+ submitted_answer = agent(question_text)
116
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
117
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
118
+ except Exception as e:
119
+ print(f"Error running agent on task {task_id}: {e}")
120
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
 
 
 
121
 
122
  if not answers_payload:
123
  print("Agent did not produce any answers to submit.")
124
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
125
 
126
  # 4. Prepare Submission
127
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
128
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
129
  print(status_update)
 
130
 
131
  # 5. Submit
132
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
133
  try:
134
+ response = requests.post(submit_url, json=submission_data, timeout=60)
 
 
 
 
 
 
 
135
  response.raise_for_status()
136
  result_data = response.json()
 
137
  final_status = (
138
  f"Submission Successful!\n"
139
  f"User: {result_data.get('username')}\n"
 
141
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
142
  f"Message: {result_data.get('message', 'No message received.')}"
143
  )
 
144
  print("Submission successful.")
 
145
  results_df = pd.DataFrame(results_log)
146
  return final_status, results_df
 
147
  except requests.exceptions.HTTPError as e:
148
  error_detail = f"Server responded with status {e.response.status_code}."
149
  try:
 
172
  return status_message, results_df
173
 
174
 
 
 
 
 
 
 
175
  # --- Build Gradio Interface using Blocks ---
176
  with gr.Blocks() as demo:
177
+ gr.Markdown("# Basic Agent Evaluation Runner")
178
  gr.Markdown(
179
  """
180
  **Instructions:**
181
 
182
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
183
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
184
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
185
 
186
+ ---
187
+ **Disclaimers:**
188
+ Once you click the "Submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
189
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the delay of the submit button, a solution could be to cache the answers and submit them in a separate action, or even to answer the questions asynchronously.
190
  """
191
  )
192
 
193
+ gr.LoginButton()
 
 
194
 
195
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
196
 
197
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
198
+ # Removed max_rows=10 from DataFrame constructor
199
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  run_button.click(
202
  fn=run_and_submit_all,
 
203
  outputs=[status_output, results_table]
204
  )
205
 
 
206
  if __name__ == "__main__":
207
  print("\n" + "-"*30 + " App Starting " + "-"*30)
208
  # Check for SPACE_HOST and SPACE_ID at startup for information
209
  space_host_startup = os.getenv("SPACE_HOST")
210
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
211
 
212
  if space_host_startup:
213
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
215
  else:
216
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
217
 
218
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
219
  print(f"✅ SPACE_ID found: {space_id_startup}")
220
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
221
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
224
 
225
  print("-"*(60 + len(" App Starting ")) + "\n")
226
 
227
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
228
  demo.launch(debug=True, share=False)