Jofthomas committed on
Commit
31243f4
·
verified ·
1 Parent(s): e80aab9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +131 -384
app.py CHANGED
@@ -2,452 +2,199 @@ import os
2
  import gradio as gr
3
  import requests
4
  import inspect # To get source code for __repr__
5
- import asyncio
6
- from typing import Dict, List, AsyncGenerator, Union, Tuple, Optional
7
-
8
- # --- LangChain Specific Imports ---
9
- from langchain_core.messages import HumanMessage, AIMessage, BaseMessage
10
- from langchain_core.tools import tool
11
- from langchain_openai import ChatOpenAI
12
- from langgraph.checkpoint.memory import MemorySaver
13
- from langgraph.prebuilt import create_react_agent
14
 
15
  # --- Constants ---
16
  DEFAULT_API_URL = "http://127.0.0.1:8000" # Default URL for your FastAPI app
17
 
18
- # --- Tools (Keep these defined globally or ensure they are included in __repr__) ---
19
- @tool
20
- def get_lat_lng(location_description: str) -> dict[str, float]:
21
- """Get the latitude and longitude of a location."""
22
- print(f"Tool: Getting lat/lng for {location_description}")
23
- # Replace with actual API call in a real app
24
- if "tokyo" in location_description.lower():
25
- return {"lat": 35.6895, "lng": 139.6917}
26
- elif "paris" in location_description.lower():
27
- return {"lat": 48.8566, "lng": 2.3522}
28
- elif "new york" in location_description.lower():
29
- return {"lat": 40.7128, "lng": -74.0060}
30
- else:
31
- return {"lat": 51.5072, "lng": -0.1276} # Default London
32
-
33
- @tool
34
- def get_weather(lat: float, lng: float) -> dict[str, str]:
35
- """Get the weather at a location."""
36
- print(f"Tool: Getting weather for lat={lat}, lng={lng}")
37
- # Replace with actual API call in a real app
38
- if lat > 45: # Northern locations
39
- return {"temperature": "15°C", "description": "Cloudy"}
40
- elif lat > 30: # Mid locations
41
- return {"temperature": "25°C", "description": "Sunny"}
42
- else: # Southern locations
43
- return {"temperature": "30°C", "description": "Very Sunny"}
44
 
45
- # --- Agent Class Definition ---
46
- class MyLangChainAgent:
47
  """
48
- A sample LangChain agent class designed for interaction and submission.
49
- NOTE: The current tools (weather/location) are placeholders and WILL NOT
50
- correctly answer GAIA benchmark questions. This class structure
51
- demonstrates how to integrate an agent with the submission API.
52
- Replace LLM, tools, and potentially the agent type for actual GAIA tasks.
53
  """
54
- def __init__(self, model_name="gpt-4", temperature=0):
55
- # Ensure API key is available
56
- if not os.getenv("OPENAI_API_KEY"):
57
- raise ValueError("OPENAI_API_KEY environment variable not set.")
58
-
59
- self.llm = ChatOpenAI(temperature=temperature, model=model_name)
60
- self.tools = [get_lat_lng, get_weather] # Use the globally defined tools
61
- self.memory = MemorySaver()
62
- # Create the agent executor
63
- self.agent_executor = create_react_agent(self.llm, self.tools, checkpointer=self.memory)
64
- print("MyLangChainAgent initialized.")
65
 
66
- async def __call__(self, question: str, thread_id: str) -> AsyncGenerator[Union[str, Dict[str, str]], str]:
67
  """
68
- Runs the agent asynchronously, yielding intermediate steps and returning the final answer.
69
-
70
- Args:
71
- question: The input question string.
72
- thread_id: A unique identifier for the conversation thread.
73
-
74
- Yields:
75
- Intermediate steps (tool calls/results) as strings or dicts.
76
-
77
- Returns:
78
- The final AI answer as a string.
79
  """
80
- print(f"Agent executing for thread_id: {thread_id} on question: {question[:50]}...")
81
- lc_messages: List[BaseMessage] = [HumanMessage(content=question)]
82
- final_answer = ""
83
- full_response_content = "" # Store the complete AI response chunks
84
-
85
- async for chunk in self.agent_executor.astream_events(
86
- {"messages": lc_messages},
87
- config={"configurable": {"thread_id": thread_id}},
88
- version="v1"
89
- ):
90
- event = chunk["event"]
91
- data = chunk["data"]
92
- # print(f"DEBUG: Event: {event}, Data Keys: {data.keys()}") # Debugging line
93
-
94
- if event == "on_chat_model_stream":
95
- content = data["chunk"].content
96
- if content:
97
- # print(f"DEBUG: AI Chunk: {content}") # Debugging line
98
- full_response_content += content
99
- # Yield potentially incomplete response for live typing effect if needed
100
- # yield {"type": "stream", "content": content }
101
-
102
- elif event == "on_tool_start":
103
- tool_input_str = str(data.get('input', ''))
104
- yield f"🛠️ Using tool: **{data['name']}** with input: `{tool_input_str}`"
105
-
106
- elif event == "on_tool_end":
107
- tool_output_str = str(data.get('output', ''))
108
- yield f"✅ Tool **{data['name']}** finished.\nResult: `{tool_output_str}`"
109
-
110
- # Detect the end of the conversation turn (heuristic)
111
- # The 'on_chain_end' event for the top-level graph might signal the end.
112
- # Or check the 'messages' list in the final state if available.
113
- # For create_react_agent, the final AIMessage is often the last main event.
114
- # We will capture the last full AI message content after the loop.
115
-
116
- # After iterating through all chunks, the final answer should be in full_response_content
117
- final_answer = full_response_content.strip()
118
- print(f"Agent execution finished. Final Answer: {final_answer[:100]}...")
119
- # Yield the complete final answer distinctly if needed
120
- # yield {"type": "final_answer_marker", "content": final_answer} # Example marker
121
- return final_answer # Return the final answer
122
 
123
  def __repr__(self) -> str:
124
  """
125
- Return the source code required to reconstruct this agent, including
126
- the class definition, tool functions, and necessary imports.
127
  """
128
  imports = [
129
- "import os",
130
- "from typing import Dict, List, AsyncGenerator, Union, Tuple, Optional",
131
- "from langchain_core.messages import HumanMessage, AIMessage, BaseMessage",
132
- "from langchain_core.tools import tool",
133
- "from langchain_openai import ChatOpenAI",
134
- "from langgraph.checkpoint.memory import MemorySaver",
135
- "from langgraph.prebuilt import create_react_agent",
136
- "import inspect", # Needed if repr itself uses inspect dynamically
137
- "import asyncio", # Needed for async call
138
- "\n"
139
  ]
140
- # Get source code of tool functions
141
- tool_sources = []
142
- for t in self.tools:
143
- try:
144
- tool_sources.append(inspect.getsource(t))
145
- except (TypeError, OSError) as e:
146
- print(f"Warning: Could not get source for tool {t.__name__}: {e}")
147
- tool_sources.append(f"# Could not automatically get source for tool: {t.__name__}\n")
148
-
149
- # Get source code of the class itself
150
- class_source = inspect.getsource(MyLangChainAgent)
151
-
152
- # Combine imports, tools, and class definition
153
- full_source = "\n".join(imports) + "\n\n" + \
154
- "\n\n".join(tool_sources) + "\n\n" + \
155
- class_source
156
  return full_source
157
 
158
-
159
  # --- Gradio UI and Logic ---
160
 
161
- # Initialize the agent (do this once outside the request functions)
162
- # Handle potential API key error during initialization
163
- try:
164
- agent_instance = MyLangChainAgent()
165
- except ValueError as e:
166
- print(f"ERROR initializing agent: {e}")
167
- # Provide a dummy agent or exit if critical
168
- agent_instance = None # Or raise SystemExit("Agent initialization failed")
 
169
 
170
- def format_chat_history(history: List[List[Optional[str]]]) -> List[Tuple[Optional[str], Optional[str]]]:
171
- """Helper to format Gradio history for display."""
172
- # Gradio's history format is List[List[user_msg | None, ai_msg | None]]
173
- # We want List[Tuple[user_msg | None, ai_msg | None]] for Chatbot
174
- formatted = []
175
- for turn in history:
176
- formatted.append(tuple(turn))
177
- return formatted
178
 
 
 
 
 
 
 
 
 
179
 
180
- async def fetch_and_display_question(api_url: str):
181
- """Calls the backend to get a random question."""
182
- if not api_url:
183
- return "Please enter the API URL.", "", "", gr.update(value=""), gr.update(value="") # Clear chat too
184
-
185
- question_url = f"{api_url.strip('/')}/random-question"
186
- print(f"Fetching question from: {question_url}")
187
  try:
188
- response = requests.get(question_url, timeout=10)
189
- response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
190
- data = response.json()
191
- task_id = data.get("task_id")
192
- question_text = data.get("question")
193
- if task_id and question_text:
194
- print(f"Fetched Task ID: {task_id}")
195
- # Return updates for Gradio components: Status, Task ID, Question Text, Clear Agent Answer, Clear Chat
196
- return "Question fetched successfully!", task_id, question_text, "", [] # Clears answer and chat history
197
- else:
198
- return "Error: Invalid data format received from API.", "", "", "", []
199
  except requests.exceptions.RequestException as e:
200
- print(f"Error fetching question: {e}")
201
- return f"Error fetching question: {e}", "", "", "", []
202
  except Exception as e:
203
- print(f"An unexpected error occurred: {e}")
204
- return f"An unexpected error occurred: {e}", "", "", "", []
205
-
206
-
207
- async def run_agent_interaction(
208
- message: str,
209
- history: List[List[Optional[str]]],
210
- current_task_id: str,
211
- # agent_instance: MyLangChainAgent # Agent passed via state potentially
212
- ):
213
- """Handles the chat interaction, runs the agent, yields steps, updates final answer state."""
214
- if agent_instance is None:
215
- yield "Agent not initialized. Please check API keys and restart."
216
- return
217
-
218
- if not current_task_id:
219
- yield "Please fetch a question first using the button above."
220
- return
221
-
222
- # The 'message' here is the user's latest input in the chat.
223
- # For this workflow, we assume the main input is the fetched question.
224
- # We'll use the fetched question (implicitly stored) to run the agent.
225
- # If you want interactive chat *about* the question, the logic needs adjustment.
226
-
227
- # For simplicity, let's assume the user's message *is* the question or a prompt related to it.
228
- # In the GAIA context, usually, the agent just runs on the provided question directly.
229
- # We'll use the `current_task_id` to generate a unique thread_id for LangGraph memory.
230
- thread_id = f"gaia_task_{current_task_id}_{os.urandom(4).hex()}"
231
-
232
- print(f"Running agent for user message: {message[:50]}...")
233
- history.append([message, None]) # Add user message to history
234
-
235
- final_agent_answer = None
236
- full_yielded_response = ""
237
-
238
- # Use the agent's __call__ method
239
- async for step in agent_instance(message, thread_id=thread_id):
240
- if isinstance(step, str):
241
- # Intermediate step (tool call, result, maybe stream chunk)
242
- history[-1][1] = step # Update the AI's response in the last turn
243
- yield format_chat_history(history) # Update chatbot UI
244
- full_yielded_response = step # Track last yielded message
245
- # If __call__ yielded dicts for streaming, handle here:
246
- # elif isinstance(step, dict) and step.get("type") == "stream":
247
- # history[-1][1] = (history[-1][1] or "") + step["content"]
248
- # yield format_chat_history(history)
249
-
250
- # After the loop, the `step` variable holds the return value (final answer)
251
- final_agent_answer = step
252
- print(f"Agent final answer received: {final_agent_answer[:100]}...")
253
-
254
- # Update the history with the definitive final answer
255
- if final_agent_answer:
256
- history[-1][1] = final_agent_answer # Replace intermediate steps with final one
257
- elif full_yielded_response:
258
- # Fallback if final answer wasn't returned correctly but we yielded something
259
- history[-1][1] = full_yielded_response
260
- final_agent_answer = full_yielded_response # Use the last yielded message as answer
261
- else:
262
- history[-1][1] = "Agent did not produce a final answer."
263
- final_agent_answer = "" # Ensure it's a string
264
-
265
- # Yield the final state of the history and update the hidden state for the final answer
266
- yield format_chat_history(history), final_agent_answer
267
 
 
 
 
 
 
 
268
 
269
- def submit_to_leaderboard(
270
- api_url: str,
271
- username: str,
272
- task_id: str,
273
- agent_answer: str,
274
- # agent_instance: MyLangChainAgent # Pass agent via state if needed
275
- ):
276
- """Submits the agent's answer and code to the FastAPI backend."""
277
- if agent_instance is None:
278
- return "Agent not initialized. Cannot submit."
279
- if not api_url:
280
- return "Please enter the API URL."
281
- if not username:
282
- return "Please enter your Hugging Face username."
283
- if not task_id:
284
- return "No task ID available. Please fetch a question first."
285
- if agent_answer is None or agent_answer.strip() == "": # Check if None or empty
286
- # Maybe allow submission of empty answer? Depends on requirements.
287
- print("Warning: Submitting empty answer.")
288
- # return "Agent has not provided an answer yet."
289
-
290
 
291
- submit_url = f"{api_url.strip('/')}/submit"
292
- print(f"Submitting to: {submit_url}")
293
-
294
- # Get agent code
295
- try:
296
- agent_code = agent_instance.__repr__()
297
- # print(f"Agent Code (first 200 chars):\n{agent_code[:200]}...") # Debug
298
- except Exception as e:
299
- print(f"Error getting agent representation: {e}")
300
- return f"Error generating agent code for submission: {e}"
301
-
302
- # Prepare submission data according to Pydantic model in FastAPI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  submission_data = {
304
  "username": username.strip(),
305
  "agent_code": agent_code,
306
- "answers": [
307
- {
308
- "task_id": task_id,
309
- "submitted_answer": agent_answer # Use the stored final answer
310
- }
311
- # Add more answers here if submitting a batch
312
- ]
313
  }
 
 
314
 
 
 
315
  try:
316
- response = requests.post(submit_url, json=submission_data, timeout=30)
317
  response.raise_for_status()
318
  result_data = response.json()
319
- # Format the result nicely for display
320
- result_message = (
 
321
  f"Submission Successful!\n"
322
  f"User: {result_data.get('username')}\n"
323
- f"Score: {result_data.get('score')}\n"
324
- f"Correct: {result_data.get('correct_count')}/{result_data.get('total_attempted')}\n"
325
- f"Message: {result_data.get('message')}\n"
326
- f"Timestamp: {result_data.get('timestamp')}"
327
  )
328
  print("Submission successful.")
329
- return result_message
 
 
330
  except requests.exceptions.HTTPError as e:
331
- # Try to get detail from response body if available
332
  error_detail = e.response.text
333
  try:
334
  error_json = e.response.json()
335
  error_detail = error_json.get('detail', error_detail)
336
  except requests.exceptions.JSONDecodeError:
337
- pass # Keep the raw text if not JSON
338
- print(f"HTTP Error during submission: {e.response.status_code} - {error_detail}")
339
- return f"Submission Failed (HTTP {e.response.status_code}): {error_detail}"
 
 
340
  except requests.exceptions.RequestException as e:
341
- print(f"Network error during submission: {e}")
342
- return f"Submission Failed: Network error - {e}"
 
 
343
  except Exception as e:
344
- print(f"An unexpected error occurred during submission: {e}")
345
- return f"Submission Failed: An unexpected error occurred - {e}"
 
 
346
 
347
 
348
  # --- Build Gradio Interface using Blocks ---
349
  with gr.Blocks() as demo:
350
- gr.Markdown("# Agent Evaluation Interface")
351
  gr.Markdown(
352
- "Fetch a random question from the evaluation API, interact with the agent "
353
- "(Note: the default agent answers weather questions, not GAIA), "
354
- "and submit the agent's final answer to the leaderboard."
355
  )
356
 
357
- # --- State Variables ---
358
- # Store current task info, agent's final answer, and the agent instance
359
- current_task_id = gr.State("")
360
- current_question_text = gr.State("")
361
- current_agent_answer = gr.State("") # Stores the final answer string from the agent
362
- # agent_state = gr.State(agent_instance) # Pass agent instance via state
363
-
364
  with gr.Row():
365
  api_url_input = gr.Textbox(label="FastAPI API URL", value=DEFAULT_API_URL)
366
  hf_username_input = gr.Textbox(label="Hugging Face Username")
367
 
368
- with gr.Row():
369
- fetch_button = gr.Button("Get Random Question")
370
- submission_status_display = gr.Textbox(label="Status", interactive=False) # For fetch status
371
-
372
- with gr.Row():
373
- question_display = gr.Textbox(label="Current Question", lines=3, interactive=False)
374
 
375
- gr.Markdown("---")
376
- gr.Markdown("## Agent Interaction")
377
 
378
- chatbot = gr.Chatbot(label="Agent Conversation", height=400)
379
- msg_input = gr.Textbox(label="Send a message to the Agent (or just observe)") # Input for chat
380
-
381
- # Hidden Textbox to display the final extracted answer (optional, for clarity)
382
- final_answer_display = gr.Textbox(label="Agent's Final Answer (Extracted)", interactive=False)
383
-
384
- gr.Markdown("---")
385
- gr.Markdown("## Submission")
386
- with gr.Row():
387
- submit_button = gr.Button("Submit Current Answer to Leaderboard")
388
-
389
- submission_result_display = gr.Markdown(label="Submission Result", value="*Submit an answer to see the result here.*") # Use Markdown for better formatting
390
-
391
-
392
- # --- Component Interactions ---
393
-
394
- # Fetch Button Action
395
- fetch_button.click(
396
- fn=fetch_and_display_question,
397
- inputs=[api_url_input],
398
- outputs=[
399
- submission_status_display, # Shows fetch status
400
- current_task_id, # Updates hidden state
401
- question_display, # Updates question text box
402
- final_answer_display, # Clears old final answer
403
- chatbot # Clears chat history
404
- ]
405
  )
406
 
407
- # Chat Submission Action (when user sends message in chat)
408
- msg_input.submit(
409
- fn=run_agent_interaction,
410
- inputs=[
411
- msg_input, # User message from chat input
412
- chatbot, # Current chat history
413
- current_task_id, # Current task ID from state
414
- # agent_state # Pass agent instance state
415
- ],
416
- outputs=[
417
- chatbot, # Updated chat history
418
- current_agent_answer # Update the hidden state holding the final answer
419
- ]
420
- ).then(
421
- # After agent runs, update the visible "Final Answer" box from the state
422
- lambda answer_state: answer_state,
423
- inputs=[current_agent_answer],
424
- outputs=[final_answer_display]
425
- )
426
-
427
- # Clear message input after submission
428
- msg_input.submit(lambda: "", None, msg_input, queue=False)
429
-
430
-
431
- # Submit Button Action
432
- submit_button.click(
433
- fn=submit_to_leaderboard,
434
- inputs=[
435
- api_url_input,
436
- hf_username_input,
437
- current_task_id,
438
- current_agent_answer, # Use the stored final answer state
439
- # agent_state # Pass agent instance state
440
- ],
441
- outputs=[submission_result_display] # Display result message
442
- )
443
-
444
-
445
  if __name__ == "__main__":
446
- if agent_instance is None:
447
- print("\nFATAL: Agent could not be initialized. Gradio app will not run correctly.")
448
- print("Please ensure OPENAI_API_KEY is set and valid.\n")
449
- # Optionally exit here if agent is critical
450
- # exit(1)
451
- else:
452
- print("Launching Gradio Interface...")
453
- demo.launch(debug=True, server_name="0.0.0.0") # Share=False by default for security
 
2
  import gradio as gr
3
  import requests
4
  import inspect # To get source code for __repr__
5
+ import pandas as pd # For displaying results in a table
 
 
 
 
 
 
 
 
6
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "http://127.0.0.1:8000" # Default URL for your FastAPI app
9
 
10
+ # --- Basic Agent Definition ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
class BasicAgent:
    """
    Minimal placeholder agent used to exercise the evaluation pipeline.

    Every question receives the same fixed answer; the question text is only
    logged, never interpreted.
    """

    def __init__(self):
        # No state to set up; the print is only a start-up trace.
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """
        Answer ``question``.

        Args:
            question: The question text. Only its first 50 characters are
                logged; the content does not influence the answer.

        Returns:
            The fixed string ``"This is a default answer."``.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        # Replace this with actual logic if you were building a real agent
        fixed_answer = "This is a default answer."
        print(f"Agent returning fixed answer: {fixed_answer}")
        return fixed_answer

    def __repr__(self) -> str:
        """
        Return the source code of this agent's class, prefixed by its imports.

        The source is looked up for ``type(self)`` rather than for a
        hard-coded class name, so a subclass reports its own definition
        instead of silently reporting this base class. Only the concrete
        class is included; base classes are not.

        Returns:
            ``"import inspect"``, a blank line, then the class source text.

        Raises:
            OSError: If ``inspect.getsource`` cannot retrieve the source
                (for example when the class was defined interactively).
            TypeError: If ``inspect.getsource`` rejects the class object.
        """
        imports = [
            "import inspect\n"  # May not be strictly needed by the agent logic itself
        ]
        class_source = inspect.getsource(type(self))
        return "\n".join(imports) + "\n" + class_source
42
 
 
43
  # --- Gradio UI and Logic ---
44
 
45
def run_and_submit_all(api_url: str, username: str):
    """
    Fetch all questions, run the BasicAgent on each, and submit every answer.

    Args:
        api_url: Base URL of the evaluation API. Surrounding whitespace and
            trailing slashes are ignored.
        username: Hugging Face username to submit under. Surrounding
            whitespace is ignored.

    Returns:
        A ``(status, results)`` tuple for the two Gradio outputs. ``status``
        is a human-readable message. ``results`` is a ``pandas.DataFrame``
        with one row per attempted question (columns ``Task ID``,
        ``Question``, ``Submitted Answer``), or ``None`` when the run stopped
        before any question was attempted.

    This function does not raise for network, HTTP, or agent failures; they
    are reported through ``status`` instead.
    """
    # Normalise once so whitespace-only input is rejected like empty input
    # and the same cleaned values are used for validation and submission.
    api_url = (api_url or "").strip().rstrip("/")
    username = (username or "").strip()
    if not api_url:
        return "Please enter the API URL.", None  # Status, DataFrame
    if not username:
        return "Please enter your Hugging Face username.", None  # Status, DataFrame

    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate the Agent
    try:
        agent = BasicAgent()
        agent_code = repr(agent)
    except Exception as e:
        print(f"Error instantiating agent or getting repr: {e}")
        return f"Error initializing agent: {e}", None

    # 2. Fetch All Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    if not questions_data:
        return "Fetched questions list is empty.", None
    if not isinstance(questions_data, list):
        # Iterating a dict or string would hand non-dict items to the loop
        # below; report the malformed payload instead of crashing.
        message = (
            "Error: Invalid data format received from API "
            f"(expected a list of questions, got {type(questions_data).__name__})."
        )
        print(message)
        return message, None
    print(f"Fetched {len(questions_data)} questions.")

    # 3. Run Agent on Each Question
    results_log = []  # Rows for the results table
    answers_payload = []  # Entries for the submission API
    for item in questions_data:
        if not isinstance(item, dict):
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue

        try:
            submitted_answer = agent(question_text)  # Call the agent's logic
        except Exception as e:
            # A failing task is shown in the table but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"AGENT ERROR: {e}",
            })
            continue

        answers_payload.append({
            "task_id": task_id,
            "submitted_answer": submitted_answer,
        })
        results_log.append({
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": submitted_answer,
        })

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers..."
    print(status_update)

    # 5. Submit to Leaderboard
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=45)
        response.raise_for_status()
        result_data = response.json()

        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score')}% "
            f"({result_data.get('correct_count')}/{result_data.get('total_attempted')} correct)\n"
            f"Message: {result_data.get('message')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except requests.exceptions.HTTPError as e:
        error_detail = e.response.text
        try:
            error_json = e.response.json()
        except ValueError:
            # Body is not JSON: keep the raw text. ValueError is the common
            # base of the JSON decode errors raised by Response.json().
            pass
        else:
            # Only a JSON object can carry a 'detail' field; a list or
            # scalar body must not raise from inside this handler.
            if isinstance(error_json, dict):
                error_detail = error_json.get('detail', error_detail)
        status_message = f"Submission Failed (HTTP {e.response.status_code}): {error_detail}"
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"

    # Every failure path still shows what was attempted.
    print(status_message)
    return status_message, pd.DataFrame(results_log)
171
 
172
 
173
  # --- Build Gradio Interface using Blocks ---
174
with gr.Blocks() as demo:
    # Page heading followed by a short usage blurb.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        "Enter the API URL and your username, then click Run. "
        "This will fetch all questions, run the *very basic* agent on them, "
        "submit all answers at once, and display the results."
    )

    # The two user inputs sit side by side in one row.
    with gr.Row():
        api_url_input = gr.Textbox(
            label="FastAPI API URL",
            value=DEFAULT_API_URL,
        )
        hf_username_input = gr.Textbox(label="Hugging Face Username")

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Outputs: a read-only status box and a table of per-question results.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=4,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # Clicking the button passes both inputs to run_and_submit_all and
    # routes its two return values to the status box and the table.
    run_button.click(
        fn=run_and_submit_all,
        inputs=[api_url_input, hf_username_input],
        outputs=[status_output, results_table],
    )


if __name__ == "__main__":
    # Launch the UI only when this file is run as a script.
    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True)