drAbreu committed on
Commit
227dcb0
·
1 Parent(s): 56a4634

Review agent added

Browse files
Files changed (3) hide show
  1. agents/llama_index_agent.py +114 -42
  2. app.py +35 -12
  3. tools/multimedia_tools.py +1 -1
agents/llama_index_agent.py CHANGED
@@ -197,7 +197,6 @@ class GaiaAgent(ReActAgent):
197
 
198
  query: What is the first name of the scientist who discovered penicillin?
199
  research_notes: After researching, I found that Sir Alexander Fleming discovered penicillin in 1928. The full answer is "Alexander Fleming" but the question only asks for the first name, which is "Alexander".
200
- answer_format: Return ONLY the first name, with no additional text, punctuation, or explanation.
201
  ```
202
 
203
  IMPORTANT: NEVER provide the final answer directly to the user. ALWAYS hand off to the writer_agent for proper formatting.
@@ -222,21 +221,20 @@ def create_writer_agent(model_config: Dict[str, Any]) -> ReActAgent:
222
  llm = OpenAI(
223
  model=model_name,
224
  api_key=api_key or os.getenv("OPENAI_API_KEY"),
225
- max_tokens=128,
226
  temperature=0.1,
227
-
228
  additional_kwargs={
229
- "max_tokens": 128,
230
- "temperature": 0.5}
231
- )
 
232
  elif model_provider.lower() == "anthropic":
233
  llm = Anthropic(
234
  model=model_name,
235
  api_key=api_key or os.getenv("ANTHROPIC_API_KEY"),
236
- temperature=1.0 if "3-7" in model_name else 0.5,
237
- thinking_dict={"type": "enabled", "budget_tokens": 5112} if "3-7" in model_name else None,
238
- max_tokens=2048*4,
239
-
240
  )
241
  else:
242
  raise ValueError(f"Unsupported model provider for writer agent: {model_provider}")
@@ -244,50 +242,124 @@ def create_writer_agent(model_config: Dict[str, Any]) -> ReActAgent:
244
  # Create and return the writer agent
245
  return ReActAgent(
246
  name="writer_agent",
247
- description="Formats the final answer exactly as specified for GAIA benchmark questions",
248
  system_prompt="""
249
- You are a specialized formatting agent for the GAIA benchmark. Your ONLY job is to take the research from the main agent and format the answer EXACTLY as required by the benchmark question.
250
 
251
  ## YOUR ROLE
252
  You will receive:
253
  - query: The original question
254
  - research_notes: The main agent's complete analysis and reasoning
255
- - answer_format: Specific formatting instructions for the final answer
256
 
257
- ## CRITICAL RULES
258
- 1. Your response MUST CONTAIN ONLY THE ANSWER - no explanations, no "the answer is" prefix
259
- 2. Follow the answer_format instructions precisely
260
- 3. Remove ALL unnecessary characters, spaces, punctuation, or wording
261
- 4. If asked for a name, provide ONLY the name
262
- 5. If asked for a number, provide ONLY the number
263
- 6. If asked for a list, format it EXACTLY as specified (comma-separated, alphabetical, etc.)
264
- 7. NEVER include your own thoughts or analysis
265
- 8. NEVER add preamble or conclusion text
266
 
267
- ## EXAMPLES OF CORRECT RESPONSES:
268
- When asked for "first name only": Alexander
269
- When asked for "comma-separated list in alphabetical order": apple, banana, cherry
270
- When asked for "single number": 42
271
- When asked for "opposite of word 'right'": left
272
- When asked for "How many ...": eleven
273
- When asked for "What says Yoda": "May the force be with you"
274
 
275
- ## CONCRETE EXAMPLE:
276
- When asked "The answer to the question of Universe, life and everything"
277
- - WRONG ANSWER: The answer to the question is 42.
278
- - RIGHT ANSWER: 42
 
 
 
 
279
 
280
- - For question `How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.`:
281
- - WRONG ANSWER : `She released three studio albums in that period – Misa Criolla (2000), Corazón Libre (2005) and Cantora (2009).`
282
- - RIGHT ANSWER: `Three`
 
 
283
 
284
- - For question `"Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec. What does Teal'c say in response to the question "Isn't that hot?"`:
285
- - WRONG ANSWER: `"He replies, “Extremely.”"`
286
- - RIGHT ANSWER: `Extremely`
287
-
288
- REMEMBER: Your ENTIRE response should be just the answer - nothing more, nothing less.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
290
- DO NOT EXPLAIN THE ANSWER. SIMPLY WRITE BACK THE ANSWER.
291
  """,
292
  llm=llm
293
  )
 
197
 
198
  query: What is the first name of the scientist who discovered penicillin?
199
  research_notes: After researching, I found that Sir Alexander Fleming discovered penicillin in 1928. The full answer is "Alexander Fleming" but the question only asks for the first name, which is "Alexander".
 
200
  ```
201
 
202
  IMPORTANT: NEVER provide the final answer directly to the user. ALWAYS hand off to the writer_agent for proper formatting.
 
221
  llm = OpenAI(
222
  model=model_name,
223
  api_key=api_key or os.getenv("OPENAI_API_KEY"),
224
+ max_tokens=256,
225
  temperature=0.1,
 
226
  additional_kwargs={
227
+ "max_tokens": 256,
228
+ "temperature": 0.1
229
+ }
230
+ )
231
  elif model_provider.lower() == "anthropic":
232
  llm = Anthropic(
233
  model=model_name,
234
  api_key=api_key or os.getenv("ANTHROPIC_API_KEY"),
235
+ temperature=0.1,
236
+ thinking_dict={"type": "enabled", "budget_tokens": 1024} if "3-7" in model_name else None,
237
+ max_tokens=1024
 
238
  )
239
  else:
240
  raise ValueError(f"Unsupported model provider for writer agent: {model_provider}")
 
242
  # Create and return the writer agent
243
  return ReActAgent(
244
  name="writer_agent",
245
+ description="Formats the final answer based on research notes for GAIA benchmark questions",
246
  system_prompt="""
247
+ You are a specialized formatting agent for the GAIA benchmark. Your job is to take the research from the main agent and format the answer according to the benchmark requirements.
248
 
249
  ## YOUR ROLE
250
  You will receive:
251
  - query: The original question
252
  - research_notes: The main agent's complete analysis and reasoning
 
253
 
254
+ ## FORMATTING RULES
255
+ 1. Format the answer according to the instructions in the `query` received
256
+ 2. Your answers will always be as minimal as necessary to answer the question
257
+ 3. Try to remove unnecessary characters, spaces, or wording
258
+ 4. If asked for a name, provide **ONLY** the name
259
+ 5. If asked for a number, provide **ONLY** the number
260
+ 6. If asked for a list, format it exactly as specified
 
 
261
 
262
+ ## DELEGATION TO REVIEW AGENT
263
+ After formatting your answer, ALWAYS delegate to the review_agent with:
264
+ - query: The original question
265
+ - formatted_answer: Your formatted answer
 
 
 
266
 
267
+ Example handoff to review_agent:
268
+ ```
269
+ I'll delegate to review_agent for final review.
270
+
271
+ query: What is the first name of the scientist who discovered penicillin?
272
+ formatted_answer: Alexander
273
+ format_requirements: Return ONLY the first name, with no additional text.
274
+ ```
275
 
276
+ IMPORTANT: ALWAYS hand off to the review_agent for final verification and cleanup.
277
+ """,
278
+ llm=llm,
279
+ can_handoff_to=["review_agent"]
280
+ )
281
 
282
+ def create_review_agent(model_config: Dict[str, Any]) -> ReActAgent:
283
+ """
284
+ Create a review agent that ensures the final answer follows exact formatting requirements.
285
+
286
+ Args:
287
+ model_config: Dictionary containing model_provider, model_name, and api_key
288
+
289
+ Returns:
290
+ A configured ReActAgent for final answer review and formatting
291
+ """
292
+ # Initialize LLM based on the provided configuration
293
+ model_provider = model_config.get("model_provider", "openai")
294
+ model_name = model_config.get("model_name", "gpt-4o-mini")
295
+ api_key = model_config.get("api_key")
296
+
297
+ if model_provider.lower() == "openai":
298
+ llm = OpenAI(
299
+ model=model_name,
300
+ api_key=api_key or os.getenv("OPENAI_API_KEY"),
301
+ max_tokens=128,
302
+ temperature=0.0, # Use 0 temperature for deterministic output
303
+ additional_kwargs={
304
+ "max_tokens": 128,
305
+ "temperature": 0.0
306
+ }
307
+ )
308
+ elif model_provider.lower() == "anthropic":
309
+ llm = Anthropic(
310
+ model=model_name,
311
+ api_key=api_key or os.getenv("ANTHROPIC_API_KEY"),
312
+ temperature=0.0, # Use 0 temperature for deterministic output
313
+ thinking_dict={"type": "enabled", "budget_tokens": 1024} if "3-7" in model_name else None,
314
+ max_tokens=128 # Keep token limit low for final answers
315
+ )
316
+ else:
317
+ raise ValueError(f"Unsupported model provider for review agent: {model_provider}")
318
+
319
+ # Create and return the review agent
320
+ return ReActAgent(
321
+ name="review_agent",
322
+ description="Ensures the final answer is formatted exactly as required, removing any unnecessary information",
323
+ system_prompt="""
324
+ You are the final review agent for the GAIA benchmark. Your ONLY job is to ensure the answer is in the EXACT format required. This is EXTREMELY important for benchmark scoring.
325
+
326
+ ## YOUR ROLE
327
+ You will receive:
328
+ - query: The original question
329
+ - formatted_answer: The answer formatted by the writer agent
330
+
331
+ ## CRITICAL RULES
332
+ 1. Your ENTIRE response must be ONLY the final answer - NOTHING ELSE
333
+ 2. Remove ALL of the following:
334
+ - Explanations like "The answer is..." or "I found that..."
335
+ - Quotation marks (unless explicitly required)
336
+ - Punctuation at the end (unless explicitly required)
337
+ - Unnecessary whitespace
338
+ 3. If no specific format is mentioned, make the answer as minimal as possible:
339
+ - For names/words: just the name/word (e.g., "Paris")
340
+ - For numbers: just the number (e.g., "42")
341
+ - For lists: comma-separated values (e.g., "apple, banana, cherry")
342
+ 4. NEVER add ANY commentary, explanation, or additional information
343
+ 5. Double-check for exact formatting requirements like:
344
+ - Numerical format (e.g., "42" vs "forty-two")
345
+ - Case sensitivity (e.g., "PARIS" vs "Paris")
346
+ - List formatting (e.g., comma-separated vs numbered)
347
+
348
+ ## OUTPUT EXAMPLES
349
+ - Input: "The answer is Alexander."
350
+ Output: Alexander
351
+ - Input: "The result is 42 because..."
352
+ Output: 42
353
+ - Input: "The capital of France is Paris."
354
+ Output: Paris
355
+ - Input: "I found that it's eleven."
356
+ Output: eleven
357
+ - Input: "These actors starred in the film: Tom Hanks, Meg Ryan, and Bill Pullman."
358
+ Output: Tom Hanks, Meg Ryan, Bill Pullman
359
+ - Input: "She published studio albums "Album 1", "Album 2", "Album 3", so in total 3."
360
+ Output: 3
361
 
362
+ REMEMBER: Your ENTIRE response should be just the bare answer with NOTHING else.
363
  """,
364
  llm=llm
365
  )
app.py CHANGED
@@ -28,17 +28,18 @@ OPENAI = {
28
  class BasicAgent:
29
  def __init__(
30
  self,
31
- # model_provider="anthropic",
32
- # model_name="claude-3-7-sonnet-latest",
33
- model_provider="openai",
34
- model_name="o4-mini",
35
  api_key=None,
36
  use_separate_writer_model=True,
37
  writer_model_provider="openai",
38
- writer_model_name="gpt-4o-mini"
 
 
 
39
  ):
40
  """
41
- Initialize the BasicAgent with a multi-agent workflow.
42
 
43
  Args:
44
  model_provider: LLM provider for main agent
@@ -47,6 +48,9 @@ class BasicAgent:
47
  use_separate_writer_model: Whether to use a different model for the writer agent
48
  writer_model_provider: LLM provider for writer agent (if separate)
49
  writer_model_name: Model name for writer agent (if separate)
 
 
 
50
  """
51
  # Configure the main reasoning agent
52
  main_model_config = {
@@ -64,22 +68,37 @@ class BasicAgent:
64
  }
65
  else:
66
  writer_model_config = main_model_config
 
 
 
 
 
 
 
 
 
 
67
 
68
- # Create the main agent
69
  self.main_agent = GaiaAgent(**main_model_config)
70
-
71
- # Create the writer agent
72
  self.writer_agent = create_writer_agent(writer_model_config)
 
 
 
 
73
 
74
  # Set up the agent workflow with shared context
75
  self.agent_workflow = AgentWorkflow(
76
- agents=[self.main_agent, self.writer_agent],
77
  root_agent=self.main_agent.name,
78
  initial_state={
79
  "original_question": "",
 
 
80
  "analysis_notes": "",
81
  "format_requirements": "",
82
  "next_agent": "",
 
83
  "final_answer": ""
84
  }
85
  )
@@ -89,7 +108,11 @@ class BasicAgent:
89
  print(f"Writer agent using: {writer_model_provider} {writer_model_name}")
90
  else:
91
  print(f"Writer agent using same model as main agent")
92
-
 
 
 
 
93
  def __call__(self, question_data: dict) -> str:
94
  """Process a GAIA benchmark question and return the formatted answer."""
95
  # Extract question text and task_id
@@ -141,7 +164,7 @@ class BasicAgent:
141
 
142
  # Extract the final answer from the writer agent's response
143
  final_answer = response.response.blocks[-1].text
144
- print(f"Agent returning answer: {final_answer}")
145
  return final_answer
146
 
147
  def download_task_file(self, question_data: dict) -> str:
 
28
  class BasicAgent:
29
  def __init__(
30
  self,
31
+ model_provider="anthropic",
32
+ model_name="claude-3-7-sonnet-latest",
 
 
33
  api_key=None,
34
  use_separate_writer_model=True,
35
  writer_model_provider="openai",
36
+ writer_model_name="gpt-4o-mini",
37
+ use_separate_review_model=True,
38
+ review_model_provider="openai",
39
+ review_model_name="gpt-4o-mini"
40
  ):
41
  """
42
+ Initialize the BasicAgent with a three-agent workflow.
43
 
44
  Args:
45
  model_provider: LLM provider for main agent
 
48
  use_separate_writer_model: Whether to use a different model for the writer agent
49
  writer_model_provider: LLM provider for writer agent (if separate)
50
  writer_model_name: Model name for writer agent (if separate)
51
+ use_separate_review_model: Whether to use a different model for the review agent
52
+ review_model_provider: LLM provider for review agent (if separate)
53
+ review_model_name: Model name for review agent (if separate)
54
  """
55
  # Configure the main reasoning agent
56
  main_model_config = {
 
68
  }
69
  else:
70
  writer_model_config = main_model_config
71
+
72
+ # Configure the review agent (either same as main or different)
73
+ if use_separate_review_model:
74
+ review_model_config = {
75
+ "model_provider": review_model_provider,
76
+ "model_name": review_model_name,
77
+ "api_key": api_key # Use same API key for simplicity
78
+ }
79
+ else:
80
+ review_model_config = main_model_config
81
 
82
+ # Create the agents
83
  self.main_agent = GaiaAgent(**main_model_config)
 
 
84
  self.writer_agent = create_writer_agent(writer_model_config)
85
+ self.review_agent = create_review_agent(review_model_config)
86
+
87
+ # Update the GaiaAgent's can_handoff_to to include review_agent
88
+ self.main_agent.can_handoff_to = ["writer_agent", "review_agent"]
89
 
90
  # Set up the agent workflow with shared context
91
  self.agent_workflow = AgentWorkflow(
92
+ agents=[self.main_agent, self.writer_agent, self.review_agent],
93
  root_agent=self.main_agent.name,
94
  initial_state={
95
  "original_question": "",
96
+ "task_id": "",
97
+ "audio_file_path": "",
98
  "analysis_notes": "",
99
  "format_requirements": "",
100
  "next_agent": "",
101
+ "formatted_answer": "",
102
  "final_answer": ""
103
  }
104
  )
 
108
  print(f"Writer agent using: {writer_model_provider} {writer_model_name}")
109
  else:
110
  print(f"Writer agent using same model as main agent")
111
+ if use_separate_review_model:
112
+ print(f"Review agent using: {review_model_provider} {review_model_name}")
113
+ else:
114
+ print(f"Review agent using same model as main agent")
115
+
116
  def __call__(self, question_data: dict) -> str:
117
  """Process a GAIA benchmark question and return the formatted answer."""
118
  # Extract question text and task_id
 
164
 
165
  # Extract the final answer from the writer agent's response
166
  final_answer = response.response.blocks[-1].text
167
+ print(f"Agent returning final answer: {final_answer}")
168
  return final_answer
169
 
170
  def download_task_file(self, question_data: dict) -> str:
tools/multimedia_tools.py CHANGED
@@ -195,7 +195,7 @@ class VisionAnalyzerAgent:
195
  try:
196
  response = self.client.chat.completions.create(
197
  model=self.model_name,
198
- max_tokens=1024,
199
  messages=[
200
  {
201
  "role": "user",
 
195
  try:
196
  response = self.client.chat.completions.create(
197
  model=self.model_name,
198
+ max_tokens=1024*20,
199
  messages=[
200
  {
201
  "role": "user",