Delanoe Pirard committed on
Commit a23082c · 1 Parent(s): 78bc862

First commit

Files changed (48)
  1. .env +50 -0
  2. .idea/.gitignore +8 -0
  3. .idea/GAIA_Agent.iml +14 -0
  4. .idea/inspectionProfiles/profiles_settings.xml +6 -0
  5. .idea/misc.xml +7 -0
  6. .idea/modules.xml +8 -0
  7. .idea/vcs.xml +6 -0
  8. agents/__init__.py +0 -0
  9. agents/__pycache__/__init__.cpython-311.pyc +0 -0
  10. agents/__pycache__/advanced_validation_agent.cpython-311.pyc +0 -0
  11. agents/__pycache__/code_agent.cpython-311.pyc +0 -0
  12. agents/__pycache__/figure_interpretation_agent.cpython-311.pyc +0 -0
  13. agents/__pycache__/image_analyzer_agent.cpython-311.pyc +0 -0
  14. agents/__pycache__/long_context_management_agent.cpython-311.pyc +0 -0
  15. agents/__pycache__/math_agent.cpython-311.pyc +0 -0
  16. agents/__pycache__/planner_agent.cpython-311.pyc +0 -0
  17. agents/__pycache__/reasoning_agent.cpython-311.pyc +0 -0
  18. agents/__pycache__/research_agent.cpython-311.pyc +0 -0
  19. agents/__pycache__/role_agent.cpython-311.pyc +0 -0
  20. agents/__pycache__/text_analyzer_agent.cpython-311.pyc +0 -0
  21. agents/__pycache__/verifier_agent.cpython-311.pyc +0 -0
  22. agents/advanced_validation_agent.py +404 -0
  23. agents/code_agent.py +193 -0
  24. agents/figure_interpretation_agent.py +303 -0
  25. agents/image_analyzer_agent.py +96 -0
  26. agents/long_context_management_agent.py +452 -0
  27. agents/math_agent.py +696 -0
  28. agents/planner_agent.py +253 -0
  29. agents/reasoning_agent.py +167 -0
  30. agents/research_agent.py +622 -0
  31. agents/role_agent.py +215 -0
  32. agents/text_analyzer_agent.py +388 -0
  33. agents/verifier_agent.py +300 -0
  34. app.py +421 -0
  35. current_architecture.md +91 -0
  36. gaia_improvement_plan.md +943 -0
  37. prompts/advanced_validation_agent_prompt.txt +31 -0
  38. prompts/code_gen_prompt.txt +14 -0
  39. prompts/figure_interpretation_agent_prompt.txt +29 -0
  40. prompts/image_analyzer_prompt.txt +69 -0
  41. prompts/long_context_management_agent_prompt.txt +28 -0
  42. prompts/planner_agent_prompt.txt +33 -0
  43. prompts/reasoning_agent_prompt.txt +13 -0
  44. prompts/text_analyzer_prompt.txt +43 -0
  45. pyproject.toml +31 -0
  46. todo.md +44 -0
  47. user_requirements.md +63 -0
  48. uv.lock +0 -0
.env ADDED
@@ -0,0 +1,50 @@
+ # Environment variables for GAIA Multi-Agent Framework
+
+ # API Keys
+ GEMINI_API_KEY="AIzaSyDOQRtAJd-Kj-H6VT_0t38cZTz4Halgi3U" # For Google AI Studio
+ GOOGLE_API_KEY="AIzaSyACcl4uzlyqz4glW-_uCj0xGPSSH0uloAY" # For Google Custom Search JSON API
+ GOOGLE_CSE_ID="004c6b8673f0c4dd5" # For Google Custom Search Engine ID
+ TAVILY_API_KEY="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" # For Tavily Search API
+ ALPAFLOW_OPENAI_API_KEY="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" # For o4-mini model (or other OpenAI compatible endpoint)
+ WOLFRAM_ALPHA_APP_ID="YOUR_WOLFRAM_ALPHA_APP_ID" # For WolframAlpha API
+
+ # GAIA Benchmark API
+ GAIA_API_URL="https://agents-course-unit4-scoring.hf.space"
+
+ # Model Names (using defaults from original code, can be overridden)
+ ROLE_EMBED_MODEL="Snowflake/snowflake-arctic-embed-l-v2.0"
+ ROLE_RERANKER_MODEL="Alibaba-NLP/gte-multilingual-reranker-base"
+ ROLE_PROMPT_DATASET="fka/awesome-chatgpt-prompts"
+ ROLE_LLM_MODEL="models/gemini-1.5-pro"
+
+ IMAGE_ANALYZER_LLM_MODEL="models/gemini-1.5-pro"
+
+ VERIFIER_LLM_MODEL="models/gemini-2.0-flash"
+ VERIFIER_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+ VERIFIER_CONFIDENCE_THRESHOLD="0.7"
+
+ RESEARCH_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+ # RESEARCH_AGENT_CHROME_NO_SANDBOX="true" # Example config for research agent browser
+ # RESEARCH_AGENT_CHROME_DISABLE_DEV_SHM="true"
+
+ TEXT_ANALYZER_LLM_MODEL="models/gemini-1.5-pro"
+ TEXT_ANALYZER_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+
+ REASONING_TOOL_LLM_MODEL="o4-mini"
+ REASONING_TOOL_API_KEY_ENV="ALPAFLOW_OPENAI_API_KEY" # Env var name containing the key for reasoning tool LLM
+ REASONING_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+
+ PLANNER_TOOL_LLM_MODEL="models/gemini-1.5-pro"
+ PLANNER_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+
+ CODE_GEN_LLM_MODEL="o4-mini"
+ CODE_GEN_API_KEY_ENV="ALPAFLOW_OPENAI_API_KEY" # Env var name containing the key for code gen LLM
+ CODE_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+
+ MATH_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+
+ # New Feature Config (Placeholders)
+ YOUTUBE_CHUNK_DURATION_SECONDS="60"
+ TRANSCRIPTION_WHISPER_CPP_PATH="/path/to/whisper.cpp/main" # Example path
+ TRANSCRIPTION_WHISPER_MODEL_PATH="/path/to/whisper/model.bin" # Example path
+
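
Note on the *_API_KEY_ENV entries above: each holds the name of another environment variable that contains the actual key, a two-level lookup. A minimal sketch of that pattern, mirroring how generate_python_code in agents/code_agent.py (below) resolves its key; the default names are the values from this file:

    import os

    # CODE_GEN_API_KEY_ENV names the env var that stores the real key.
    key_env_name = os.getenv("CODE_GEN_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")
    api_key = os.getenv(key_env_name)
    if not api_key:
        raise ValueError(f"{key_env_name} must be set for code generation")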
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
+ # Editor-based HTTP Client requests
+ /httpRequests/
+ # Datasource local storage ignored files
+ /dataSources/
+ /dataSources.local.xml
.idea/GAIA_Agent.iml ADDED
@@ -0,0 +1,14 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <module type="PYTHON_MODULE" version="4">
+   <component name="NewModuleRootManager">
+     <content url="file://$MODULE_DIR$">
+       <excludeFolder url="file://$MODULE_DIR$/.venv" />
+     </content>
+     <orderEntry type="jdk" jdkName="uv (GAIA_Agent)" jdkType="Python SDK" />
+     <orderEntry type="sourceFolder" forTests="false" />
+   </component>
+   <component name="PyDocumentationSettings">
+     <option name="format" value="PLAIN" />
+     <option name="myDocStringFormat" value="Plain" />
+   </component>
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
+ <component name="InspectionProjectProfileManager">
+   <settings>
+     <option name="USE_PROJECT_PROFILE" value="false" />
+     <version value="1.0" />
+   </settings>
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="Black">
+     <option name="sdkName" value="uv (Zonos)" />
+   </component>
+   <component name="ProjectRootManager" version="2" project-jdk-name="uv (GAIA_Agent)" project-jdk-type="Python SDK" />
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectModuleManager">
+     <modules>
+       <module fileurl="file://$PROJECT_DIR$/.idea/GAIA_Agent.iml" filepath="$PROJECT_DIR$/.idea/GAIA_Agent.iml" />
+     </modules>
+   </component>
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="VcsDirectoryMappings">
+     <mapping directory="$PROJECT_DIR$" vcs="Git" />
+   </component>
+ </project>
agents/__init__.py ADDED
File without changes
agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (163 Bytes).
agents/__pycache__/advanced_validation_agent.cpython-311.pyc ADDED
Binary file (22.8 kB).
agents/__pycache__/code_agent.cpython-311.pyc ADDED
Binary file (10.6 kB).
agents/__pycache__/figure_interpretation_agent.cpython-311.pyc ADDED
Binary file (16.5 kB).
agents/__pycache__/image_analyzer_agent.cpython-311.pyc ADDED
Binary file (4.95 kB).
agents/__pycache__/long_context_management_agent.cpython-311.pyc ADDED
Binary file (23.9 kB).
agents/__pycache__/math_agent.cpython-311.pyc ADDED
Binary file (47.5 kB).
agents/__pycache__/planner_agent.cpython-311.pyc ADDED
Binary file (12.5 kB).
agents/__pycache__/reasoning_agent.cpython-311.pyc ADDED
Binary file (8.62 kB).
agents/__pycache__/research_agent.cpython-311.pyc ADDED
Binary file (40.8 kB).
agents/__pycache__/role_agent.cpython-311.pyc ADDED
Binary file (12.7 kB).
agents/__pycache__/text_analyzer_agent.cpython-311.pyc ADDED
Binary file (23.7 kB).
agents/__pycache__/verifier_agent.cpython-311.pyc ADDED
Binary file (17 kB).
agents/advanced_validation_agent.py ADDED
@@ -0,0 +1,404 @@
+ import os
+ import logging
+ import json
+ from typing import List, Dict, Optional, Union
+ from dotenv import load_dotenv
+
+ from llama_index.core.agent.workflow import ReActAgent
+ from llama_index.core.tools import FunctionTool
+ from llama_index.llms.google_genai import GoogleGenAI
+ # Assuming research_agent might be needed for handoff, but not directly imported
+
+ # Load environment variables
+ load_dotenv()
+
+ # Setup logging
+ logger = logging.getLogger(__name__)
+
+ # Helper function to load prompt from file
+ def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+     """Loads a prompt from a text file."""
+     try:
+         script_dir = os.path.dirname(__file__)
+         prompt_path = os.path.join(script_dir, filename)
+         with open(prompt_path, "r") as f:
+             prompt = f.read()
+         logger.info(f"Successfully loaded prompt from {prompt_path}")
+         return prompt
+     except FileNotFoundError:
+         logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
+         return default_prompt
+     except Exception as e:
+         logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+         return default_prompt
+
+ # --- Tool Functions ---
+
+ # Note: cross_reference_check might require fetching content.
+ # This version assumes content is provided or delegates fetching via handoff.
+ def cross_reference_check(claim: str, sources_content: List[Dict[str, str]]) -> Dict[str, Union[str, List[str]]]:
+     """Verifies a claim against provided source content.
+     Args:
+         claim (str): The statement or piece of information to verify.
+         sources_content (List[Dict[str, str]]): A list of dictionaries, each with "url" (optional) and "content" keys.
+     Returns:
+         Dict: A dictionary summarizing findings (supporting, contradicting, inconclusive) per source.
+     """
+     logger.info(f"Cross-referencing claim: {claim[:100]}... against {len(sources_content)} sources.")
+     if not sources_content:
+         return {"error": "No source content provided for cross-referencing."}
+
+     # LLM configuration
+     llm_model = os.getenv("VALIDATION_LLM_MODEL", "models/gemini-1.5-pro") # Use a capable model
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for cross-referencing LLM.")
+         return {"error": "GEMINI_API_KEY not set."}
+
+     results = []
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model)
+         logger.info(f"Using cross-referencing LLM: {llm_model}")
+
+         for i, source in enumerate(sources_content):
+             source_url = source.get("url", f"Source {i+1}")
+             content = source.get("content", "")
+             if not content:
+                 logger.warning(f"Source {source_url} has no content.")
+                 results.append({"source": source_url, "finding": "inconclusive", "reason": "No content provided"})
+                 continue
+
+             # Truncate long content
+             max_content_len = 15000
+             if len(content) > max_content_len:
+                 logger.warning(f"Truncating content from {source_url} to {max_content_len} chars.")
+                 content = content[:max_content_len]
+
+             prompt = (
+                 f"Review the following source content and determine if it supports, "
+                 f"contradicts, or is inconclusive regarding the claim.\n\n"
+                 f"CLAIM: {claim}\n\n"
+                 f"SOURCE CONTENT from {source_url}:\n{content}\n\n"
+                 f"ANALYSIS: Does the source content directly support the claim, directly contradict it, "
+                 f"or provide no relevant information (inconclusive)? "
+                 f"Provide a brief reason for your conclusion. Respond in JSON format: "
+                 f'{{"finding": "support/contradict/inconclusive", "reason": "Your brief explanation"}}'
+             )
+
+             response = llm.complete(prompt)
+             try:
+                 # Attempt to parse JSON, handle potential markdown fences
+                 json_str = response.text.strip()
+                 if json_str.startswith("```json"):
+                     json_str = json_str[7:]
+                 if json_str.endswith("```"):
+                     json_str = json_str[:-3]
+                 finding_data = json.loads(json_str.strip())
+                 results.append({
+                     "source": source_url,
+                     "finding": finding_data.get("finding", "error"),
+                     "reason": finding_data.get("reason", "LLM response parsing failed")
+                 })
+             except json.JSONDecodeError:
+                 logger.error(f"Failed to parse JSON response for source {source_url}: {response.text}")
+                 results.append({"source": source_url, "finding": "error", "reason": "LLM response not valid JSON"})
+             except Exception as parse_err:
+                 logger.error(f"Error processing LLM response for source {source_url}: {parse_err}")
+                 results.append({"source": source_url, "finding": "error", "reason": f"Processing error: {parse_err}"})
+
+         logger.info("Cross-referencing check completed.")
+         return {"claim": claim, "results": results}
+
+     except Exception as e:
+         logger.error(f"LLM call failed during cross-referencing: {e}", exc_info=True)
+         return {"error": f"Error during cross-referencing: {e}"}
+
+ def logical_consistency_check(text: str) -> Dict[str, Union[bool, str, List[str]]]:
+     """Analyzes text for internal logical contradictions or fallacies using an LLM."""
+     logger.info(f"Checking logical consistency for text (length: {len(text)} chars).")
+
+     # LLM configuration
+     llm_model = os.getenv("VALIDATION_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for consistency check LLM.")
+         return {"error": "GEMINI_API_KEY not set."}
+
+     # Truncate long text
+     max_input_chars = 30000
+     if len(text) > max_input_chars:
+         logger.warning(f"Input text truncated to {max_input_chars} chars for consistency check.")
+         text = text[:max_input_chars]
+
+     prompt = (
+         f"Analyze the following text for logical consistency. Identify any internal contradictions, "
+         f"logical fallacies, or significant inconsistencies in reasoning. "
+         f"If the text is logically consistent, state that clearly. If inconsistencies are found, "
+         f"list them with brief explanations.\n\n"
+         f"TEXT:\n{text}\n\n"
+         f"ANALYSIS: Respond in JSON format: "
+         f'{{"consistent": true/false, "findings": ["Description of inconsistency 1", "Description of inconsistency 2", ...]}}'
+         f"(If consistent is true, findings should be an empty list)."
+     )
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model, response_mime_type="application/json")
+         logger.info(f"Using consistency check LLM: {llm_model}")
+         response = llm.complete(prompt)
+
+         # Attempt to parse JSON
+         json_str = response.text.strip()
+         if json_str.startswith("```json"):
+             json_str = json_str[7:]
+         if json_str.endswith("```"):
+             json_str = json_str[:-3]
+         result_data = json.loads(json_str.strip())
+
+         # Basic validation
+         if "consistent" not in result_data or "findings" not in result_data:
+             raise ValueError("LLM response missing required keys: consistent, findings")
+         if not isinstance(result_data["findings"], list):
+             raise ValueError("LLM response findings key is not a list")
+
+         logger.info(f"Logical consistency check completed. Consistent: {result_data.get('consistent')}")
+         return result_data
+
+     except json.JSONDecodeError as json_err:
+         logger.error(f"Failed to parse JSON response from LLM: {json_err}. Response text: {response.text}")
+         return {"error": f"Failed to parse LLM JSON response: {json_err}"}
+     except ValueError as val_err:
+         logger.error(f"Invalid JSON structure from LLM: {val_err}. Response text: {response.text}")
+         return {"error": f"Invalid JSON structure from LLM: {val_err}"}
+     except Exception as e:
+         logger.error(f"LLM call failed during consistency check: {e}", exc_info=True)
+         return {"error": f"Error during consistency check: {e}"}
+
+ def bias_detection(text: str, source_context: Optional[str] = None) -> Dict[str, Union[bool, List[Dict[str, str]]]]:
+     """Examines text for potential biases using an LLM, considering source context if provided."""
+     logger.info(f"Detecting bias in text (length: {len(text)} chars). Context provided: {source_context is not None}")
+
+     # LLM configuration
+     llm_model = os.getenv("VALIDATION_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for bias detection LLM.")
+         return {"error": "GEMINI_API_KEY not set."}
+
+     # Truncate long text/context
+     max_input_chars = 25000
+     if len(text) > max_input_chars:
+         logger.warning(f"Input text truncated to {max_input_chars} chars for bias detection.")
+         text = text[:max_input_chars]
+     if source_context and len(source_context) > 5000:
+         logger.warning(f"Source context truncated to 5000 chars for bias detection.")
+         source_context = source_context[:5000]
+
+     context_prompt = f"\nSOURCE CONTEXT (optional background about the source):\n{source_context}" if source_context else ""
+
+     prompt = (
+         f"Analyze the following text for potential cognitive and presentation biases (e.g., confirmation bias, framing, selection bias, loaded language, appeal to emotion). "
+         f"Consider the language, tone, and selection of information. Also consider the source context if provided. "
+         f"If no significant biases are detected, state that clearly. If biases are found, list them, identify the type of bias, and provide a brief explanation with evidence from the text.\n\n"
+         f"TEXT:\n{text}"
+         f"{context_prompt}\n\n"
+         f"ANALYSIS: Respond in JSON format: "
+         f'{{"bias_detected": true/false, "findings": [{{"bias_type": "Type of Bias", "explanation": "Explanation with evidence"}}, ...]}}'
+         f"(If bias_detected is false, findings should be an empty list)."
+     )
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model, response_mime_type="application/json")
+         logger.info(f"Using bias detection LLM: {llm_model}")
+         response = llm.complete(prompt)
+
+         # Attempt to parse JSON
+         json_str = response.text.strip()
+         if json_str.startswith("```json"):
+             json_str = json_str[7:]
+         if json_str.endswith("```"):
+             json_str = json_str[:-3]
+         result_data = json.loads(json_str.strip())
+
+         # Basic validation
+         if "bias_detected" not in result_data or "findings" not in result_data:
+             raise ValueError("LLM response missing required keys: bias_detected, findings")
+         if not isinstance(result_data["findings"], list):
+             raise ValueError("LLM response findings key is not a list")
+
+         logger.info(f"Bias detection check completed. Bias detected: {result_data.get('bias_detected')}")
+         return result_data
+
+     except json.JSONDecodeError as json_err:
+         logger.error(f"Failed to parse JSON response from LLM: {json_err}. Response text: {response.text}")
+         return {"error": f"Failed to parse LLM JSON response: {json_err}"}
+     except ValueError as val_err:
+         logger.error(f"Invalid JSON structure from LLM: {val_err}. Response text: {response.text}")
+         return {"error": f"Invalid JSON structure from LLM: {val_err}"}
+     except Exception as e:
+         logger.error(f"LLM call failed during bias detection: {e}", exc_info=True)
+         return {"error": f"Error during bias detection: {e}"}
+
+ # Note: fact_check_with_search primarily prepares the request for research_agent.
+ def fact_check_with_search(claim: str) -> Dict[str, str]:
+     """Prepares a request to fact-check a specific claim using external search.
+     This tool does not perform the search itself but structures the request
+     for handoff to the research_agent.
+     Args:
+         claim (str): The specific factual claim to be checked.
+     Returns:
+         Dict: A dictionary indicating the need for handoff and the query.
+     """
+     logger.info(f"Preparing fact-check request for claim: {claim[:150]}...")
+     # This tool signals the need for handoff to the research agent.
+     # The agent's prompt should guide it to use this tool's output
+     # to formulate the handoff message/query.
+     return {
+         "action": "handoff",
+         "target_agent": "research_agent",
+         "query": f"Fact-check the following claim: {claim}. Provide supporting or contradicting evidence from reliable sources.",
+         "tool_name": "fact_check_with_search" # For context
+     }
+
+ # --- Tool Definitions ---
+ cross_reference_tool = FunctionTool.from_defaults(
+     fn=cross_reference_check,
+     name="cross_reference_check",
+     description=(
+         "Verifies a claim against a list of provided source contents (text). "
+         "Input: claim (str), sources_content (List[Dict[str, str]] with 'content' key). "
+         "Output: Dict summarizing findings per source or error."
+     ),
+ )
+
+ logical_consistency_tool = FunctionTool.from_defaults(
+     fn=logical_consistency_check,
+     name="logical_consistency_check",
+     description=(
+         "Analyzes text for internal logical contradictions or fallacies. "
+         "Input: text (str). Output: Dict with 'consistent' (bool) and 'findings' (List[str]) or error."
+     ),
+ )
+
+ bias_detection_tool = FunctionTool.from_defaults(
+     fn=bias_detection,
+     name="bias_detection",
+     description=(
+         "Examines text for potential biases (cognitive, presentation). "
+         "Input: text (str), Optional: source_context (str). "
+         "Output: Dict with 'bias_detected' (bool) and 'findings' (List[Dict]) or error."
+     ),
+ )
+
+ fact_check_tool = FunctionTool.from_defaults(
+     fn=fact_check_with_search,
+     name="fact_check_with_search",
+     description=(
+         "Prepares a request to fact-check a specific claim using external search via the research_agent. "
+         "Input: claim (str). Output: Dict indicating handoff parameters for research_agent."
+     ),
+ )
+
+ # --- Agent Initialization ---
+ def initialize_advanced_validation_agent() -> ReActAgent:
+     """Initializes the Advanced Validation Agent."""
+     logger.info("Initializing AdvancedValidationAgent...")
+
+     # Configuration for the agent's main LLM
+     agent_llm_model = os.getenv("VALIDATION_AGENT_LLM_MODEL", "models/gemini-1.5-pro") # Use Pro for main agent logic
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for AdvancedValidationAgent.")
+         raise ValueError("GEMINI_API_KEY must be set for AdvancedValidationAgent")
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+         logger.info(f"Using agent LLM: {agent_llm_model}")
+
+         # Load system prompt
+         default_system_prompt = ("You are AdvancedValidationAgent... [Default prompt content - replace with actual]" # Placeholder
+                                  )
+         system_prompt = load_prompt_from_file("../prompts/advanced_validation_agent_prompt.txt", default_system_prompt)
+         if system_prompt == default_system_prompt:
+             logger.warning("Using default/fallback system prompt for AdvancedValidationAgent.")
+
+         # Define available tools
+         tools = [
+             cross_reference_tool,
+             logical_consistency_tool,
+             bias_detection_tool,
+             fact_check_tool # Tool to initiate handoff for external search
+         ]
+
+         # Define valid handoff targets
+         valid_handoffs = [
+             "research_agent", # For fact-checking requiring external search
+             "planner_agent", # To return results
+             "reasoning_agent" # To return results
+         ]
+
+         agent = ReActAgent(
+             name="advanced_validation_agent",
+             description=(
+                 "Critically evaluates information for accuracy, consistency, and bias using specialized tools. "
+                 "Can cross-reference claims, check logic, detect bias, and initiate external fact-checks via research_agent."
+             ),
+             tools=tools,
+             llm=llm,
+             system_prompt=system_prompt,
+             can_handoff_to=valid_handoffs,
+             verbose=True # Enable verbose logging
+         )
+         logger.info("AdvancedValidationAgent initialized successfully.")
+         return agent
+
+     except Exception as e:
+         logger.error(f"Error during AdvancedValidationAgent initialization: {e}", exc_info=True)
+         raise
+
+ # Example usage (for testing if run directly)
+ if __name__ == "__main__":
+     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+     logger.info("Running advanced_validation_agent.py directly for testing...")
+
+     # Check required keys
+     required_keys = ["GEMINI_API_KEY"]
+     missing_keys = [key for key in required_keys if not os.getenv(key)]
+     if missing_keys:
+         print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+     else:
+         try:
+             # Test cross-reference tool
+             print("\nTesting cross_reference_check...")
+             test_claim = "The Eiffel Tower is located in Berlin."
+             test_sources = [
+                 {"url": "wiki/paris", "content": "Paris is the capital of France, known for the Eiffel Tower."},
+                 {"url": "wiki/berlin", "content": "Berlin is the capital of Germany, featuring the Brandenburg Gate."}
+             ]
+             cross_ref_result = cross_reference_check(test_claim, test_sources)
+             print(f"Cross-reference Result:\n{json.dumps(cross_ref_result, indent=2)}")
+
+             # Test logical consistency tool
+             print("\nTesting logical_consistency_check...")
+             inconsistent_text = "All birds can fly. Penguins are birds. Therefore, penguins can fly."
+             consistency_result = logical_consistency_check(inconsistent_text)
+             print(f"Consistency Result:\n{json.dumps(consistency_result, indent=2)}")
+
+             # Test bias detection tool
+             print("\nTesting bias_detection...")
+             biased_text = "The revolutionary new policy is clearly the only sensible path forward, despite what uninformed critics might claim."
+             bias_result = bias_detection(biased_text)
+             print(f"Bias Detection Result:\n{json.dumps(bias_result, indent=2)}")
+
+             # Test fact_check tool (prepares handoff)
+             print("\nTesting fact_check_with_search...")
+             fact_check_prep = fact_check_with_search("Is the Earth flat?")
+             print(f"Fact Check Prep Result:\n{json.dumps(fact_check_prep, indent=2)}")
+
+             # Initialize the agent (optional)
+             # test_agent = initialize_advanced_validation_agent()
+             # print("\nAdvanced Validation Agent initialized successfully for testing.")
+
+         except Exception as e:
+             print(f"Error during testing: {e}")
+
agents/code_agent.py ADDED
@@ -0,0 +1,193 @@
+ import os
+ import logging
+ from dotenv import load_dotenv
+
+ from llama_index.core.agent.workflow import CodeActAgent, ReActAgent
+ from llama_index.core.tools import FunctionTool
+ from llama_index.llms.google_genai import GoogleGenAI
+ from llama_index.llms.openai import OpenAI
+ from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
+
+ # Load environment variables
+ load_dotenv()
+
+ # Setup logging
+ logger = logging.getLogger(__name__)
+
+ # Helper function to load prompt from file
+ def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+     """Loads a prompt from a text file."""
+     try:
+         # Assuming the prompt file is in the same directory as the agent script
+         script_dir = os.path.dirname(__file__)
+         prompt_path = os.path.join(script_dir, filename)
+         with open(prompt_path, "r") as f:
+             prompt = f.read()
+         logger.info(f"Successfully loaded prompt from {prompt_path}")
+         return prompt
+     except FileNotFoundError:
+         logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
+         return default_prompt
+     except Exception as e:
+         logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+         return default_prompt
+
+ def generate_python_code(prompt: str) -> str:
+     """
+     Generate valid Python code from a natural language description using a configured LLM.
+     Args:
+         prompt (str): A clear description of the desired Python code functionality.
+     Returns:
+         str: A string containing the generated Python code.
+     Raises:
+         ValueError: If required API key is not set.
+         Exception: If the LLM call fails.
+     """
+     logger.info(f"Generating Python code for prompt: {prompt[:100]}...")
+
+     # Configuration for code generation LLM
+     gen_llm_model = os.getenv("CODE_GEN_LLM_MODEL", "o4-mini")
+     gen_api_key_env = os.getenv("CODE_GEN_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")
+     gen_api_key = os.getenv(gen_api_key_env)
+
+     if not gen_api_key:
+         logger.error(f"{gen_api_key_env} not found in environment variables for code generation LLM.")
+         raise ValueError(f"{gen_api_key_env} must be set for code generation")
+
+     # Load the prompt template
+     default_gen_prompt_template = ("You are a helpful assistant that writes Python code. "
+                                    "You will be given a prompt and you must generate Python code based on that prompt. "
+                                    "You must only generate Python code and nothing else. "
+                                    "Do not include any explanations or any other text. "
+                                    "Do not use any markdown. \n"
+                                    "Prompt: {prompt} \n"
+                                    "Code:\n")
+     gen_prompt_template = load_prompt_from_file("../prompts/code_gen_prompt.txt", default_gen_prompt_template)
+     input_prompt = gen_prompt_template.format(prompt=prompt)
+
+     try:
+         llm = OpenAI(
+             model=gen_llm_model,
+             api_key=gen_api_key
+         )
+         logger.info(f"Using code generation LLM: {gen_llm_model}")
+         generated_code = llm.complete(input_prompt)
+         logger.info("Code generation successful.")
+         return generated_code.text
+     except Exception as e:
+         logger.error(f"LLM call failed during code generation: {e}", exc_info=True)
+         raise # Re-raise the exception to be handled by the agent/workflow
+
+ # --- Tool Definitions ---
+
+ python_code_generator_tool = FunctionTool.from_defaults(
+     fn=generate_python_code,
+     name="python_code_generator",
+     description=(
+         "Generates executable Python code based on a natural language prompt. "
+         "Input: prompt string. Output: Python code string."
+     ),
+ )
+
+ # Use LlamaIndex's built-in Code Interpreter Tool Spec for safe execution
+ # This assumes the necessary environment (e.g., docker) for the spec is available
+ try:
+     code_interpreter_spec = CodeInterpreterToolSpec()
+     # Get the tool(s) from the spec. It might return multiple tools.
+     code_interpreter_tools = code_interpreter_spec.to_tool_list()
+     if not code_interpreter_tools:
+         raise RuntimeError("CodeInterpreterToolSpec did not return any tools.")
+     # Assuming the primary tool is the first one, or find by name if necessary
+     code_interpreter_tool = next((t for t in code_interpreter_tools if t.metadata.name == "code_interpreter"), None)
+     if code_interpreter_tool is None:
+         raise RuntimeError("Could not find 'code_interpreter' tool in CodeInterpreterToolSpec results.")
+     logger.info("CodeInterpreterToolSpec initialized successfully.")
+ except Exception as e:
+     logger.error(f"Failed to initialize CodeInterpreterToolSpec: {e}", exc_info=True)
+     # Fallback: Define a dummy tool or raise error to prevent agent start?
+     # For now, let initialization fail if the safe interpreter isn't available.
+     raise RuntimeError("CodeInterpreterToolSpec failed to initialize. Cannot create code_agent.") from e
+
+ # --- REMOVED SimpleCodeExecutor ---
+ # The SimpleCodeExecutor class that used subprocess has been entirely removed
+ # due to severe security risks. Execution MUST go through the CodeInterpreterToolSpec.
+
+ # --- Agent Initialization ---
+
+ def initialize_code_agent() -> ReActAgent:
+     """Initializes the code agent (a ReActAgent), configured for safe code execution."""
+     logger.info("Initializing CodeAgent...")
+
+     # Configuration for the agent's main LLM
+     agent_llm_model = os.getenv("CODE_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found in environment variables for CodeAgent.")
+         raise ValueError("GEMINI_API_KEY must be set for CodeAgent")
+
+     try:
+         llm = GoogleGenAI(
+             api_key=gemini_api_key,
+             model=agent_llm_model,
+         )
+         logger.info(f"Using agent LLM: {agent_llm_model}")
+
+         # Load system prompt (consider loading from file)
+         default_system_prompt = """\
+ You are CodeAgent, a specialist in generating and executing Python code. Your mission:
+
+ 1. **Thought**: Think step-by-step before acting and state your reasoning.
+ 2. **Code Generation**: To produce code, call `python_code_generator` with a concise, unambiguous prompt. Review the generated code for correctness and safety.
+ 3. **Execution & Testing**: To execute or test code, call `code_interpreter`. Provide the complete code snippet. Analyze its output (stdout, stderr, result) to verify functionality and debug errors.
+ 4. **Iteration**: If execution fails or the result is incorrect, analyze the error, think about the fix, generate corrected code using `python_code_generator`, and execute again using `code_interpreter`.
+ 5. **Tool Use**: Always adhere strictly to each tool’s input/output format.
+ 6. **Final Output**: Once the code works correctly and achieves the goal, output *only* the final functional code or the final execution result, as appropriate for the task.
+ 7. **Hand-Off**: If further logical reasoning or verification is needed, delegate to **reasoning_agent**. Otherwise, pass your final output to **planner_agent** for synthesis.
+ """
+         # system_prompt = load_prompt_from_file("code_agent_system_prompt.txt", default_system_prompt)
+         system_prompt = default_system_prompt # Using inline for now
+
+         agent = ReActAgent(
+             name="code_agent",
+             description=(
+                 "Generates Python code using `python_code_generator` and executes it safely using `code_interpreter`. "
+                 "Iteratively debugs and refines code based on execution results."
+             ),
+             # REMOVED: code_execute_fn - Execution is handled by the code_interpreter tool via the agent loop.
+             tools=[
+                 python_code_generator_tool,
+                 code_interpreter_tool, # Use the safe tool from the spec
+             ],
+             llm=llm,
+             system_prompt=system_prompt,
+             can_handoff_to=["planner_agent", "reasoning_agent"],
+         )
+         logger.info("CodeAgent initialized successfully.")
+
+         return agent
+
+     except Exception as e:
+         logger.error(f"Error during CodeAgent initialization: {e}", exc_info=True)
+         raise
+
+ # Example usage (for testing if run directly)
+ if __name__ == "__main__":
+     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+     logger.info("Running code_agent.py directly for testing...")
+
+     # Ensure API keys are set for testing
+     required_keys = ["GEMINI_API_KEY", os.getenv("CODE_GEN_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")]
+     missing_keys = [key for key in required_keys if not os.getenv(key)]
+     if missing_keys:
+         print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+     else:
+         try:
+             test_agent = initialize_code_agent()
+             print("Code Agent initialized successfully for testing.")
+             # Example test (requires user interaction or pre-defined task)
+             # result = test_agent.chat("Write and execute python code to print 'hello world'")
+             # print(f"Test query result: {result}")
+         except Exception as e:
+             print(f"Error during testing: {e}")
+
agents/figure_interpretation_agent.py ADDED
@@ -0,0 +1,303 @@
+ import os
+ import logging
+ from typing import List, Dict, Optional, Union
+ from dotenv import load_dotenv
+
+ from llama_index.core.agent.workflow import ReActAgent
+ from llama_index.core.schema import ImageDocument
+ from llama_index.core.tools import FunctionTool
+ from llama_index.llms.google_genai import GoogleGenAI
+
+ # Load environment variables
+ load_dotenv()
+
+ # Setup logging
+ logger = logging.getLogger(__name__)
+
+ # Helper function to load prompt from file
+ def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+     """Loads a prompt from a text file."""
+     try:
+         script_dir = os.path.dirname(__file__)
+         prompt_path = os.path.join(script_dir, filename)
+         with open(prompt_path, "r") as f:
+             prompt = f.read()
+         logger.info(f"Successfully loaded prompt from {prompt_path}")
+         return prompt
+     except FileNotFoundError:
+         logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
+         return default_prompt
+     except Exception as e:
+         logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+         return default_prompt
+
+ # --- Core Figure Interpretation Logic (using Multi-Modal LLM) ---
+
+ def interpret_figure_with_llm(image_path: str, request: str) -> str:
+     """Interprets a figure in an image based on a specific request using a multi-modal LLM.
+     Args:
+         image_path (str): Path to the image file containing the figure.
+         request (str): The specific question or interpretation task (e.g., "Describe this chart",
+                        "Extract sales for Q3", "Identify the main trend").
+     Returns:
+         str: The interpretation result or an error message.
+     """
+     logger.info(f"Interpreting figure in image: {image_path} with request: {request}")
+
+     # Check if image exists
+     if not os.path.exists(image_path):
+         logger.error(f"Image file not found: {image_path}")
+         return f"Error: Image file not found at {image_path}"
+
+     # LLM configuration (Must be a multi-modal model)
+     # Ensure the selected model supports image input (e.g., gemini-1.5-pro)
+     llm_model_name = os.getenv("FIGURE_INTERPRETATION_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for figure interpretation LLM.")
+         return "Error: GEMINI_API_KEY not set."
+
+     try:
+         # Initialize the multi-modal LLM
+         llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model_name)
+         logger.info(f"Using figure interpretation LLM: {llm_model_name}")
+
+         # Prepare the prompt for the multi-modal LLM
+         # The prompt needs to guide the LLM to act as the figure interpreter
+         # based on the specific request.
+         prompt = (
+             f"You are an expert figure interpreter. Analyze the provided image containing a chart, graph, diagram, or table. "
+             f"Focus *only* on the visual information present in the image. "
+             f"Fulfill the following request accurately and concisely:\n\n"
+             f"REQUEST: {request}\n\n"
+             f"Based *only* on the image, provide the answer:"
+         )
+
+         # Load the image data (LlamaIndex integration might handle this differently depending on version)
+         # Assuming a method to load image data compatible with the LLM call
+         # This might involve using ImageBlock or similar structures in newer LlamaIndex versions.
+         # For simplicity here, we assume the LLM call can handle a path or loaded image object.
+
+         # Example using complete (adjust based on actual LlamaIndex multi-modal API)
+         # Note: The exact API for multi-modal completion might vary.
+         # This is a conceptual example.
+         from llama_index.core import SimpleDirectoryReader # Example import
+
+         # Load the image document
+         reader = SimpleDirectoryReader(input_files=[image_path])
+         image_documents = reader.load_data()
+
+         if not image_documents or not isinstance(image_documents[0], ImageDocument):
+             logger.error(f"Failed to load image as ImageDocument: {image_path}")
+             return f"Error: Could not load image file {image_path} for analysis."
+
+         # Make the multi-modal completion call
+         response = llm.complete(
+             prompt=prompt,
+             image_documents=image_documents # Pass the loaded image document(s)
+         )
+
+         interpretation = response.text.strip()
+         logger.info("Figure interpretation successful.")
+         return interpretation
+
+     except FileNotFoundError:
+         # This might be redundant due to the initial check, but good practice
+         logger.error(f"Image file not found during LLM call: {image_path}")
+         return f"Error: Image file not found at {image_path}"
+     except ImportError as ie:
+         logger.error(f"Missing library for multi-modal processing: {ie}")
+         return f"Error: Missing required library for image processing ({ie})."
+     except Exception as e:
+         # Catch potential API errors or other issues
+         logger.error(f"LLM call failed during figure interpretation: {e}", exc_info=True)
+         # Check if the error suggests the model doesn't support images
+         if "does not support image input" in str(e).lower():
+             logger.error(f"The configured model {llm_model_name} does not support image input.")
+             return f"Error: The configured LLM ({llm_model_name}) does not support image input. Please configure a multi-modal model."
+         return f"Error during figure interpretation: {e}"
+
+ # --- Tool Definitions (Wrapping the core logic) ---
+ # These tools essentially pass the request to the core LLM function.
+
+ def describe_figure_tool_fn(image_path: str) -> str:
+     "Provides a general description of the figure in the image (type, elements, topic)."
+     return interpret_figure_with_llm(image_path, "Describe this figure, including its type, main elements (axes, labels, legend), and overall topic.")
+
+ def extract_data_points_tool_fn(image_path: str, data_request: str) -> str:
+     "Extracts specific data points or values from the figure in the image."
+     return interpret_figure_with_llm(image_path, f"Extract the following data points/values from the figure: {data_request}. If exact values are not clear, provide the closest estimate based on the visual.")
+
+ def identify_trends_tool_fn(image_path: str) -> str:
+     "Identifies and describes trends or patterns shown in the figure in the image."
+     return interpret_figure_with_llm(image_path, "Analyze and describe the main trends or patterns shown in this figure.")
+
+ def compare_elements_tool_fn(image_path: str, comparison_request: str) -> str:
+     "Compares different elements within the figure in the image."
+     return interpret_figure_with_llm(image_path, f"Compare the following elements within the figure: {comparison_request}. Be specific about the comparison based on the visual data.")
+
+ def summarize_figure_insights_tool_fn(image_path: str) -> str:
+     "Summarizes the key insights or main message conveyed by the figure in the image."
+     return interpret_figure_with_llm(image_path, "Summarize the key insights or the main message conveyed by this figure.")
+
+ # --- Tool Definitions for Agent ---
+ describe_figure_tool = FunctionTool.from_defaults(
+     fn=describe_figure_tool_fn,
+     name="describe_figure",
+     description="Provides a general description of the figure in the image (type, elements, topic). Input: image_path (str)."
+ )
+
+ extract_data_points_tool = FunctionTool.from_defaults(
+     fn=extract_data_points_tool_fn,
+     name="extract_data_points",
+     description="Extracts specific data points/values from the figure. Input: image_path (str), data_request (str)."
+ )
+
+ identify_trends_tool = FunctionTool.from_defaults(
+     fn=identify_trends_tool_fn,
+     name="identify_trends",
+     description="Identifies and describes trends/patterns in the figure. Input: image_path (str)."
+ )
+
+ compare_elements_tool = FunctionTool.from_defaults(
+     fn=compare_elements_tool_fn,
+     name="compare_elements",
+     description="Compares different elements within the figure. Input: image_path (str), comparison_request (str)."
+ )
+
+ summarize_figure_insights_tool = FunctionTool.from_defaults(
+     fn=summarize_figure_insights_tool_fn,
+     name="summarize_figure_insights",
+     description="Summarizes the key insights/main message of the figure. Input: image_path (str)."
+ )
+
+ # --- Agent Initialization ---
+ def initialize_figure_interpretation_agent() -> ReActAgent:
+     """Initializes the Figure Interpretation Agent."""
+     logger.info("Initializing FigureInterpretationAgent...")
+
+     # Configuration for the agent's main LLM (can be the same multi-modal one)
+     agent_llm_model = os.getenv("FIGURE_INTERPRETATION_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for FigureInterpretationAgent.")
+         raise ValueError("GEMINI_API_KEY must be set for FigureInterpretationAgent")
+
+     try:
+         # Agent's LLM doesn't necessarily need to be multi-modal itself,
+         # if the tools handle the multi-modal calls.
+         # However, using a multi-modal one might allow more direct interaction patterns later.
+         llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+         logger.info(f"Using agent LLM: {agent_llm_model}")
+
+         # Load system prompt
+         default_system_prompt = ("You are FigureInterpretationAgent... [Default prompt content - replace with actual]" # Placeholder
+                                  )
+         system_prompt = load_prompt_from_file("../prompts/figure_interpretation_agent_prompt.txt", default_system_prompt)
+         if system_prompt == default_system_prompt:
+             logger.warning("Using default/fallback system prompt for FigureInterpretationAgent.")
+
+         # Define available tools
+         tools = [
+             describe_figure_tool,
+             extract_data_points_tool,
+             identify_trends_tool,
+             compare_elements_tool,
+             summarize_figure_insights_tool
+         ]
+
+         # Define valid handoff targets
+         valid_handoffs = [
+             "planner_agent", # To return results
+             "research_agent", # If context from figure needs further research
+             "reasoning_agent" # If interpretation needs logical analysis
+         ]
+
+         agent = ReActAgent(
+             name="figure_interpretation_agent",
+             description=(
+                 "Analyzes and interprets visual data representations (charts, graphs, tables) from image files. "
+                 "Can describe figures, extract data, identify trends, compare elements, and summarize insights."
+             ),
+             tools=tools,
+             llm=llm,
+             system_prompt=system_prompt,
+             can_handoff_to=valid_handoffs,
+             # Note: This agent inherently requires multi-modal input capabilities,
+             # which are handled within its tools via a multi-modal LLM.
+         )
+         logger.info("FigureInterpretationAgent initialized successfully.")
+         return agent
+
+     except Exception as e:
+         logger.error(f"Error during FigureInterpretationAgent initialization: {e}", exc_info=True)
+         raise
+
+ # Example usage (for testing if run directly)
+ if __name__ == "__main__":
+     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+     logger.info("Running figure_interpretation_agent.py directly for testing...")
+
+     # Check required keys
+     required_keys = ["GEMINI_API_KEY"]
+     missing_keys = [key for key in required_keys if not os.getenv(key)]
+     if missing_keys:
+         print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+     else:
+         # Check if a multi-modal model is likely configured (heuristic)
+         model_name = os.getenv("FIGURE_INTERPRETATION_LLM_MODEL", "models/gemini-1.5-pro")
+         if "pro" not in model_name.lower() and "vision" not in model_name.lower():
+             print(f"Warning: Configured LLM {model_name} might not support image input. Tests may fail.")
+
+         # Create a dummy image file for testing (requires Pillow)
+         dummy_image_path = "dummy_figure.png"
+         try:
+             from PIL import Image, ImageDraw, ImageFont
+             img = Image.new('RGB', (400, 200), color = (255, 255, 255))
+             d = ImageDraw.Draw(img)
+             # Try to load a default font, handle if not found
+             try:
+                 font = ImageFont.truetype("arial.ttf", 15) # Common font, might not exist
+             except IOError:
+                 font = ImageFont.load_default()
+                 print("Arial font not found, using default PIL font.")
+             d.text((10,10), "Simple Bar Chart", fill=(0,0,0), font=font)
+             d.rectangle([50, 50, 100, 150], fill=(255,0,0)) # Bar 1
+             d.text((60, 160), "A", fill=(0,0,0), font=font)
+             d.rectangle([150, 80, 200, 150], fill=(0,0,255)) # Bar 2
+             d.text((160, 160), "B", fill=(0,0,0), font=font)
+             img.save(dummy_image_path)
+             print(f"Created dummy image file: {dummy_image_path}")
+
+             # Test the tools directly
+             print("\nTesting describe_figure...")
+             desc = describe_figure_tool_fn(dummy_image_path)
+             print(f"Description: {desc}")
+
+             print("\nTesting extract_data_points (qualitative)...")
+             extract_req = "Height of bar A vs Bar B" # Qualitative request
+             extract_res = extract_data_points_tool_fn(dummy_image_path, extract_req)
+             print(f"Extraction Result: {extract_res}")
+
+             print("\nTesting compare_elements...")
+             compare_req = "Compare bar A and bar B"
+             compare_res = compare_elements_tool_fn(dummy_image_path, compare_req)
+             print(f"Comparison Result: {compare_res}")
+
+             # Clean up dummy image
+             os.remove(dummy_image_path)
+
+         except ImportError:
+             print("Pillow library not installed. Skipping direct tool tests that require image creation.")
+             # Optionally, still try initializing the agent
+             try:
+                 test_agent = initialize_figure_interpretation_agent()
+                 print("\nFigure Interpretation Agent initialized successfully (tool tests skipped).")
+             except Exception as e:
+                 print(f"Error initializing agent: {e}")
+         except Exception as e:
+             print(f"Error during testing: {e}")
+             if os.path.exists(dummy_image_path):
+                 os.remove(dummy_image_path) # Ensure cleanup on error
+
agents/image_analyzer_agent.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from dotenv import load_dotenv
4
+
5
+ from llama_index.core.agent.workflow import FunctionAgent
6
+ from llama_index.llms.google_genai import GoogleGenAI
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+
11
+ # Setup logging
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Helper function to load prompt from file
15
+ def load_prompt_from_file(filename="../prompts/image_analyzer_prompt.txt") -> str:
16
+ """Loads the system prompt from a text file."""
17
+ try:
18
+ # Assuming the prompt file is in the same directory as the agent script
19
+ script_dir = os.path.dirname(__file__)
20
+ prompt_path = os.path.join(script_dir, filename)
21
+ with open(prompt_path, "r") as f:
22
+ prompt = f.read()
23
+ logger.info(f"Successfully loaded system prompt from {prompt_path}")
24
+ return prompt
25
+ except FileNotFoundError:
26
+ logger.error(f"Prompt file {filename} not found at {prompt_path}. Using fallback prompt.")
27
+ # Fallback minimal prompt
28
+ return "You are an image analyzer. Describe the image factually."
29
+ except Exception as e:
30
+ logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
31
+ return "You are an image analyzer. Describe the image factually."
32
+
33
+ def initialize_image_analyzer_agent() -> FunctionAgent:
34
+ """
35
+ Create an agent that orchestrates image analysis.
36
+ Uses Gemini Pro multimodal capabilities directly without explicit tools.
37
+ Configuration and prompt are loaded from environment/file.
38
+ """
39
+ logger.info("Initializing ImageAnalyzerAgent...")
40
+
41
+ # Configuration from environment variables
42
+ llm_model_name = os.getenv("IMAGE_ANALYZER_LLM_MODEL", "models/gemini-1.5-pro")
43
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
44
+
45
+ if not gemini_api_key:
46
+ logger.error("GEMINI_API_KEY not found in environment variables.")
47
+ raise ValueError("GEMINI_API_KEY must be set")
48
+
49
+ try:
50
+ llm = GoogleGenAI(
51
+ api_key=gemini_api_key,
52
+ model=llm_model_name,
53
+ )
54
+ logger.info(f"Using LLM model: {llm_model_name}")
55
+
56
+ # Load system prompt from file
57
+ system_prompt = load_prompt_from_file()
58
+
59
+ # Note: This agent is a FunctionAgent but doesn't explicitly define tools.
60
+ # It relies on the LLM (Gemini 1.5 Pro) to understand the system prompt
61
+ # and perform the analysis when an image is passed in the ChatMessage blocks.
62
+ agent = FunctionAgent(
63
+ name="image_analyzer_agent",
64
+ description=(
65
+ "ImageAnalyzerAgent inspects image files using its multimodal capabilities, "
66
+ "interpreting the visual content according to a detailed factual analysis prompt."
67
+ ),
68
+ llm=llm,
69
+ system_prompt=system_prompt,
70
+ # No explicit tools needed if relying on direct multimodal LLM call
71
+ # tools=[],
72
+ can_handoff_to=["planner_agent", "research_agent", "reasoning_agent"],
73
+ )
74
+ logger.info("ImageAnalyzerAgent initialized successfully.")
75
+ return agent
76
+ except Exception as e:
77
+ logger.error(f"Error during ImageAnalyzerAgent initialization: {e}", exc_info=True)
78
+ raise
79
+
80
+ # Example usage (for testing if run directly)
81
+ if __name__ == "__main__":
82
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
83
+ logger.info("Running image_analyzer_agent.py directly for testing...")
84
+
85
+ # Ensure API key is set for testing
86
+ if not os.getenv("GEMINI_API_KEY"):
87
+ print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
88
+ else:
89
+ try:
90
+ test_agent = initialize_image_analyzer_agent()
91
+ print("Image Analyzer Agent initialized successfully for testing.")
92
+ # To test further, you would need to construct a ChatMessage with an ImageBlock
93
+ # and run agent.chat(message)
94
+ except Exception as e:
95
+ print(f"Error during testing: {e}")
96
+
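For reference, a quick multimodal smoke test could look like the sketch below. It is not part of the committed file: the `ChatMessage`/`ImageBlock`/`TextBlock` imports and the `agent.run(user_msg=...)` call assume a recent `llama-index-core`, and `test_image.png` is a hypothetical local file.

```python
# Hypothetical smoke test for image_analyzer_agent (adjust to the installed llama-index version).
import asyncio
from llama_index.core.llms import ChatMessage, ImageBlock, TextBlock

async def smoke_test():
    agent = initialize_image_analyzer_agent()
    # Attach the instruction and the image as multimodal blocks on one user message.
    msg = ChatMessage(
        role="user",
        blocks=[
            TextBlock(text="Describe this image factually."),
            ImageBlock(path="test_image.png"),  # hypothetical test file
        ],
    )
    response = await agent.run(user_msg=msg)
    print(response)

asyncio.run(smoke_test())
```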
agents/long_context_management_agent.py ADDED
@@ -0,0 +1,452 @@
1
+ import os
2
+ import logging
3
+ import json
4
+ from typing import List, Dict, Optional, Union, Literal
5
+ from dotenv import load_dotenv
6
+
7
+ from llama_index.core.agent.workflow import ReActAgent
8
+ from llama_index.core.tools import FunctionTool, QueryEngineTool
9
+ from llama_index.llms.google_genai import GoogleGenAI
10
+ from llama_index.core import Document, VectorStoreIndex, Settings
11
+ from llama_index.core.node_parser import SentenceSplitter
12
+ from llama_index.core.query_engine import RetrieverQueryEngine
13
+ from llama_index.core.retrievers import VectorIndexRetriever
14
+
15
+ # Load environment variables
16
+ load_dotenv()
17
+
18
+ # Setup logging
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Configure LlamaIndex Settings (optional, but good practice)
22
+ # Ensure embedding model is set if not using default OpenAI
23
+ # Settings.embed_model = ... # Example: HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
24
+ # Settings.llm = ... # Can set a default LLM here if needed
25
+
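Note that `VectorStoreIndex.from_documents` below falls back to LlamaIndex's default OpenAI embedding model unless `Settings.embed_model` is set, while this module only requires `GEMINI_API_KEY`. A minimal sketch of the local-embedding option the comment above hints at (assuming the `llama-index-embeddings-huggingface` package is installed):

```python
# Sketch: configure a local embedding model so index construction needs no OpenAI key.
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
```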
26
+ # Helper function to load prompt from file
27
+ def load_prompt_from_file(filename: str, default_prompt: str) -> str:
28
+ """Loads a prompt from a text file."""
29
+ try:
30
+ script_dir = os.path.dirname(__file__)
31
+ prompt_path = os.path.join(script_dir, filename)
32
+ with open(prompt_path, "r") as f:
33
+ prompt = f.read()
34
+ logger.info(f"Successfully loaded prompt from {prompt_path}")
35
+ return prompt
36
+ except FileNotFoundError:
37
+ logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
38
+ return default_prompt
39
+ except Exception as e:
40
+ logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
41
+ return default_prompt
42
+
43
+ # --- Internal Context Index Management ---
44
+ # Store index and text globally for simplicity in this example
45
+ # In a real application, consider a more robust state management approach
46
+ _context_index: Optional[VectorStoreIndex] = None
47
+ _context_text: Optional[str] = None
48
+ _context_source: Optional[str] = None # e.g., filename or description
49
+
50
+ def _build_or_get_index(text: Optional[str] = None, source: Optional[str] = "loaded_context") -> Optional[VectorStoreIndex]:
51
+ """Builds or retrieves the VectorStoreIndex for the loaded context."""
52
+ global _context_index, _context_text, _context_source
53
+
54
+ if text is not None and (text != _context_text or _context_index is None):
55
+ logger.info(f"Building new context index from text (length: {len(text)} chars). Source: {source}")
56
+ _context_text = text
57
+ _context_source = source
58
+ try:
59
+ # Use SentenceSplitter for chunking
60
+ splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=200)
61
+ Settings.node_parser = splitter # Set globally or pass to index construction
62
+
63
+ documents = [Document(text=_context_text)]
64
+ _context_index = VectorStoreIndex.from_documents(documents, show_progress=True)
65
+ logger.info("Context index built successfully.")
66
+ except Exception as e:
67
+ logger.error(f"Failed to build context index: {e}", exc_info=True)
68
+ _context_index = None
69
+ _context_text = None
70
+ _context_source = None
71
+ return None
72
+
73
+ elif _context_index is None:
74
+ logger.warning("No context loaded or index built yet.")
75
+ return None
76
+
77
+ return _context_index
78
+
79
+ def load_text_context(text: str, source: str = "provided_text") -> str:
80
+ """Loads text into the agent's context and builds an index. Replaces existing context."""
81
+ logger.info(f"Loading new text context (length: {len(text)} chars). Source: {source}")
82
+ index = _build_or_get_index(text=text, source=source)
83
+ if index:
84
+ return f"Successfully loaded and indexed text context from {source} (Length: {len(text)} chars)."
85
+ else:
86
+ return "Error: Failed to load or index the provided text context."
87
+
88
+ # --- Tool Functions ---
89
+
90
+ def summarize_long_context(detail_level: Literal["brief", "standard", "detailed"] = "standard",
91
+ max_length: Optional[int] = None,
92
+ min_length: Optional[int] = None) -> str:
93
+ """Summarizes the currently loaded long text context.
94
+ Args:
95
+ detail_level (str): Level of detail: "brief" (1-2 sentences), "standard" (1-2 paragraphs), "detailed" (multiple paragraphs).
96
+ max_length (Optional[int]): Approximate maximum words (overrides detail_level if set).
97
+ min_length (Optional[int]): Approximate minimum words.
98
+ Returns:
99
+ str: The summary or an error message.
100
+ """
101
+ global _context_text, _context_source
102
+ if _context_text is None:
103
+ return "Error: No long context has been loaded yet. Use 'load_text_context' first."
104
+
105
+ logger.info(f"Summarizing loaded context (Source: {_context_source}, Length: {len(_context_text)} chars). Detail: {detail_level}")
106
+
107
+ # Determine length guidance based on detail_level if max/min not set
108
+ if max_length is None:
109
+ if detail_level == "brief":
110
+ max_length = 50
111
+ min_length = min_length or 10
112
+ elif detail_level == "detailed":
113
+ max_length = 500
114
+ min_length = min_length or 150
115
+ else: # standard
116
+ max_length = 200
117
+ min_length = min_length or 50
118
+ min_length = min_length or int(max_length * 0.3) # Default min length
119
+
120
+ # LLM configuration
121
+ llm_model = os.getenv("CONTEXT_LLM_MODEL", "models/gemini-1.5-pro") # Use Pro for potentially long context
122
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
123
+ if not gemini_api_key:
124
+ logger.error("GEMINI_API_KEY not found for summarization LLM.")
125
+ return "Error: GEMINI_API_KEY not set."
126
+
127
+ # Truncate input text only if extremely long, as Pro handles large contexts
128
+ # Let the LLM handle context window limits if possible
129
+ # max_input_chars = 100000 # Example high limit
130
+ # text_to_summarize = _context_text[:max_input_chars] if len(_context_text) > max_input_chars else _context_text
131
+ text_to_summarize = _context_text # Rely on LLM context window
132
+
133
+ prompt = (
134
+ f"Summarize the following text concisely, focusing on the main points and key information. "
135
+ f"Aim for a length between {min_length} and {max_length} words. "
136
+ f"The requested level of detail is '{detail_level}'.\n\n"
137
+ f"TEXT:\n{text_to_summarize}\n\nSUMMARY:"
138
+ )
139
+
140
+ try:
141
+ llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model)
142
+ logger.info(f"Using summarization LLM: {llm_model}")
143
+ response = llm.complete(prompt)
144
+ summary = response.text.strip()
145
+ logger.info(f"Summarization successful (output length: {len(summary.split())} words).")
146
+ return summary
147
+ except Exception as e:
148
+ logger.error(f"LLM call failed during summarization: {e}", exc_info=True)
149
+ return f"Error during summarization: {e}"
150
+
151
+ def extract_key_information(query: str, max_results: int = 10) -> Union[List[str], str]:
152
+ """Extracts specific information or answers a question based on the loaded long context using the index.
153
+ Args:
154
+ query (str): The question or description of information to extract (e.g., "List all decisions made", "What was mentioned about Project X?").
155
+ max_results (int): Maximum number of distinct pieces of information or text snippets to return.
156
+ Returns:
157
+ List[str]: A list of extracted text snippets or answers, or str: Error message.
158
+ """
159
+ logger.info(f"Extracting information for query: {query} from loaded context. Max results: {max_results}")
160
+ index = _build_or_get_index() # Get existing index
161
+ if index is None:
162
+ return "Error: No context loaded or index available. Use 'load_text_context' first."
163
+
164
+ try:
165
+ # Use a query engine for extraction
166
+ # Configure retriever for potentially broader search
167
+ retriever = VectorIndexRetriever(index=index, similarity_top_k=max_results * 2) # Retrieve more initially
168
+
169
+ # Configure response synthesis (optional, can customize prompt)
170
+ # response_synthesizer = ...
171
+
172
+ query_engine = RetrieverQueryEngine.from_args(retriever=retriever,
173
+ # response_synthesizer=response_synthesizer,
174
+ # llm=Settings.llm # Use default or specify
175
+ )
176
+
177
+ # Formulate a prompt that encourages extraction rather than synthesis if needed
178
+ extraction_prompt = f"Based *only* on the provided context, extract the key information or answer the following query. List distinct findings or provide relevant text snippets. Query: {query}"
179
+
180
+ response = query_engine.query(extraction_prompt)
181
+
182
+ # Process response - might need refinement based on LLM output format
183
+ # Assuming response.response contains the extracted info, potentially needing splitting
184
+ # This part is heuristic and depends on how the LLM responds to the extraction prompt.
185
+ extracted_items = [item.strip() for item in response.response.split("\n") if item.strip()]
186
+
187
+ # Limit results if necessary
188
+ final_results = extracted_items[:max_results]
189
+
190
+ logger.info(f"Extraction successful. Found {len(final_results)} items.")
191
+ return final_results if final_results else ["No specific information found matching the query in the context."]
192
+
193
+ except Exception as e:
194
+ logger.error(f"Error during information extraction: {e}", exc_info=True)
195
+ return f"Error during extraction: {e}"
196
+
197
+ def filter_by_relevance(topic: str, threshold: float = 0.75) -> str:
198
+ """Filters the loaded long context, retaining sections relevant to the topic using the index.
199
+ Args:
200
+ topic (str): The topic or query to filter relevance by.
201
+ threshold (float): Similarity threshold (0.0 to 1.0) for relevance. Higher means more strict.
202
+ Returns:
203
+ str: The filtered text containing only relevant sections, or an error message.
204
+ """
205
+ logger.info(f"Filtering loaded context for relevance to topic: {topic}. Threshold: {threshold}")
206
+ index = _build_or_get_index() # Get existing index
207
+ if index is None:
208
+ return "Error: No context loaded or index available. Use 'load_text_context' first."
209
+
210
+ try:
211
+ retriever = VectorIndexRetriever(index=index, similarity_top_k=20) # Retrieve a decent number of candidates
212
+ retrieved_nodes = retriever.retrieve(topic)
213
+
214
+ relevant_texts = []
215
+ for node_with_score in retrieved_nodes:
216
+ if node_with_score.score >= threshold:
217
+ relevant_texts.append(node_with_score.node.get_content())
218
+ else:
219
+ # Since results are ordered by score, we can stop early
220
+ break
221
+
222
+ if not relevant_texts:
223
+ logger.info("No sections found meeting the relevance threshold.")
224
+ return "No content found matching the specified relevance threshold for the topic."
225
+
226
+ # Combine relevant sections (consider adding separators)
227
+ filtered_text = "\n\n---\n\n".join(relevant_texts)
228
+ logger.info(f"Filtering successful. Combined relevant text length: {len(filtered_text)} chars.")
229
+ return filtered_text
230
+
231
+ except Exception as e:
232
+ logger.error(f"Error during relevance filtering: {e}", exc_info=True)
233
+ return f"Error during filtering: {e}"
234
+
235
+ def query_context_index(query: str) -> str:
236
+ """Answers a specific question based on the information contained within the loaded long context using the index.
237
+ Args:
238
+ query (str): The question to answer.
239
+ Returns:
240
+ str: The answer derived from the context, or an error/"not found" message.
241
+ """
242
+ logger.info(f"Querying loaded context index with: {query}")
243
+ index = _build_or_get_index() # Get existing index
244
+ if index is None:
245
+ return "Error: No context loaded or index available. Use 'load_text_context' first."
246
+
247
+ try:
248
+ query_engine = index.as_query_engine(similarity_top_k=5) # Default query engine
249
+ response = query_engine.query(query)
250
+ answer = response.response.strip()
251
+ logger.info("Context query successful.")
252
+ # Check if the LLM indicated it couldn't answer
253
+ if "don't know" in answer.lower() or "no information" in answer.lower() or "context does not mention" in answer.lower():
254
+ logger.warning(f"Query response suggests information not found: {answer}")
255
+ return f"The loaded context does not seem to contain the answer to: {query}"
256
+ return answer
257
+ except Exception as e:
258
+ logger.error(f"Error during context query: {e}", exc_info=True)
259
+ return f"Error querying context: {e}"
260
+
261
+ # --- Tool Definitions ---
262
+ load_context_tool = FunctionTool.from_defaults(
263
+ fn=load_text_context,
264
+ name="load_text_context",
265
+ description=(
266
+ "Loads/replaces the long text context for the agent and builds an internal index. "
267
+ "Input: text (str), Optional: source (str). Output: Status message (str)."
268
+ ),
269
+ )
270
+
271
+ summarize_context_tool = FunctionTool.from_defaults(
272
+ fn=summarize_long_context,
273
+ name="summarize_long_context",
274
+ description=(
275
+ "Summarizes the currently loaded long text context. "
276
+ "Input: Optional: detail_level ('brief', 'standard', 'detailed'), max_length (int), min_length (int). Output: Summary (str) or error."
277
+ ),
278
+ )
279
+
280
+ extract_info_tool = FunctionTool.from_defaults(
281
+ fn=extract_key_information,
282
+ name="extract_key_information",
283
+ description=(
284
+ "Extracts specific information or answers questions from the loaded context using its index. "
285
+ "Input: query (str), Optional: max_results (int). Output: List[str] of findings or error string."
286
+ ),
287
+ )
288
+
289
+ filter_context_tool = FunctionTool.from_defaults(
290
+ fn=filter_by_relevance,
291
+ name="filter_by_relevance",
292
+ description=(
293
+ "Filters the loaded context to retain only sections relevant to a topic, using the index. "
294
+ "Input: topic (str), Optional: threshold (float 0-1). Output: Filtered text (str) or error."
295
+ ),
296
+ )
297
+
298
+ query_context_tool = FunctionTool.from_defaults(
299
+ fn=query_context_index,
300
+ name="query_context_index",
301
+ description=(
302
+ "Answers a specific question based *only* on the loaded long context using its index. "
303
+ "Input: query (str). Output: Answer (str) or error/'not found' message."
304
+ ),
305
+ )
306
+
307
+ # --- Agent Initialization ---
308
+ def initialize_long_context_management_agent() -> ReActAgent:
309
+ """Initializes the Long Context Management Agent."""
310
+ logger.info("Initializing LongContextManagementAgent...")
311
+
312
+ # Configuration for the agent's main LLM
313
+ agent_llm_model = os.getenv("CONTEXT_AGENT_LLM_MODEL", "models/gemini-1.5-pro") # Needs to handle planning
314
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
315
+
316
+ if not gemini_api_key:
317
+ logger.error("GEMINI_API_KEY not found for LongContextManagementAgent.")
318
+ raise ValueError("GEMINI_API_KEY must be set for LongContextManagementAgent")
319
+
320
+ try:
321
+ llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
322
+ logger.info(f"Using agent LLM: {agent_llm_model}")
323
+ Settings.llm = llm # Set default LLM for LlamaIndex components used by tools
324
+
325
+ # Load system prompt
326
+ default_system_prompt = (
+     "You are LongContextManagementAgent. Load long texts, then summarize, extract, "
+     "filter, or answer questions about them using your tools."
+ )  # Minimal fallback used only if the prompt file is missing
328
+ system_prompt = load_prompt_from_file("../prompts/long_context_management_agent_prompt.txt", default_system_prompt)
329
+ if system_prompt == default_system_prompt:
330
+ logger.warning("Using default/fallback system prompt for LongContextManagementAgent.")
331
+
332
+ # Define available tools
333
+ tools = [
334
+ load_context_tool,
335
+ summarize_context_tool,
336
+ extract_info_tool,
337
+ filter_context_tool,
338
+ query_context_tool
339
+ ]
340
+
341
+ # Define valid handoff targets
342
+ valid_handoffs = [
343
+ "planner_agent", # To return results
344
+ "text_analyzer_agent", # If further analysis of extracted/filtered text is needed
345
+ "reasoning_agent"
346
+ ]
347
+
348
+ agent = ReActAgent(
349
+ name="long_context_management_agent",
350
+ description=(
351
+ "Manages and processes long textual context. Can load text (`load_text_context`), summarize (`summarize_long_context`), "
352
+ "extract key info (`extract_key_information`), filter by relevance (`filter_by_relevance`), and answer questions based on the context (`query_context_index`)."
353
+ ),
354
+ tools=tools,
355
+ llm=llm,
356
+ system_prompt=system_prompt,
357
+ can_handoff_to=valid_handoffs,
358
+ )
359
+ logger.info("LongContextManagementAgent initialized successfully.")
360
+ return agent
361
+
362
+ except Exception as e:
363
+ logger.error(f"Error during LongContextManagementAgent initialization: {e}", exc_info=True)
364
+ raise
365
+
366
+ # Example usage (for testing if run directly)
367
+ if __name__ == "__main__":
368
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
369
+ # Set LlamaIndex log level higher to reduce noise during testing
370
+ logging.getLogger("llama_index.core.indices.vector_store").setLevel(logging.WARNING)
371
+ logging.getLogger("llama_index.core.query_engine").setLevel(logging.WARNING)
372
+ logging.getLogger("llama_index.core.token_counter").setLevel(logging.ERROR) # Suppress token counting logs
373
+
374
+ logger.info("Running long_context_management_agent.py directly for testing...")
375
+
376
+ # Check required keys
377
+ required_keys = ["GEMINI_API_KEY"]
378
+ missing_keys = [key for key in required_keys if not os.getenv(key)]
379
+ if missing_keys:
380
+ print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
381
+ else:
382
+ try:
383
+ # Example long text
384
+ long_text = """
385
+ Meeting Minutes - Project Phoenix - April 28, 2025
386
+ Attendees: Alice, Bob, Charlie, David
387
+ Agenda: Review Q1 results, Plan Q2 roadmap, Budget allocation
388
+
389
+ Q1 Results Discussion:
390
+ Alice presented the sales figures. Sales increased by 15% compared to Q4 2024, exceeding the target of 10%.
391
+ Bob highlighted the success of the marketing campaign launched in February. Customer acquisition cost decreased by 5%.
392
+ Charlie noted a slight dip in user engagement metrics in March, possibly due to a recent UI change.
393
+ Action Item: David to investigate the engagement dip.
394
+
395
+ Q2 Roadmap Planning:
396
+ The team discussed potential features for Q2. Feature A (enhanced reporting) was prioritized.
397
+ Feature B (mobile app improvements) was deferred to Q3.
398
+ Alice emphasized the need for stability improvements. Bob suggested focusing on performance optimization.
399
+ Decision: Q2 focus will be on Feature A and performance/stability improvements.
400
+
401
+ Budget Allocation:
402
+ Charlie presented the proposed budget.
403
+ An additional $50,000 was requested for cloud infrastructure scaling due to increased usage.
404
+ David questioned the necessity of the full amount.
405
+ After discussion, the team approved an additional $40,000 for infrastructure.
406
+ Decision: Allocate $40,000 extra for Q2 infrastructure.
407
+
408
+ Next Steps:
409
+ David to report on engagement metrics by May 5th.
410
+ Alice to finalize Q2 feature specifications by May 10th.
411
+ Meeting adjourned.
412
+ """ * 5 # Make it longer
413
+
414
+ # Test loading context
415
+ print("\nTesting load_text_context...")
416
+ load_status = load_text_context(long_text, source="Meeting Minutes Test")
417
+ print(load_status)
418
+
419
+ if "Error" not in load_status:
420
+ # Test summarization
421
+ print("\nTesting summarize_long_context (brief)...")
422
+ summary_brief = summarize_long_context(detail_level="brief")
423
+ print(f"Brief Summary: {summary_brief}")
424
+
425
+ # Test extraction
426
+ print("\nTesting extract_key_information (decisions)...")
427
+ decisions = extract_key_information(query="List all decisions made in the meeting")
428
+ print(f"Decisions Extracted: {decisions}")
429
+
430
+ # Test filtering
431
+ print("\nTesting filter_by_relevance (budget)...")
432
+ budget_text = filter_by_relevance(topic="budget allocation", threshold=0.7)
433
+ print(f"Filtered Budget Text (first 300 chars):\n{budget_text[:300]}...")
434
+
435
+ # Test querying
436
+ print("\nTesting query_context_index (Q1 sales)...")
437
+ sales_query = "What was the sales increase in Q1?"
438
+ sales_answer = query_context_index(sales_query)
439
+ print(f"Answer to '{sales_query}': {sales_answer}")
440
+
441
+ print("\nTesting query_context_index (non-existent info)...")
442
+ non_existent_query = "Who is the CEO?"
443
+ non_existent_answer = query_context_index(non_existent_query)
444
+ print(f"Answer to '{non_existent_query}': {non_existent_answer}")
445
+
446
+ # Initialize the agent (optional)
447
+ # test_agent = initialize_long_context_management_agent()
448
+ # print("\nLong Context Management Agent initialized successfully for testing.")
449
+
450
+ except Exception as e:
451
+ print(f"Error during testing: {e}")
452
+
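The module keeps its index in process-wide globals, which the comments above flag as a simplification; concurrent sessions would overwrite each other's context. A hypothetical sketch of the "more robust state management" the comment hints at, wrapping the same logic in a per-session object (names are illustrative, not part of this commit):

```python
# Hypothetical refactor: per-session context state instead of module globals.
from typing import Optional
from llama_index.core import Document, VectorStoreIndex

class ContextStore:
    """Holds one session's text, source label, and vector index."""

    def __init__(self) -> None:
        self.index: Optional[VectorStoreIndex] = None
        self.text: Optional[str] = None
        self.source: Optional[str] = None

    def load(self, text: str, source: str = "provided_text") -> None:
        self.text, self.source = text, source
        self.index = VectorStoreIndex.from_documents([Document(text=text)])

    def query(self, question: str) -> str:
        if self.index is None:
            raise RuntimeError("No context loaded yet.")
        return str(self.index.as_query_engine().query(question))
```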
agents/math_agent.py ADDED
@@ -0,0 +1,696 @@
1
+ import functools
+ import os
2
+ import logging
3
+ from typing import List, Optional, Union, Dict
4
+ from dotenv import load_dotenv
5
+
6
+ import sympy as sp
7
+ import numpy as np
8
+ import scipy.linalg as la
9
+ import scipy.special as special
10
+ from scipy.integrate import odeint, quad
11
+ from scipy.stats import binom, norm, poisson
12
+ import numpy.fft as fft
13
+
14
+ from llama_index.core.agent.workflow import ReActAgent
15
+ from llama_index.core.tools import FunctionTool
16
+ from llama_index.llms.google_genai import GoogleGenAI
17
+ from llama_index.tools.wolfram_alpha import WolframAlphaToolSpec
18
+
19
+ # Load environment variables
20
+ load_dotenv()
21
+
22
+ # Setup logging
23
+ logger = logging.getLogger(__name__)
24
+
25
+ # --- Math Tool Functions (with enhanced logging and error handling) ---
26
+
27
+ # Helper to convert numpy scalars/arrays (including values nested in dicts)
+ # into plain Python types so tool outputs stay serializable.
+ def _to_serializable(value):
+     if isinstance(value, np.ndarray):
+         return value.tolist()
+     if isinstance(value, np.integer):
+         return int(value)
+     if isinstance(value, np.floating):
+         return float(value)
+     if isinstance(value, np.complexfloating):
+         return complex(value)
+     if isinstance(value, np.bool_):
+         return bool(value)
+     if isinstance(value, dict):
+         return {k: _to_serializable(v) for k, v in value.items()}
+     return value
+
+ # Helper decorator for error handling, logging, and output conversion
+ def math_tool_handler(func):
+     @functools.wraps(func)  # preserve __name__/__doc__ so FunctionTool.from_defaults sees them
+     def wrapper(*args, **kwargs):
+         func_name = func.__name__
+         logger.info(f"Executing math tool: {func_name} with args: {args}, kwargs: {kwargs}")
+         try:
+             result = func(*args, **kwargs)
+             logger.info(f"Tool {func_name} executed successfully. Result: {str(result)[:200]}...")
+             # Ensure the result is serializable. The numpy abstract base classes
+             # above cover all concrete int/float/complex dtypes and remain valid
+             # on NumPy 2.x, where aliases like np.float_ and np.complex_ were removed.
+             return _to_serializable(result)
+         except (sp.SympifyError, TypeError, ValueError, np.linalg.LinAlgError, ZeroDivisionError) as e:
+             logger.warning(f"Math error in {func_name}: {e}")
+             return f"Error in {func_name}: {e}"
+         except Exception as e:
+             logger.error(f"Unexpected error in {func_name}: {e}", exc_info=True)
+             return f"Unexpected error in {func_name}: {e}"
+     return wrapper
56
+
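`FunctionTool.from_defaults` derives each tool's name, description, and argument schema from the wrapped function's `__name__`, docstring, and signature, which is why the decorator must apply `functools.wraps`; without it every tool would register as `wrapper`. A small illustrative check (hypothetical, not in the file):

```python
# Illustrative check: metadata survives the math_tool_handler decorator.
@math_tool_handler
def add_numbers(a: float, b: float) -> float:
    """Add two numbers."""
    return a + b

tool = FunctionTool.from_defaults(fn=add_numbers)
assert tool.metadata.name == "add_numbers"
```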
57
+ # --- Symbolic math functions ---
58
+ @math_tool_handler
59
+ def solve_symbolic_equation(equation: str, variable: str = "x") -> str:
60
+ """Solve a symbolic equation (e.g., 'x**2 - 4') for the given variable."""
61
+ symbol = sp.symbols(variable)
62
+ # Ensure equation is treated as expression == 0 if no equality sign
63
+ if "=" not in equation:
64
+ expr = sp.sympify(equation)
65
+ else:
66
+ lhs, rhs = equation.split("=", 1)
67
+ expr = sp.Eq(sp.sympify(lhs.strip()), sp.sympify(rhs.strip()))
68
+ solutions = sp.solve(expr, symbol)
69
+ return f"Solutions: {solutions}"
70
+
71
+ @math_tool_handler
72
+ def compute_derivative(expression: str, variable: str = "x") -> str:
73
+ """Compute the symbolic derivative of an expression (e.g., 'sin(x)*x**2')."""
74
+ symbol = sp.symbols(variable)
75
+ expr = sp.sympify(expression)
76
+ deriv = sp.diff(expr, symbol)
77
+ return f"Derivative: {deriv}"
78
+
79
+ @math_tool_handler
80
+ def compute_integral(expression: str, variable: str = "x") -> str:
81
+ """Compute the symbolic indefinite integral of an expression (e.g., '1/x')."""
82
+ symbol = sp.symbols(variable)
83
+ expr = sp.sympify(expression)
84
+ integ = sp.integrate(expr, symbol)
85
+ return f"Integral: {integ} + C"
86
+
87
+ @math_tool_handler
88
+ def compute_limit(
89
+ expression: str, variable: str = "x", point: str = "oo"
90
+ ) -> str:
91
+ """Compute the limit of an expression (e.g., 'sin(x)/x') as variable approaches point (e.g., '0', 'oo')."""
92
+ symbol = sp.symbols(variable)
93
+ expr = sp.sympify(expression)
94
+ # Handle 'oo', '-oo', 'zoo' for infinity, or numerical points
95
+ if point.lower() == "oo":
96
+ pt = sp.oo
97
+ elif point.lower() == "-oo":
98
+ pt = -sp.oo
99
+ elif point.lower() == "zoo":
100
+ pt = sp.zoo # Complex infinity
101
+ else:
102
+ pt = sp.sympify(point)
103
+ lim = sp.limit(expr, symbol, pt)
104
+ return f"Limit at {point}: {lim}"
105
+
106
+ @math_tool_handler
107
+ def simplify_expression(expression: str) -> str:
108
+ """Simplify a symbolic expression (e.g., 'sin(x)**2 + cos(x)**2')."""
109
+ expr = sp.sympify(expression)
110
+ simp = sp.simplify(expr)
111
+ return f"Simplified expression: {simp}"
112
+
113
+ @math_tool_handler
114
+ def expand_expression(expression: str) -> str:
115
+ """Expand a symbolic expression (e.g., '(x+y)**2')."""
116
+ expr = sp.sympify(expression)
117
+ exp = sp.expand(expr)
118
+ return f"Expanded expression: {exp}"
119
+
120
+ @math_tool_handler
121
+ def factor_expression(expression: str) -> str:
122
+ """Factor a symbolic expression (e.g., 'x**2 - y**2')."""
123
+ expr = sp.sympify(expression)
124
+ fact = sp.factor(expr)
125
+ return f"Factored expression: {fact}"
126
+
127
+
128
+ # --- Matrix math functions ---
129
+ @math_tool_handler
130
+ def matrix_addition(a: List[List[float]], b: List[List[float]]) -> List[List[float]]:
131
+ """Add two matrices element-wise. Input: [[1, 2], [3, 4]], [[5, 6], [7, 8]]."""
132
+ A = np.array(a)
133
+ B = np.array(b)
134
+ if A.shape != B.shape:
135
+ raise ValueError("Matrices must have the same shape for addition.")
136
+ return (A + B)
137
+
138
+ @math_tool_handler
139
+ def matrix_subtraction(a: List[List[float]], b: List[List[float]]) -> List[List[float]]:
140
+ """Subtract matrix B from matrix A element-wise. Input: [[5, 6], [7, 8]], [[1, 2], [3, 4]]."""
141
+ A = np.array(a)
142
+ B = np.array(b)
143
+ if A.shape != B.shape:
144
+ raise ValueError("Matrices must have the same shape for subtraction.")
145
+ return (A - B)
146
+
147
+ @math_tool_handler
148
+ def matrix_multiplication(a: List[List[float]], b: List[List[float]]) -> List[List[float]]:
149
+ """Multiply two matrices. Input: [[1, 2], [3, 4]], [[5, 6], [7, 8]]."""
150
+ A = np.array(a)
151
+ B = np.array(b)
152
+ if A.shape[1] != B.shape[0]:
153
+ raise ValueError("Inner dimensions must match for matrix multiplication.")
154
+ return np.matmul(A, B)
155
+
156
+ @math_tool_handler
157
+ def matrix_inverse(matrix: List[List[float]]) -> List[List[float]]:
158
+ """Compute the inverse of a square matrix. Input: [[1, 2], [3, 4]]."""
159
+ M = np.array(matrix)
160
+ if M.shape[0] != M.shape[1]:
161
+ raise ValueError("Matrix must be square to compute inverse.")
162
+ return np.linalg.inv(M)
163
+
164
+ @math_tool_handler
165
+ def matrix_determinant(matrix: List[List[float]]) -> float:
166
+ """Compute the determinant of a square matrix. Input: [[1, 2], [3, 4]]."""
167
+ M = np.array(matrix)
168
+ if M.shape[0] != M.shape[1]:
169
+ raise ValueError("Matrix must be square to compute determinant.")
170
+ return np.linalg.det(M)
171
+
172
+ @math_tool_handler
173
+ def matrix_transpose(matrix: List[List[float]]) -> List[List[float]]:
174
+ """Transpose a matrix. Input: [[1, 2, 3], [4, 5, 6]]."""
175
+ M = np.array(matrix)
176
+ return M.T
177
+
178
+ @math_tool_handler
179
+ def matrix_rank(matrix: List[List[float]]) -> int:
180
+ """Compute the rank of a matrix. Input: [[1, 2], [2, 4]]."""
181
+ M = np.array(matrix)
182
+ return np.linalg.matrix_rank(M)
183
+
184
+ @math_tool_handler
185
+ def matrix_trace(matrix: List[List[float]]) -> float:
186
+ """Compute the trace of a square matrix. Input: [[1, 2], [3, 4]]."""
187
+ M = np.array(matrix)
188
+ if M.shape[0] != M.shape[1]:
189
+ raise ValueError("Matrix must be square to compute trace.")
190
+ return np.trace(M)
191
+
192
+ @math_tool_handler
193
+ def matrix_norm(matrix: List[List[float]], ord_str: str = "fro") -> float:
194
+ """Compute the norm of a matrix. ord_str can be 'fro' (Frobenius), 'nuc' (nuclear), inf, -inf, 1, -1, 2, -2. Input: [[1, 2], [3, 4]]."""
195
+ M = np.array(matrix)
196
+ ord_map = {"fro": "fro", "nuc": "nuc", "inf": np.inf, "-inf": -np.inf, "1": 1, "-1": -1, "2": 2, "-2": -2}
197
+ ord_val = ord_map.get(ord_str)
198
+ if ord_val is None:
199
+ raise ValueError(f"Invalid ord_str: {ord_str}. Must be one of {list(ord_map.keys())}")
200
+ return np.linalg.norm(M, ord=ord_val)
201
+
202
+ @math_tool_handler
203
+ def eigenvalues(matrix: List[List[float]]) -> List[complex]:
204
+ """Compute eigenvalues of a square matrix. Input: [[1, -1], [1, 1]]."""
205
+ M = np.array(matrix)
206
+ if M.shape[0] != M.shape[1]:
207
+ raise ValueError("Matrix must be square to compute eigenvalues.")
208
+ vals = np.linalg.eigvals(M)
209
+ return vals
210
+
211
+ @math_tool_handler
212
+ def eigenvectors(matrix: List[List[float]]) -> List[List[complex]]:
213
+ """Compute eigenvectors of a square matrix. Returns list of eigenvectors. Input: [[1, -1], [1, 1]]."""
214
+ M = np.array(matrix)
215
+ if M.shape[0] != M.shape[1]:
216
+ raise ValueError("Matrix must be square to compute eigenvectors.")
217
+ vals, vecs = np.linalg.eig(M)
218
+ # Return eigenvectors as rows or columns? Let's return as list of column vectors
219
+ return vecs.T # Transpose to get eigenvectors as list items
220
+
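Since `np.linalg.eig` returns eigenvectors as columns, the transpose above makes each list element a single eigenvector, paired index-by-index with the output of `eigenvalues`. A small worked example:

```python
# Worked example: a diagonal matrix has the unit axes as eigenvectors.
vecs = eigenvectors([[2.0, 0.0], [0.0, 3.0]])
# -> [[1.0, 0.0], [0.0, 1.0]]; row 0 pairs with eigenvalue 2, row 1 with 3.
```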
221
+ @math_tool_handler
222
+ def svd_decompose(matrix: List[List[float]]) -> Dict[str, List]:
223
+ """Compute the singular value decomposition (U, S, Vh) of a matrix. Input: [[1, 2], [3, 4], [5, 6]]."""
224
+ M = np.array(matrix)
225
+ U, S, Vh = np.linalg.svd(M)
226
+ return {"U": U, "S": S, "Vh": Vh}
227
+
228
+ @math_tool_handler
229
+ def lu_decompose(matrix: List[List[float]]) -> Dict[str, List]:
230
+ """Compute the LU decomposition (P, L, U) of a matrix. Input: [[1, 2], [3, 4]]."""
231
+ M = np.array(matrix)
232
+ P, L, U = la.lu(M)
233
+ return {"P": P, "L": L, "U": U}
234
+
235
+ @math_tool_handler
236
+ def qr_decompose(matrix: List[List[float]]) -> Dict[str, List]:
237
+ """Compute the QR decomposition (Q, R) of a matrix. Input: [[1, 2], [3, 4]]."""
238
+ M = np.array(matrix)
239
+ Q, R = np.linalg.qr(M)
240
+ return {"Q": Q, "R": R}
241
+
242
+ # --- Statistics functions ---
243
+ @math_tool_handler
244
+ def mean(values: List[float]) -> float:
245
+ """Compute the mean of a list of numbers. Input: [1, 2, 3, 4, 5]."""
246
+ if not values:
247
+ raise ValueError("Input list cannot be empty for mean calculation.")
248
+ return np.mean(np.array(values))
249
+
250
+ @math_tool_handler
251
+ def median(values: List[float]) -> float:
252
+ """Compute the median of a list of numbers. Input: [1, 3, 2, 4, 5]."""
253
+ if not values:
254
+ raise ValueError("Input list cannot be empty for median calculation.")
255
+ return np.median(np.array(values))
256
+
257
+ @math_tool_handler
258
+ def std_dev(values: List[float], ddof: int = 1) -> float:
259
+ """Compute the sample standard deviation (ddof=1) or population (ddof=0) of a list. Input: [1, 2, 3, 4, 5]."""
260
+ if not values or len(values) < ddof:
261
+ raise ValueError(f"Input list must have at least {ddof} elements for std dev with ddof={ddof}.")
262
+ return np.std(np.array(values), ddof=ddof)
263
+
264
+ @math_tool_handler
265
+ def variance(values: List[float], ddof: int = 1) -> float:
266
+ """Compute the sample variance (ddof=1) or population (ddof=0) of a list. Input: [1, 2, 3, 4, 5]."""
267
+ if not values or len(values) < ddof:
268
+ raise ValueError(f"Input list must have at least {ddof} elements for variance with ddof={ddof}.")
269
+ return np.var(np.array(values), ddof=ddof)
270
+
271
+ @math_tool_handler
272
+ def percentile(values: List[float], percent: float) -> float:
273
+ """Compute the q-th percentile (0<=q<=100) of a list. Input: [1, 2, 3, 4, 5], 75."""
274
+ if not values:
275
+ raise ValueError("Input list cannot be empty for percentile calculation.")
276
+ if not (0 <= percent <= 100):
277
+ raise ValueError("Percent must be between 0 and 100.")
278
+ return np.percentile(np.array(values), percent)
279
+
280
+ @math_tool_handler
281
+ def covariance(x: List[float], y: List[float], ddof: int = 1) -> float:
282
+ """Compute sample covariance (ddof=1) or population (ddof=0) between two lists. Input: [1, 2, 3], [4, 5, 6]."""
283
+ X = np.array(x)
284
+ Y = np.array(y)
285
+ if X.size != Y.size:
286
+ raise ValueError("Input lists must have the same length for covariance.")
287
+ if X.size == 0 or X.size < ddof:
288
+ raise ValueError(f"Input lists must have at least {ddof} elements for covariance with ddof={ddof}.")
289
+ # np.cov returns the covariance matrix, we want the off-diagonal element
290
+ return np.cov(X, Y, ddof=ddof)[0, 1]
291
+
292
+ @math_tool_handler
293
+ def correlation(x: List[float], y: List[float]) -> float:
294
+ """Compute Pearson correlation coefficient between two lists. Input: [1, 2, 3], [1, 2, 3.1]."""
295
+ X = np.array(x)
296
+ Y = np.array(y)
297
+ if X.size != Y.size:
298
+ raise ValueError("Input lists must have the same length for correlation.")
299
+ if X.size < 2:
300
+ raise ValueError("Need at least 2 data points for correlation.")
301
+ # np.corrcoef returns the correlation matrix
302
+ corr_matrix = np.corrcoef(X, Y)
303
+ # Handle case where std dev is zero (results in nan)
304
+ if np.isnan(corr_matrix[0, 1]):
305
+ logger.warning("Correlation resulted in NaN, likely due to zero standard deviation in one or both inputs.")
306
+ # Return 0 or raise error? Let's return 0 for now.
307
+ return 0.0
308
+ return corr_matrix[0, 1]
309
+
310
+ @math_tool_handler
311
+ def linear_regression(x: List[float], y: List[float]) -> Dict[str, float]:
312
+ """Perform simple linear regression (y = mx + c). Returns slope (m) and intercept (c). Input: [1, 2, 3], [2, 4.1, 5.9]."""
313
+ X = np.array(x)
314
+ Y = np.array(y)
315
+ if X.size != Y.size:
316
+ raise ValueError("Input lists must have the same length for linear regression.")
317
+ if X.size < 2:
318
+ raise ValueError("Need at least 2 data points for linear regression.")
319
+ slope, intercept = np.polyfit(X, Y, 1)
320
+ return {"slope": slope, "intercept": intercept}
321
+
322
+ # --- Numerical functions ---
323
+ @math_tool_handler
324
+ def find_polynomial_roots(coefficients: List[float]) -> List[complex]:
325
+ """Find roots of a polynomial given coefficients [a_n, a_n-1, ..., a_0]. Input: [1, -3, 2] for x^2-3x+2."""
326
+ if not coefficients:
327
+ raise ValueError("Coefficient list cannot be empty.")
328
+ return np.roots(coefficients)
329
+
330
+ @math_tool_handler
331
+ def interpolate_value(x_vals: List[float], y_vals: List[float], x: float) -> float:
332
+ """Linear interpolate a value at x given data points (x_vals, y_vals). Input: [0, 1, 2], [0, 1, 4], 1.5."""
333
+ if len(x_vals) != len(y_vals):
334
+ raise ValueError("x_vals and y_vals must have the same length.")
335
+ if len(x_vals) < 2:
336
+ raise ValueError("Need at least 2 data points for interpolation.")
337
+ # Ensure x_vals are sorted for np.interp
338
+ sorted_indices = np.argsort(x_vals)
339
+ x_sorted = np.array(x_vals)[sorted_indices]
340
+ y_sorted = np.array(y_vals)[sorted_indices]
341
+ return np.interp(x, x_sorted, y_sorted)
342
+
343
+ @math_tool_handler
344
+ def numerical_integration(
345
+ func_str: str, a: float, b: float, variable: str = "x"
346
+ ) -> float:
347
+ """Numerically integrate func_str (e.g., 'x**2 * sin(x)') from a to b. Input: 'x**2', 0, 1."""
348
+ symbol = sp.symbols(variable)
349
+ # Security Note: Using sympify/lambdify can be risky if func_str is untrusted.
350
+ # Consider using a safer evaluation method if input is external.
351
+ try:
352
+ func = sp.sympify(func_str)
353
+ f_lambdified = sp.lambdify(symbol, func, modules=["numpy"])
354
+ except (sp.SympifyError, SyntaxError) as sym_err:
355
+ raise ValueError(f"Invalid function string: {func_str}. Error: {sym_err}")
356
+
357
+ result, abserr = quad(f_lambdified, a, b)
358
+ logger.info(f"Numerical integration estimated absolute error: {abserr}")
359
+ return result
360
+
361
+ @math_tool_handler
362
+ def solve_ode(
363
+ func_str: str, y0: float, t_eval: List[float], args: tuple = ()
364
+ ) -> List[float]:
365
+ """Solve a first-order ODE dy/dt = f(t, y) using scipy.integrate.solve_ivp.
366
+ func_str should define f(t, y), e.g., '-y + sin(t)'.
367
+ y0 is the initial condition y(t_eval[0]).
368
+ t_eval is the list of time points to evaluate the solution at.
369
+ args are optional additional arguments passed to f(t, y, *args).
370
+ Input: func_str='-y', y0=1, t_eval=[0, 1, 2, 3, 4]."""
371
+ from scipy.integrate import solve_ivp
372
+ import math # Make math functions available
373
+
374
+ # Security Note: Using eval is dangerous with untrusted input.
375
+ # A safer approach would parse the expression or use a restricted environment.
376
+ def ode_func(t, y, *args):
377
+ try:
378
+ # Provide t, y, args, and safe math functions in the eval context
379
+ local_vars = {"t": t, "y": y, "math": math, "np": np}
380
+ # Add args if provided
381
+ if args:
382
+ # Assuming args correspond to p1, p2, ... in the func_str
383
+ for i, arg_val in enumerate(args):
384
+ local_vars[f"p{i+1}"] = arg_val
385
+ return eval(func_str, {"__builtins__": {}}, local_vars)
386
+ except Exception as e:
387
+ # Log the error and raise it to be caught by the handler
388
+ logger.error(f"Error evaluating ODE function {func_str} at t={t}, y={y}: {e}")
389
+ raise ValueError(f"Error in ODE function definition: {e}")
390
+
391
+ if not t_eval:
392
+ raise ValueError("t_eval list cannot be empty.")
393
+ t_span = (min(t_eval), max(t_eval))
394
+
395
+ sol = solve_ivp(ode_func, t_span, [y0], t_eval=t_eval, args=args)
396
+
397
+ if not sol.success:
398
+ raise RuntimeError(f"ODE solver failed: {sol.message}")
399
+
400
+ return sol.y[0] # Return the solution for y
401
+
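Given the `eval` risk flagged in the comments, one hedged alternative is to parse `func_str` once with SymPy and `lambdify` it, mirroring what `numerical_integration` already does. The sketch below assumes `func_str` uses only `t` and `y`, and omits the extra `args` for brevity:

```python
# Hypothetical eval-free variant: parse the ODE right-hand side with SymPy.
import sympy as sp
from scipy.integrate import solve_ivp

def solve_ode_sympy(func_str: str, y0: float, t_eval: list) -> list:
    t_sym, y_sym = sp.symbols("t y")
    f = sp.lambdify((t_sym, y_sym), sp.sympify(func_str), modules=["numpy"])
    sol = solve_ivp(lambda t, y: [f(t, y[0])], (min(t_eval), max(t_eval)), [y0], t_eval=t_eval)
    if not sol.success:
        raise RuntimeError(sol.message)
    return sol.y[0].tolist()

# e.g. solve_ode_sympy("-y", 1.0, [0, 1, 2]) ~ [1.0, 0.368, 0.135]
```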
402
+ # --- Vector functions ---
403
+ @math_tool_handler
404
+ def dot_product(a: List[float], b: List[float]) -> float:
405
+ """Compute dot product of two vectors. Input: [1, 2, 3], [4, 5, 6]."""
406
+ A = np.array(a)
407
+ B = np.array(b)
408
+ if A.shape != B.shape:
409
+ raise ValueError("Vectors must have the same dimension for dot product.")
410
+ return np.dot(A, B)
411
+
412
+ @math_tool_handler
413
+ def cross_product(a: List[float], b: List[float]) -> List[float]:
414
+ """Compute cross product of two 3D vectors. Input: [1, 0, 0], [0, 1, 0]."""
415
+ A = np.array(a)
416
+ B = np.array(b)
417
+ if A.size != 3 or B.size != 3:
418
+ raise ValueError("Cross product is only defined for 3D vectors.")
419
+ return np.cross(A, B)
420
+
421
+ @math_tool_handler
422
+ def vector_magnitude(a: List[float]) -> float:
423
+ """Compute magnitude (Euclidean norm) of a vector. Input: [3, 4]."""
424
+ if not a:
425
+ raise ValueError("Input vector cannot be empty.")
426
+ return np.linalg.norm(np.array(a))
427
+
428
+ @math_tool_handler
429
+ def vector_normalize(a: List[float]) -> List[float]:
430
+ """Normalize a vector to unit length. Input: [3, 4]."""
431
+ A = np.array(a)
432
+ norm = np.linalg.norm(A)
433
+ if norm == 0:
434
+ raise ValueError("Cannot normalize a zero vector.")
435
+ return (A / norm)
436
+
437
+ @math_tool_handler
438
+ def vector_angle(a: List[float], b: List[float], degrees: bool = False) -> float:
439
+ """Compute the angle (in radians or degrees) between two vectors. Input: [1, 0], [0, 1]."""
440
+ dot = dot_product(a, b) # Use our handled dot_product
441
+ norm_a = vector_magnitude(a)
442
+ norm_b = vector_magnitude(b)
443
+ if norm_a == 0 or norm_b == 0:
444
+ raise ValueError("Cannot compute angle with zero vector(s).")
445
+ # Clip argument to arccos to avoid domain errors due to floating point inaccuracies
446
+ cos_theta = np.clip(dot / (norm_a * norm_b), -1.0, 1.0)
447
+ angle_rad = np.arccos(cos_theta)
448
+ return np.degrees(angle_rad) if degrees else angle_rad
449
+
450
+ # --- Probability functions ---
451
+ @math_tool_handler
452
+ def binomial_pmf(k: int, n: int, p: float) -> float:
453
+ """Compute binomial probability mass function P(X=k | n, p). Input: k=2, n=5, p=0.5."""
454
+ if not (0 <= p <= 1):
455
+ raise ValueError("Probability p must be between 0 and 1.")
456
+ if not (0 <= k <= n):
457
+ raise ValueError("k must be between 0 and n (inclusive).")
458
+ return binom.pmf(k, n, p)
459
+
460
+ @math_tool_handler
461
+ def normal_pdf(x: float, mu: float = 0, sigma: float = 1) -> float:
462
+ """Compute normal distribution probability density function N(x | mu, sigma). Input: x=0, mu=0, sigma=1."""
463
+ if sigma <= 0:
464
+ raise ValueError("Standard deviation sigma must be positive.")
465
+ return norm.pdf(x, mu, sigma)
466
+
467
+ @math_tool_handler
468
+ def normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
469
+ """Compute normal distribution cumulative distribution function P(X<=x | mu, sigma). Input: x=0, mu=0, sigma=1."""
470
+ if sigma <= 0:
471
+ raise ValueError("Standard deviation sigma must be positive.")
472
+ return norm.cdf(x, mu, sigma)
473
+
474
+ @math_tool_handler
475
+ def poisson_pmf(k: int, lam: float) -> float:
476
+ """Compute Poisson probability mass function P(X=k | lambda). Input: k=2, lam=3."""
477
+ if lam < 0:
478
+ raise ValueError("Rate parameter lambda must be non-negative.")
479
+ if k < 0 or not isinstance(k, int):
480
+ raise ValueError("k must be a non-negative integer.")
481
+ return poisson.pmf(k, lam)
482
+
483
+ # --- Special functions ---
484
+ @math_tool_handler
485
+ def gamma_function(x: float) -> float:
486
+ """Compute the gamma function Gamma(x). Input: 5."""
487
+ return special.gamma(x)
488
+
489
+ @math_tool_handler
490
+ def beta_function(x: float, y: float) -> float:
491
+ """Compute the beta function B(x, y). Input: 2, 3."""
492
+ return special.beta(x, y)
493
+
494
+ @math_tool_handler
495
+ def erf_function(x: float) -> float:
496
+ """Compute the error function erf(x). Input: 1."""
497
+ return special.erf(x)
498
+
499
+ # --- Fourier Transform functions ---
500
+ @math_tool_handler
501
+ def fft_transform(y: List[float]) -> List[complex]:
502
+ """Compute the Fast Fourier Transform (FFT) of a real sequence y. Input: [0, 1, 0, -1]."""
503
+ if not y:
504
+ raise ValueError("Input list cannot be empty for FFT.")
505
+ return fft.fft(np.array(y))
506
+
507
+ @math_tool_handler
508
+ def ifft_transform(y_complex: List[complex]) -> List[complex]:
509
+ """Compute the inverse Fast Fourier Transform (IFFT) of a complex sequence. Input: result from fft_transform."""
510
+ if not y_complex:
511
+ raise ValueError("Input list cannot be empty for IFFT.")
512
+ return fft.ifft(np.array(y_complex))
513
+
514
+ # --- Tool List Creation ---
515
+
516
+ def get_python_math_tools() -> List[FunctionTool]:
517
+ """Returns a list of FunctionTools for the Python math functions."""
518
+ py_tools = [
519
+ # Symbolic
520
+ FunctionTool.from_defaults(fn=solve_symbolic_equation),
521
+ FunctionTool.from_defaults(fn=compute_derivative),
522
+ FunctionTool.from_defaults(fn=compute_integral),
523
+ FunctionTool.from_defaults(fn=compute_limit),
524
+ FunctionTool.from_defaults(fn=simplify_expression),
525
+ FunctionTool.from_defaults(fn=expand_expression),
526
+ FunctionTool.from_defaults(fn=factor_expression),
527
+ # Matrix
528
+ FunctionTool.from_defaults(fn=matrix_addition),
529
+ FunctionTool.from_defaults(fn=matrix_subtraction),
530
+ FunctionTool.from_defaults(fn=matrix_multiplication),
531
+ FunctionTool.from_defaults(fn=matrix_inverse),
532
+ FunctionTool.from_defaults(fn=matrix_determinant),
533
+ FunctionTool.from_defaults(fn=matrix_transpose),
534
+ FunctionTool.from_defaults(fn=matrix_rank),
535
+ FunctionTool.from_defaults(fn=matrix_trace),
536
+ FunctionTool.from_defaults(fn=matrix_norm),
537
+ FunctionTool.from_defaults(fn=eigenvalues),
538
+ FunctionTool.from_defaults(fn=eigenvectors),
539
+ FunctionTool.from_defaults(fn=svd_decompose),
540
+ FunctionTool.from_defaults(fn=lu_decompose),
541
+ FunctionTool.from_defaults(fn=qr_decompose),
542
+ # Statistics
543
+ FunctionTool.from_defaults(fn=mean),
544
+ FunctionTool.from_defaults(fn=median),
545
+ FunctionTool.from_defaults(fn=std_dev),
546
+ FunctionTool.from_defaults(fn=variance),
547
+ FunctionTool.from_defaults(fn=percentile),
548
+ FunctionTool.from_defaults(fn=covariance),
549
+ FunctionTool.from_defaults(fn=correlation),
550
+ FunctionTool.from_defaults(fn=linear_regression),
551
+ # Numerical
552
+ FunctionTool.from_defaults(fn=find_polynomial_roots),
553
+ FunctionTool.from_defaults(fn=interpolate_value),
554
+ FunctionTool.from_defaults(fn=numerical_integration),
555
+ FunctionTool.from_defaults(fn=solve_ode),
556
+ # Vector
557
+ FunctionTool.from_defaults(fn=dot_product),
558
+ FunctionTool.from_defaults(fn=cross_product),
559
+ FunctionTool.from_defaults(fn=vector_magnitude),
560
+ FunctionTool.from_defaults(fn=vector_normalize),
561
+ FunctionTool.from_defaults(fn=vector_angle),
562
+ # Probability
563
+ FunctionTool.from_defaults(fn=binomial_pmf),
564
+ FunctionTool.from_defaults(fn=normal_pdf),
565
+ FunctionTool.from_defaults(fn=normal_cdf),
566
+ FunctionTool.from_defaults(fn=poisson_pmf),
567
+ # Special Functions
568
+ FunctionTool.from_defaults(fn=gamma_function),
569
+ FunctionTool.from_defaults(fn=beta_function),
570
+ FunctionTool.from_defaults(fn=erf_function),
571
+ # Fourier
572
+ FunctionTool.from_defaults(fn=fft_transform),
573
+ FunctionTool.from_defaults(fn=ifft_transform),
574
+ ]
575
+ # Update descriptions for clarity if needed (optional)
576
+ for tool in py_tools:
577
+ tool.metadata.description = f"(Python) {tool.metadata.description}"
578
+ logger.info(f"Created {len(py_tools)} Python math tools.")
579
+ return py_tools
580
+
581
+ # --- Wolfram Alpha Tool ---
582
+ _wolfram_alpha_tools = None
583
+
584
+ def get_wolfram_alpha_tools() -> List[FunctionTool]:
585
+ """Initializes and returns Wolfram Alpha tools (singleton)."""
586
+ global _wolfram_alpha_tools
587
+ if _wolfram_alpha_tools is None:
588
+ logger.info("Initializing WolframAlphaToolSpec...")
589
+ wolfram_alpha_app_id = os.getenv("WOLFRAM_ALPHA_APP_ID")
590
+ if not wolfram_alpha_app_id:
591
+ logger.warning("WOLFRAM_ALPHA_APP_ID not set. Wolfram Alpha tools will be unavailable.")
592
+ _wolfram_alpha_tools = []
593
+ else:
594
+ try:
595
+ spec = WolframAlphaToolSpec(app_id=wolfram_alpha_app_id)
596
+ _wolfram_alpha_tools = spec.to_tool_list()
597
+ # Add prefix to description for clarity
598
+ for tool in _wolfram_alpha_tools:
599
+ tool.metadata.description = f"(WolframAlpha) {tool.metadata.description}"
600
+ logger.info(f"WolframAlpha tools initialized: {len(_wolfram_alpha_tools)} tools.")
601
+ except Exception as e:
602
+ logger.error(f"Failed to initialize WolframAlpha tools: {e}", exc_info=True)
603
+ _wolfram_alpha_tools = []
604
+ return _wolfram_alpha_tools
605
+
606
+ # --- Agent Initialization ---
607
+
608
+ def initialize_math_agent() -> ReActAgent:
609
+ """Initializes the Math Agent with Python and Wolfram Alpha tools."""
610
+ logger.info("Initializing MathAgent...")
611
+
612
+ # Configuration
613
+ agent_llm_model = os.getenv("MATH_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
614
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
615
+
616
+ if not gemini_api_key:
617
+ logger.error("GEMINI_API_KEY not found in environment variables for MathAgent.")
618
+ raise ValueError("GEMINI_API_KEY must be set for MathAgent")
619
+
620
+ try:
621
+ llm = GoogleGenAI(
622
+ api_key=gemini_api_key,
623
+ model=agent_llm_model,
624
+ )
625
+ logger.info(f"Using agent LLM: {agent_llm_model}")
626
+
627
+ # Combine Python tools and Wolfram Alpha tools
628
+ all_tools = get_python_math_tools() + get_wolfram_alpha_tools()
629
+ if not all_tools:
630
+ logger.warning("No math tools available (Python or WolframAlpha). MathAgent may be ineffective.")
631
+
632
+ # System prompt (consider loading from file)
633
+ system_prompt = """\
634
+ You are MathAgent, a powerful mathematical problem solver. Your goal is to accurately answer mathematical questions using the available tools.
635
+
636
+ Available Tools:
637
+ - Python Tools: A comprehensive suite for symbolic math (SymPy), numerical computation (NumPy/SciPy), statistics, linear algebra, calculus, ODEs, and transforms. Prefixed with '(Python)'. Use these for precise calculations when the method is clear.
638
+ - WolframAlpha Tool: Accesses Wolfram Alpha for complex queries, natural language math questions, data, and real-world facts. Prefixed with '(WolframAlpha)'. Use this for broader questions, knowledge-based math, or when Python tools are insufficient.
639
+
640
+ Workflow:
641
+ 1. **Thought**: Analyze the question. Determine the mathematical concepts involved. Decide the best tool or sequence of tools to use. Prefer Python tools for specific, well-defined calculations. Use WolframAlpha for complex, ambiguous, or knowledge-based queries.
642
+ 2. **Action**: Call the chosen tool with the correct arguments. Ensure inputs match the tool's requirements (e.g., list of lists for matrices, strings for symbolic expressions).
643
+ 3. **Observation**: Examine the tool's output. Check for errors or unexpected results.
644
+ 4. **Iteration**: If the result is incorrect or incomplete, rethink the approach. Try a different tool, adjust parameters, or break the problem down further. If a Python tool fails, consider rephrasing for WolframAlpha.
645
+ 5. **Final Answer**: Once the correct answer is obtained, state it clearly and concisely. Provide the numerical result, symbolic expression, or explanation as requested.
646
+ 6. **Hand-Off**: Pass the final mathematical result or analysis to **planner_agent** for integration into the overall response.
647
+
648
+ Constraints:
649
+ - Always use a tool for calculations; do not perform calculations yourself.
650
+ - Clearly state which tool you are using and why.
651
+ - Handle potential errors gracefully and report them if they prevent finding a solution.
652
+ - Pay close attention to input formats required by each tool (e.g., lists for vectors/matrices, strings for symbolic expressions).
653
+ """
654
+
655
+ agent = ReActAgent(
656
+ name="math_agent",
657
+ description=(
658
+ "MathAgent solves mathematical problems using a suite of Python tools (SymPy, NumPy, SciPy) and WolframAlpha. "
659
+ "It handles symbolic math, numerical computation, statistics, linear algebra, calculus, and more."
660
+ ),
661
+ tools=all_tools,
662
+ llm=llm,
663
+ system_prompt=system_prompt,
664
+ can_handoff_to=["planner_agent"],
665
+ )
666
+ logger.info("MathAgent initialized successfully.")
667
+ return agent
668
+
669
+ except Exception as e:
670
+ logger.error(f"Error during MathAgent initialization: {e}", exc_info=True)
671
+ raise
672
+
673
+ # Example usage (for testing if run directly)
674
+ if __name__ == "__main__":
675
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
676
+ logger.info("Running math_agent.py directly for testing...")
677
+
678
+ # Ensure API keys are set for testing
679
+ required_keys = ["GEMINI_API_KEY"] # WOLFRAM_ALPHA_APP_ID is optional
680
+ missing_keys = [key for key in required_keys if not os.getenv(key)]
681
+ if missing_keys:
682
+ print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
683
+ else:
684
+ if not os.getenv("WOLFRAM_ALPHA_APP_ID"):
685
+ print("Warning: WOLFRAM_ALPHA_APP_ID not set. WolframAlpha tools will be unavailable for testing.")
686
+ try:
687
+ test_agent = initialize_math_agent()
688
+ print("Math Agent initialized successfully for testing.")
689
+ # Example test
690
+ # result = test_agent.chat("What is the integral of x**2 from 0 to 1?")
691
+ # print(f"Test query result: {result}")
692
+ # result2 = test_agent.chat("what is the population of france?") # Test WolframAlpha
693
+ # print(f"Test query 2 result: {result2}")
694
+ except Exception as e:
695
+ print(f"Error during testing: {e}")
696
+
agents/planner_agent.py ADDED
@@ -0,0 +1,253 @@
+import os
+import logging
+from typing import List, Dict
+from dotenv import load_dotenv
+
+from llama_index.core.agent.workflow import ReActAgent
+from llama_index.core.tools import FunctionTool
+from llama_index.llms.google_genai import GoogleGenAI
+
+# Load environment variables
+load_dotenv()
+
+# Setup logging
+logger = logging.getLogger(__name__)
+
+# Helper function to load prompt from file
+def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+    """Loads a prompt from a text file, falling back to a default on failure."""
+    try:
+        # Resolve the prompt path relative to this script's directory
+        script_dir = os.path.dirname(__file__)
+        prompt_path = os.path.join(script_dir, filename)
+        with open(prompt_path, "r") as f:
+            prompt = f.read()
+        logger.info(f"Successfully loaded prompt from {prompt_path}")
+        return prompt
+    except FileNotFoundError:
+        logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
+        return default_prompt
+    except Exception as e:
+        logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+        return default_prompt
+
+# --- Tool Functions ---
+
+def plan(objective: str) -> List[str]:
+    """
+    Generate a list of sub-steps (4-8) from the given objective using an LLM.
+    Args:
+        objective (str): The research or task objective.
+    Returns:
+        List[str]: A list of sub-steps as strings, or an error message list.
+    """
+    logger.info(f"Generating plan for objective: {objective[:100]}...")
+
+    # Configuration for the planning LLM (can be overridden via environment variable)
+    planner_llm_model = os.getenv("PLANNER_TOOL_LLM_MODEL", "models/gemini-1.5-pro")
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not found for planning tool LLM.")
+        return ["Error: GEMINI_API_KEY not set for planning."]
+
+    # Prompt for the LLM to generate sub-steps
+    input_prompt = (
+        "You are a research assistant. "
+        "Given an objective, break it down into a list of 4-8 concise, actionable sub-steps. "
+        "Ensure the steps are logically ordered.\n"
+        f"Objective: {objective}\n"
+        "Sub-steps (one per line, numbered):"
+    )
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=planner_llm_model)
+        logger.info(f"Using planning LLM: {planner_llm_model}")
+        response = llm.complete(input_prompt)
+
+        # Post-process: split lines into sub-steps, removing any numbering
+        lines = response.text.strip().split("\n")
+        sub_steps = []
+        for line in lines:
+            line = line.strip()
+            if not line:
+                continue
+            # Remove potential leading numbering (e.g., "1. ", "- ")
+            if line[0].isdigit() and "." in line[:3]:
+                text = line.split(".", 1)[1].strip()
+            elif line.startswith("- "):
+                text = line[2:].strip()
+            else:
+                text = line
+
+            if text:
+                sub_steps.append(text)
+
+        if not sub_steps:
+            logger.warning("LLM generated no sub-steps for the objective.")
+            return ["Error: Failed to generate sub-steps."]
+
+        logger.info(f"Generated {len(sub_steps)} sub-steps.")
+        return sub_steps
+
+    except Exception as e:
+        logger.error(f"LLM call failed during planning: {e}", exc_info=True)
+        return [f"Error during planning: {e}"]
+
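+# Illustrative example (hypothetical output, not from a real run):
+#   plan("Assess the safety record of sodium-ion batteries")
+# could return something like:
+#   ["Define the evaluation criteria", "Search for recent safety studies",
+#    "Compare findings against lithium-ion data", "Summarize risks and open questions"]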
+def synthesize_and_respond(results: List[Dict[str, str]]) -> str:
+    """
+    Aggregate results from sub-steps into a coherent final report using an LLM.
+    Args:
+        results (List[Dict[str, str]]): List of dictionaries, each with "sub_step" and "answer" keys.
+    Returns:
+        str: A unified, well-structured response, or an error message.
+    """
+    logger.info(f"Synthesizing results from {len(results)} sub-steps...")
+    if not results:
+        logger.warning("Synthesize called with empty results list.")
+        return "No results provided to synthesize."
+
+    # Format the results for the synthesis prompt
+    summary_blocks = ""
+    for i, result in enumerate(results):
+        sub_step = result.get("sub_step", f"Step {i+1}")
+        answer = result.get("answer", "No answer provided.")
+        summary_blocks += f"Sub-step {i+1}: {sub_step}\nAnswer {i+1}: {answer}\n\n"
+
+    # Configuration for the synthesis LLM (can be overridden via environment variable)
+    synthesizer_llm_model = os.getenv("SYNTHESIZER_LLM_MODEL", "models/gemini-1.5-pro")
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not found for synthesis tool LLM.")
+        return "Error: GEMINI_API_KEY not set for synthesis."
+
+    # Prompt for the LLM
+    input_prompt = f"""You are an expert synthesizer. Given the following sub-steps and their answers derived from an initial objective, produce a single, coherent, comprehensive final report that addresses the original objective:
+
+--- SUB-STEP RESULTS ---
+{summary_blocks.strip()}
+--- END SUB-STEP RESULTS ---
+
+Generate the Final Report:
+"""
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=synthesizer_llm_model)
+        logger.info(f"Using synthesis LLM: {synthesizer_llm_model}")
+        response = llm.complete(input_prompt)
+        logger.info("Synthesis successful.")
+        return response.text
+    except Exception as e:
+        logger.error(f"LLM call failed during synthesis: {e}", exc_info=True)
+        return f"Error during synthesis: {e}"
+
+# --- Tool Definitions ---
+synthesize_tool = FunctionTool.from_defaults(
+    fn=synthesize_and_respond,
+    name="synthesize_and_respond",
+    description=(
+        "Aggregates results from multiple sub-steps into a final coherent report. "
+        "Input: results (List[Dict[str, str]]) where each dict has \"sub_step\" and \"answer\". "
+        "Output: A unified report (str) or error message."
+    ),
+)
+
+generate_substeps_tool = FunctionTool.from_defaults(
+    fn=plan,
+    name="generate_substeps",
+    description=(
+        "Decomposes a high-level objective into a concise roadmap of 4–8 actionable sub-steps using an LLM. "
+        "Input: objective (str). Output: List of sub-step strings (List[str]) or error list."
+    ),
+)
+
+# --- Agent Initialization ---
+def initialize_planner_agent() -> ReActAgent:
+    """Initializes the Planner Agent."""
+    logger.info("Initializing PlannerAgent...")
+
+    # Configuration for the agent's main LLM
+    agent_llm_model = os.getenv("PLANNER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not found for PlannerAgent.")
+        raise ValueError("GEMINI_API_KEY must be set for PlannerAgent")
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+        logger.info(f"Using agent LLM: {agent_llm_model}")
+
+        # Load system prompt (falls back to a placeholder if the file is missing)
+        default_system_prompt = "You are PlannerAgent... [Default prompt content - replace with actual]"
+        system_prompt = load_prompt_from_file("../prompts/planner_agent_prompt.txt", default_system_prompt)
+        if system_prompt == default_system_prompt:
+            logger.warning("Using default/fallback system prompt for PlannerAgent.")
+
+        # Define available tools
+        tools = [generate_substeps_tool, synthesize_tool]
+
+        # Define valid handoff targets
+        valid_handoffs = [
+            "code_agent",
+            "research_agent",
+            "math_agent",
+            "role_agent",
+            "image_analyzer_agent",
+            "text_analyzer_agent",
+            "verifier_agent",
+            "reasoning_agent",
+        ]
+
+        agent = ReActAgent(
+            name="planner_agent",
+            description=(
+                "Strategically plans tasks by breaking down objectives into sub-steps using `generate_substeps`. "
+                "Orchestrates execution by handing off sub-steps to specialized agents. "
+                "Synthesizes final results using `synthesize_and_respond`."
+            ),
+            tools=tools,
+            llm=llm,
+            system_prompt=system_prompt,
+            can_handoff_to=valid_handoffs,
+        )
+        logger.info("PlannerAgent initialized successfully.")
+        return agent
+
+    except Exception as e:
+        logger.error(f"Error during PlannerAgent initialization: {e}", exc_info=True)
+        raise
+
+# Example usage (for testing if run directly)
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger.info("Running planner_agent.py directly for testing...")
+
+    # Ensure API key is set
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
+    else:
+        try:
+            # Test plan generation
+            print("\nTesting plan generation...")
+            test_objective = "Analyze the market trends for electric vehicles in Europe for 2024."
+            substeps = plan(test_objective)
+            print(f"Generated Sub-steps:\n{substeps}")
+
+            # Test synthesis
+            print("\nTesting synthesis...")
+            test_results = [
+                {"sub_step": "Identify key EV manufacturers in Europe.", "answer": "Tesla, VW, Stellantis, Renault."},
+                {"sub_step": "Find recent sales data.", "answer": "EV sales grew 25% year-over-year in Q1 2024."},
+                {"sub_step": "Analyze government incentives.", "answer": "Germany reduced subsidies, France maintained them."},
+            ]
+            report = synthesize_and_respond(test_results)
+            print(f"Synthesized Report:\n{report}")
+
+            # Initialize the agent (optional)
+            # test_agent = initialize_planner_agent()
+            # print("\nPlanner Agent initialized successfully for testing.")
+
+        except Exception as e:
+            print(f"Error during testing: {e}")
+
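+# Illustrative wiring sketch (not part of this module): how the planner and its
+# handoff targets could be combined into a single multi-agent workflow. The
+# AgentWorkflow arguments shown are assumptions about the llama_index API.
+#
+# from llama_index.core.agent.workflow import AgentWorkflow
+#
+# workflow = AgentWorkflow(
+#     agents=[initialize_planner_agent(), ...],  # plus the specialist agents
+#     root_agent="planner_agent",                # planner orchestrates the others
+# )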
agents/reasoning_agent.py ADDED
@@ -0,0 +1,167 @@
+import os
+import logging
+from dotenv import load_dotenv
+
+from llama_index.core.agent.workflow import ReActAgent
+from llama_index.core.tools import FunctionTool
+from llama_index.llms.google_genai import GoogleGenAI
+from llama_index.llms.openai import OpenAI
+
+# Load environment variables
+load_dotenv()
+
+# Setup logging
+logger = logging.getLogger(__name__)
+
+# Helper function to load prompt from file
+def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+    """Loads a prompt from a text file, falling back to a default on failure."""
+    try:
+        # Resolve the prompt path relative to this script's directory
+        script_dir = os.path.dirname(__file__)
+        prompt_path = os.path.join(script_dir, filename)
+        with open(prompt_path, "r") as f:
+            prompt = f.read()
+        logger.info(f"Successfully loaded prompt from {prompt_path}")
+        return prompt
+    except FileNotFoundError:
+        logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
+        return default_prompt
+    except Exception as e:
+        logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+        return default_prompt
+
+# --- Tool Function ---
+
+def reasoning_tool_fn(context: str) -> str:
+    """
+    Perform chain-of-thought reasoning over the provided context using a dedicated LLM.
+    Args:
+        context (str): The conversation/workflow history and current problem statement.
+    Returns:
+        str: A structured reasoning trace and conclusion, or an error message.
+    """
+    logger.info(f"Executing reasoning tool with context length: {len(context)}")
+
+    # Configuration for the reasoning LLM (OpenAI in the original)
+    reasoning_llm_model = os.getenv("REASONING_LLM_MODEL", "gpt-4o-mini")  # gpt-4o-mini by default
+    openai_api_key = os.getenv("ALPAFLOW_OPENAI_API_KEY")  # Specific key from original code
+
+    if not openai_api_key:
+        logger.error("ALPAFLOW_OPENAI_API_KEY not found for reasoning tool LLM.")
+        return "Error: ALPAFLOW_OPENAI_API_KEY must be set to use the reasoning tool."
+
+    # Define the prompt for the reasoning LLM
+    reasoning_prompt = f"""You are an expert reasoning engine. Analyze the following workflow context and problem statement:
+
+--- CONTEXT START ---
+{context}
+--- CONTEXT END ---
+
+Perform the following steps:
+1. **Comprehension**: Identify the core question/problem and key constraints from the context.
+2. **Decomposition**: Break the problem into logical sub-steps.
+3. **Chain-of-Thought**: Reason through each sub-step, stating assumptions and deriving implications.
+4. **Verification**: Check conclusions against constraints.
+5. **Synthesis**: Integrate results into a cohesive answer/recommendation.
+6. **Clarity**: Use precise language.
+
+Respond with your numbered reasoning steps followed by a concise final conclusion or recommendation.
+"""
+
+    try:
+        # Note: the original used OpenAI with a specific key and model; retaining that.
+        llm = OpenAI(
+            model=reasoning_llm_model,
+            api_key=openai_api_key,
+            # reasoning_effort="high"  # Enable if supported by the model/integration
+        )
+        logger.info(f"Using reasoning LLM: {reasoning_llm_model}")
+        response = llm.complete(reasoning_prompt)
+        logger.info("Reasoning tool execution successful.")
+        return response.text
+    except Exception as e:
+        logger.error(f"Error during reasoning tool LLM call: {e}", exc_info=True)
+        return f"Error during reasoning: {e}"
+
+# --- Tool Definition ---
+reasoning_tool = FunctionTool.from_defaults(
+    fn=reasoning_tool_fn,
+    name="reasoning_tool",
+    description=(
+        "Applies detailed chain-of-thought reasoning to the provided workflow context using a dedicated LLM. "
+        "Input: context (str). Output: Reasoning steps and conclusion (str) or error message."
+    ),
+)
+
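+# Illustrative I/O shape (hypothetical, not from a real run):
+#   reasoning_tool_fn("User asked: X. ResearchAgent found: Y.")
+# returns a numbered trace roughly like:
+#   "1. Comprehension: ...\n2. Decomposition: ...\n...\nConclusion: ..."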
+# --- Agent Initialization ---
+def initialize_reasoning_agent() -> ReActAgent:
+    """Initializes the Reasoning Agent."""
+    logger.info("Initializing ReasoningAgent...")
+
+    # Configuration for the agent's main LLM (Google GenAI)
+    agent_llm_model = os.getenv("REASONING_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not found for ReasoningAgent.")
+        raise ValueError("GEMINI_API_KEY must be set for ReasoningAgent")
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+        logger.info(f"Using agent LLM: {agent_llm_model}")
+
+        # Load system prompt (falls back to a placeholder if the file is missing)
+        default_system_prompt = "You are ReasoningAgent... [Default prompt content - replace with actual]"
+        system_prompt = load_prompt_from_file("../prompts/reasoning_agent_prompt.txt", default_system_prompt)
+        if system_prompt == default_system_prompt:
+            logger.warning("Using default/fallback system prompt for ReasoningAgent.")
+
+        agent = ReActAgent(
+            name="reasoning_agent",
+            description=(
+                "A pure reasoning agent that uses the `reasoning_tool` for detailed chain-of-thought analysis "
+                "on the provided context, then hands off the result to the `planner_agent`."
+            ),
+            tools=[reasoning_tool],  # Only has access to the reasoning tool
+            llm=llm,
+            system_prompt=system_prompt,
+            can_handoff_to=["planner_agent"],
+        )
+        logger.info("ReasoningAgent initialized successfully.")
+        return agent
+
+    except Exception as e:
+        logger.error(f"Error during ReasoningAgent initialization: {e}", exc_info=True)
+        raise
+
+# Example usage (for testing if run directly)
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger.info("Running reasoning_agent.py directly for testing...")
+
+    # Check required keys
+    required_keys = ["GEMINI_API_KEY", "ALPAFLOW_OPENAI_API_KEY"]
+    missing_keys = [key for key in required_keys if not os.getenv(key)]
+    if missing_keys:
+        print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+    else:
+        try:
+            # Test the reasoning tool directly
+            print("\nTesting reasoning_tool_fn...")
+            test_context = "User asked: What is the capital of France? ResearchAgent found: Paris. VerifierAgent confirmed: High confidence."
+            reasoning_output = reasoning_tool_fn(test_context)
+            print(f"Reasoning Tool Output:\n{reasoning_output}")
+
+            # Initialize the agent (optional)
+            # test_agent = initialize_reasoning_agent()
+            # print("\nReasoning Agent initialized successfully for testing.")
+            # Example chat (would require a context-passing mechanism)
+            # result = test_agent.chat("Synthesize the findings about the capital of France.")
+            # print(f"Agent chat result: {result}")
+
+        except Exception as e:
+            print(f"Error during testing: {e}")
+
agents/research_agent.py ADDED
@@ -0,0 +1,622 @@
+import os
+import time
+import logging
+import re  # Regex for video ID extraction
+from typing import List, Optional, Dict
+from dotenv import load_dotenv
+
+from llama_index.core.agent.workflow import ReActAgent
+from llama_index.core.tools import FunctionTool
+from llama_index.llms.google_genai import GoogleGenAI
+from llama_index.tools.google import GoogleSearchToolSpec
+from llama_index.tools.tavily_research import TavilyToolSpec
+from llama_index.tools.wikipedia import WikipediaToolSpec
+from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
+from llama_index.tools.yahoo_finance import YahooFinanceToolSpec
+from llama_index.tools.arxiv import ArxivToolSpec
+
+# Attempt to import browser tools; handle import errors gracefully
+try:
+    from selenium import webdriver
+    from selenium.webdriver.common.by import By
+    from selenium.webdriver.common.keys import Keys
+    from selenium.common.exceptions import WebDriverException, NoSuchElementException, TimeoutException
+    from helium import start_chrome, go_to, find_all, Text, kill_browser, get_driver, click, write, press
+    SELENIUM_AVAILABLE = True
+except ImportError:
+    logging.warning("Selenium or Helium not installed. Browser interaction tools will be unavailable.")
+    SELENIUM_AVAILABLE = False
+
+# Attempt to import YouTube transcript API
+try:
+    from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
+    YOUTUBE_TRANSCRIPT_API_AVAILABLE = True
+except ImportError:
+    logging.warning("youtube-transcript-api not installed. YouTube transcript tool will be unavailable.")
+    YOUTUBE_TRANSCRIPT_API_AVAILABLE = False
+
+# Load environment variables
+load_dotenv()
+
+# Setup logging
+logger = logging.getLogger(__name__)
+
+# --- Helper function to extract YouTube Video ID ---
+def extract_video_id(url: str) -> Optional[str]:
+    """Extracts the YouTube video ID from various URL formats."""
+    # Handles watch (?v=), /v/, embed, youtu.be, and shorts URLs
+    match = re.search(r'(?:v=|/v/|embed/|youtu\.be/|/shorts/)([A-Za-z0-9_-]+)', url)
+    if match:
+        return match.group(1)
+    return None
+
+# --- YouTube Transcript Tool ---
+def get_youtube_transcript(video_url_or_id: str, languages=None) -> str:
+    """Fetches the transcript for a YouTube video using its URL or video ID.
+    Specify preferred languages as a list (e.g., ["en", "es"]).
+    Returns the transcript text or an error message.
+    """
+    if languages is None:
+        languages = ["en"]
+    if not YOUTUBE_TRANSCRIPT_API_AVAILABLE:
+        return "Error: youtube-transcript-api library is required but not installed."
+
+    logger.info(f"Attempting to fetch YouTube transcript for: {video_url_or_id}")
+    video_id = extract_video_id(video_url_or_id)
+    if not video_id:
+        # Assume it might be an ID already if extraction fails
+        if re.match(r"^[a-zA-Z0-9_\-]+$", video_url_or_id):
+            video_id = video_url_or_id
+            logger.info("Input treated as video ID.")
+        else:
+            logger.error(f"Could not extract valid YouTube video ID from: {video_url_or_id}")
+            return f"Error: Invalid YouTube URL or Video ID format: {video_url_or_id}"
+
+    try:
+        # Fetch the list of available transcripts
+        api = YouTubeTranscriptApi()
+        transcript_list = api.list(video_id)
+
+        # Try to find a transcript in the specified languages
+        transcript = transcript_list.find_transcript(languages)
+
+        # Fetch the actual transcript data and combine the text parts into one string
+        transcript_data = transcript.fetch()
+        full_transcript = " ".join(snippet.text for snippet in transcript_data)
+        logger.info(f"Successfully fetched transcript for video ID {video_id} in language {transcript.language}.")
+        return full_transcript
+
+    except TranscriptsDisabled:
+        logger.warning(f"Transcripts are disabled for video ID: {video_id}")
+        return f"Error: Transcripts are disabled for this video (ID: {video_id})."
+    except NoTranscriptFound as e:
+        logger.warning(f"No transcript found for video ID {video_id} in languages {languages}: {e}")
+        # Try fetching any available transcript if the specific languages failed
+        try:
+            logger.info(f"Attempting to fetch any available transcript for {video_id}")
+            any_transcript = next(iter(transcript_list))  # TranscriptList is iterable
+            any_transcript_data = any_transcript.fetch()
+            full_transcript = " ".join(snippet.text for snippet in any_transcript_data)
+            logger.info(f"Successfully fetched fallback transcript for video ID {video_id} in language {any_transcript.language}.")
+            return full_transcript
+        except Exception as fallback_e:
+            logger.error(f"Could not find any transcript for video ID {video_id}. Original error: {e}. Fallback error: {fallback_e}")
+            return f"Error: No transcript found for video ID {video_id} in languages {languages} or any fallback language."
+    except Exception as e:
+        logger.error(f"Unexpected error fetching transcript for video ID {video_id}: {e}", exc_info=True)
+        return f"Error fetching transcript: {e}"
+
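+# Illustrative inputs accepted by extract_video_id / get_youtube_transcript
+# (the video ID below is a made-up placeholder):
+#   extract_video_id("https://www.youtube.com/watch?v=abc123XYZ_-")  -> "abc123XYZ_-"
+#   extract_video_id("https://youtu.be/abc123XYZ_-")                 -> "abc123XYZ_-"
+#   extract_video_id("https://www.youtube.com/shorts/abc123XYZ_-")   -> "abc123XYZ_-"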
+# --- Browser Interaction Tools (Conditional on Selenium/Helium availability) ---
+
+# Global browser instance (managed by initializer)
+_browser_instance = None
+_browser_driver = None
+
+# Helper decorator for browser tool error handling and logging
+def browser_tool_handler(func):
+    def wrapper(*args, **kwargs):
+        if not SELENIUM_AVAILABLE:
+            return "Error: Browser tools require Selenium and Helium to be installed."
+        if _browser_instance is None or _browser_driver is None:
+            # Attempt to initialize if not already done (e.g., if called directly).
+            # Not ideal: initialization should normally happen via get_research_initializer().
+            logger.warning("Browser accessed before explicit initialization. Attempting to initialize now.")
+            try:
+                get_research_initializer()  # This will initialize the browser
+                if _browser_instance is None or _browser_driver is None:
+                    return "Error: Browser initialization failed."
+            except Exception as init_err:
+                return f"Error: Browser initialization failed: {init_err}"
+
+        func_name = func.__name__
+        logger.info(f"Executing browser tool: {func_name} with args: {args}, kwargs: {kwargs}")
+        try:
+            result = func(*args, **kwargs)
+            logger.info(f"Tool {func_name} executed successfully.")
+            # Ensure result is a string for consistency
+            return str(result) if result is not None else f"{func_name} completed."
+        except (NoSuchElementException, WebDriverException, TimeoutException) as e:
+            # Report only the first line of the error to keep tool output compact
+            logger.warning(f"Browser error in {func_name}: {e.__class__.__name__} - {str(e).splitlines()[0]}")
+            return f"Error in {func_name}: {e.__class__.__name__} - {str(e).splitlines()[0]}"
+        except Exception as e:
+            logger.error(f"Unexpected error in {func_name}: {e}", exc_info=True)
+            return f"Unexpected error in {func_name}: {e}"
+    return wrapper
+
+@browser_tool_handler
+def visit(url: str, wait_seconds: float = 3.0) -> str:
+    """Navigate the browser to the specified URL and wait for the page to load."""
+    logger.info(f"Navigating to {url} and waiting {wait_seconds}s...")
+    go_to(url)
+    time.sleep(wait_seconds)  # Wait for dynamic content
+    current_url = _browser_driver.current_url
+    return f"Successfully navigated to: {current_url}"
+
+@browser_tool_handler
+def get_text_by_css(selector: str) -> List[str]:
+    """Extract text from all elements matching a CSS selector. Use selector="body" for all visible text."""
+    logger.info(f"Extracting text using CSS selector: {selector}")
+    if selector.lower() == "body":
+        # Helium Text() might be too broad; try the body tag first
+        try:
+            body_element = _browser_driver.find_element(By.TAG_NAME, "body")
+            all_text = body_element.text.split("\n")  # Split into lines
+            # Filter out empty lines
+            non_empty_text = [line.strip() for line in all_text if line.strip()]
+            logger.info(f"Extracted {len(non_empty_text)} lines of text from body.")
+            return non_empty_text
+        except NoSuchElementException:
+            logger.warning("Could not find body tag, falling back to Helium Text().")
+        # Fallback: process Helium Text elements
+        elements = find_all(Text())
+        texts = [elem.web_element.text for elem in elements if elem.web_element.is_displayed() and elem.web_element.text.strip()]
+        logger.info(f"Extracted {len(texts)} visible text elements using Helium Text().")
+        return texts
+    else:
+        # Use Selenium directly for more control
+        elements_selenium = _browser_driver.find_elements(By.CSS_SELECTOR, selector)
+        texts = [elem.text for elem in elements_selenium if elem.is_displayed() and elem.text.strip()]
+        logger.info(f"Extracted {len(texts)} visible text elements for selector {selector}.")
+        return texts
+
+@browser_tool_handler
+def get_page_html() -> str:
+    """Return the full HTML source of the current page."""
+    logger.info("Retrieving page HTML source...")
+    return _browser_driver.page_source
+
+@browser_tool_handler
+def click_element_by_css(selector: str, index: int = 0) -> str:
+    """Click on the Nth (0-based index) element matching the CSS selector."""
+    logger.info(f"Attempting to click element {index} matching selector: {selector}")
+    # Use Selenium directly for finding elements
+    elements_selenium = _browser_driver.find_elements(By.CSS_SELECTOR, selector)
+    if not elements_selenium:
+        raise NoSuchElementException(f"No elements found for selector: {selector}")
+    if index >= len(elements_selenium):
+        raise IndexError(f"Index {index} out of bounds. Only {len(elements_selenium)} elements found for selector: {selector}")
+
+    target_element = elements_selenium[index]
+    if not target_element.is_displayed() or not target_element.is_enabled():
+        logger.warning(f"Element {index} for selector {selector} is not visible or enabled. Attempting click anyway.")
+        # Try scrolling into view first
+        try:
+            _browser_driver.execute_script("arguments[0].scrollIntoView(true);", target_element)
+            time.sleep(0.5)
+        except Exception as scroll_err:
+            logger.warning(f"Could not scroll element into view: {scroll_err}")
+
+    # Use Helium click, which may handle overlays better, passing the Selenium element
+    click(target_element)
+    time.sleep(1.5)  # Wait after click
+    return f"Clicked element {index} matching selector {selector}. Current URL: {_browser_driver.current_url}"
+
+@browser_tool_handler
+def input_text_by_css(selector: str, text: str, index: int = 0, press_enter: bool = False) -> str:
+    """Input text into the Nth (0-based index) element matching the CSS selector. Optionally press Enter."""
+    logger.info(f"Attempting to input text into element {index} matching selector: {selector}")
+    # Use Selenium directly for finding elements
+    elements_selenium = _browser_driver.find_elements(By.CSS_SELECTOR, selector)
+    if not elements_selenium:
+        raise NoSuchElementException(f"No elements found for selector: {selector}")
+    if index >= len(elements_selenium):
+        raise IndexError(f"Index {index} out of bounds. Only {len(elements_selenium)} elements found for selector: {selector}")
+
+    target_element = elements_selenium[index]
+    if not target_element.is_displayed() or not target_element.is_enabled():
+        logger.warning(f"Input element {index} for selector {selector} is not visible or enabled. Attempting input anyway.")
+        # Try scrolling into view
+        try:
+            _browser_driver.execute_script("arguments[0].scrollIntoView(true);", target_element)
+            time.sleep(0.5)
+        except Exception as scroll_err:
+            logger.warning(f"Could not scroll input element into view: {scroll_err}")
+
+    # Use Helium write, passing the Selenium element
+    write(text, into=target_element)
+    time.sleep(0.5)
+    if press_enter:
+        press(Keys.ENTER)
+        time.sleep(1.5)  # Wait longer if Enter was pressed
+        return f"Input text into element {index} ({selector}) and pressed Enter. Current URL: {_browser_driver.current_url}"
+    else:
+        return f"Input text into element {index} ({selector})."
+
+@browser_tool_handler
+def scroll_page(direction: str = "down", amount: str = "page") -> str:
+    """Scroll the page up or down by a specified amount ('page', 'top', 'bottom', or pixels)."""
+    logger.info(f"Scrolling {direction} by {amount}")
+    if direction not in ["up", "down"]:
+        raise ValueError("Direction must be \"up\" or \"down\".")
+
+    if amount == "page":
+        scroll_script = "window.scrollBy(0, window.innerHeight);" if direction == "down" else "window.scrollBy(0, -window.innerHeight);"
+    elif amount == "top":
+        scroll_script = "window.scrollTo(0, 0);"
+    elif amount == "bottom":
+        scroll_script = "window.scrollTo(0, document.body.scrollHeight);"
+    else:
+        try:
+            pixels = int(amount)
+            scroll_script = f"window.scrollBy(0, {pixels});" if direction == "down" else f"window.scrollBy(0, {-pixels});"
+        except ValueError:
+            raise ValueError("Amount must be \"page\", \"top\", \"bottom\", or a number of pixels.")
+
+    _browser_driver.execute_script(scroll_script)
+    time.sleep(1)  # Wait for scroll effects
+    return f"Scrolled {direction} by {amount}."
+
+@browser_tool_handler
+def go_back() -> str:
+    """Navigate the browser back one step in its history."""
+    logger.info("Navigating back...")
+    _browser_driver.back()
+    time.sleep(1.5)  # Wait after navigation
+    return f"Navigated back. Current URL: {_browser_driver.current_url}"
+
+@browser_tool_handler
+def close_popups() -> str:
+    """Send an ESC keypress to attempt to dismiss modals or pop-ups."""
+    logger.info("Sending ESC key...")
+    webdriver.ActionChains(_browser_driver).send_keys(Keys.ESCAPE).perform()
+    time.sleep(0.5)
+    return "Sent ESC key press."
+
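+# Illustrative sequence (hypothetical): how an agent run might chain these tools
+# to read a page behind a cookie banner.
+#
+# visit("https://example.com")          # load the page
+# close_popups()                        # dismiss a modal, if any
+# scroll_page("down", "page")           # reveal content below the fold
+# headings = get_text_by_css("h1")      # extract the main heading text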
+# --- Search Engine & Data Source Tools ---
+
+# --- Agent Initializer Class ---
+class ResearchAgentInitializer:
+    def __init__(self):
+        logger.info("Initializing ResearchAgent resources...")
+        self.llm = None
+        self.browser_tools = []
+        self.search_tools = []
+        self.datasource_tools = []
+        self.youtube_tool = None  # Added for YouTube tool
+
+        # Initialize LLM
+        self._initialize_llm()
+
+        # Initialize Browser (conditionally)
+        if SELENIUM_AVAILABLE:
+            self._initialize_browser()
+            self._create_browser_tools()
+        else:
+            logger.warning("Browser tools are disabled as Selenium/Helium are not available.")
+
+        # Initialize Search/Datasource Tools
+        self._create_search_tools()
+        self._create_datasource_tools()
+        self._create_youtube_tool()
+
+        logger.info("ResearchAgent resources initialized.")
+
+    def _initialize_llm(self):
+        agent_llm_model = os.getenv("RESEARCH_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+        gemini_api_key = os.getenv("GEMINI_API_KEY")
+        if not gemini_api_key:
+            logger.error("GEMINI_API_KEY not found for ResearchAgent LLM.")
+            raise ValueError("GEMINI_API_KEY must be set for ResearchAgent")
+        try:
+            self.llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+            logger.info(f"ResearchAgent LLM initialized: {agent_llm_model}")
+        except Exception as e:
+            logger.error(f"Failed to initialize ResearchAgent LLM: {e}", exc_info=True)
+            raise
+
+    def _initialize_browser(self):
+        global _browser_instance, _browser_driver, SELENIUM_AVAILABLE
+        if _browser_instance is None:
+            logger.info("Initializing browser (Chrome headless)...")
+            try:
+                chrome_options = webdriver.ChromeOptions()
+                # Configurable options from env vars
+                if os.getenv("RESEARCH_AGENT_CHROME_NO_SANDBOX", "true").lower() == "true":
+                    chrome_options.add_argument("--no-sandbox")
+                if os.getenv("RESEARCH_AGENT_CHROME_DISABLE_DEV_SHM", "true").lower() == "true":
+                    chrome_options.add_argument("--disable-dev-shm-usage")
+                # Add prefs for downloads/popups
+                chrome_options.add_experimental_option("prefs", {
+                    "download.prompt_for_download": False,
+                    "plugins.always_open_pdf_externally": True,
+                    "profile.default_content_settings.popups": 0
+                })
+
+                # Start Chrome using Helium
+                _browser_instance = start_chrome(headless=True, options=chrome_options)
+                _browser_driver = get_driver()  # Get the underlying Selenium driver
+                logger.info("Browser initialized successfully.")
+            except Exception as e:
+                logger.error(f"Failed to initialize browser: {e}", exc_info=True)
+                # Set flags to prevent tool usage
+                SELENIUM_AVAILABLE = False
+                _browser_instance = None
+                _browser_driver = None
+
+    def _create_browser_tools(self):
+        if not SELENIUM_AVAILABLE:
+            self.browser_tools = []
+            return
+
+        self.browser_tools = [
+            FunctionTool.from_defaults(fn=visit, name="visit_url"),  # Renamed for clarity
+            FunctionTool.from_defaults(fn=get_text_by_css, name="get_text_by_css"),
+            FunctionTool.from_defaults(fn=get_page_html, name="get_page_html"),
+            FunctionTool.from_defaults(fn=click_element_by_css, name="click_element_by_css"),
+            FunctionTool.from_defaults(fn=input_text_by_css, name="input_text_by_css"),
+            FunctionTool.from_defaults(fn=scroll_page, name="scroll_page"),
+            FunctionTool.from_defaults(fn=go_back, name="navigate_back"),  # Renamed
+            FunctionTool.from_defaults(fn=close_popups, name="close_popups"),
+        ]
+        for tool in self.browser_tools:
+            tool.metadata.description = f"(Browser) {tool.metadata.description}"
+        logger.info(f"Created {len(self.browser_tools)} browser interaction tools.")
+
+    def _create_search_tools(self):
+        self.search_tools = []
+
+        # Google Search
+        google_spec = GoogleSearchToolSpec(key=os.getenv("GOOGLE_API_KEY"), engine=os.getenv("GOOGLE_CSE_ID"))
+        if google_spec:
+            google_tool = FunctionTool.from_defaults(fn=google_spec.google_search, name="google_search")
+            google_tool.metadata.description = "(Search) Execute a Google Custom Search query. Returns structured results."
+            self.search_tools.append(google_tool)
+
+        # Tavily Search
+        tavily_spec = TavilyToolSpec(api_key=os.getenv("TAVILY_API_KEY"))
+        if tavily_spec:
+            # Use the general-purpose search method
+            tavily_tool = FunctionTool.from_defaults(fn=tavily_spec.search, name="tavily_search")
+            tavily_tool.metadata.description = "(Search) Perform a deep research search using Tavily API. Good for finding documents/articles."
+            self.search_tools.append(tavily_tool)
+
+        # DuckDuckGo Search
+        ddg_spec = DuckDuckGoSearchToolSpec()
+        if ddg_spec:
+            ddg_tool = FunctionTool.from_defaults(fn=ddg_spec.duckduckgo_full_search, name="duckduckgo_search")
+            ddg_tool.metadata.description = "(Search) Execute a DuckDuckGo search. Returns structured results."
+            self.search_tools.append(ddg_tool)
+
+        logger.info(f"Created {len(self.search_tools)} search engine tools.")
+
+    def _create_datasource_tools(self):
+        self.datasource_tools = []
+
+        # Wikipedia
+        wiki_spec = WikipediaToolSpec()
+        if wiki_spec:
+            wiki_search_tool = FunctionTool.from_defaults(fn=wiki_spec.search_data, name="wikipedia_search_pages")
+            wiki_search_tool.metadata.description = "(Wikipedia) Search for Wikipedia page titles matching a query."
+            wiki_load_tool = FunctionTool.from_defaults(fn=wiki_spec.load_data, name="wikipedia_load_page")
+            wiki_load_tool.metadata.description = "(Wikipedia) Load the full content of a specific Wikipedia page title."
+            self.datasource_tools.extend([wiki_search_tool, wiki_load_tool])
+
+        # Yahoo Finance
+        yf_spec = YahooFinanceToolSpec()
+        if yf_spec:
+            yf_tools_map = {
+                "balance_sheet": "Get the latest balance sheet for a stock ticker.",
+                "income_statement": "Get the latest income statement for a stock ticker.",
+                "cash_flow": "Get the latest cash flow statement for a stock ticker.",
+                "stock_basic_info": "Get basic info (price, market cap, summary) for a stock ticker.",
+                "stock_analyst_recommendations": "Get analyst recommendations for a stock ticker.",
+                "stock_news": "Get recent news headlines for a stock ticker."
+            }
+            for func_name, desc in yf_tools_map.items():
+                if hasattr(yf_spec, func_name):
+                    tool = FunctionTool.from_defaults(fn=getattr(yf_spec, func_name), name=f"yahoo_finance_{func_name}")
+                    tool.metadata.description = f"(YahooFinance) {desc}"
+                    self.datasource_tools.append(tool)
+                else:
+                    logger.warning(f"YahooFinance function {func_name} not found in spec.")
+
+        # ArXiv
+        arxiv_spec = ArxivToolSpec()
+        if arxiv_spec:
+            arxiv_tool = FunctionTool.from_defaults(fn=arxiv_spec.arxiv_query, name="arxiv_search")
+            arxiv_tool.metadata.description = "(ArXiv) Search ArXiv for academic papers matching a query."
+            self.datasource_tools.append(arxiv_tool)
+
+        logger.info(f"Created {len(self.datasource_tools)} specific data source tools.")
+
+    def _create_youtube_tool(self):
+        if YOUTUBE_TRANSCRIPT_API_AVAILABLE:
+            self.youtube_tool = FunctionTool.from_defaults(
+                fn=get_youtube_transcript,
+                name="get_youtube_transcript",
+                description=(
+                    "(YouTube) Fetches the transcript text for a given YouTube video URL or video ID. "
+                    "Specify preferred languages (e.g., [\"en\", \"es\"]). Returns transcript or error."
+                )
+            )
+            logger.info("Created YouTube transcript tool.")
+        else:
+            self.youtube_tool = None
+            logger.warning("YouTube transcript tool disabled because youtube-transcript-api is not installed.")
+
+    def get_agent(self) -> ReActAgent:
+        """Creates and returns the configured ReActAgent for research."""
+        logger.info("Creating ResearchAgent ReActAgent instance...")
+
+        all_tools = self.browser_tools + self.search_tools + self.datasource_tools
+        if self.youtube_tool:  # Add YouTube tool if available
+            all_tools.append(self.youtube_tool)
+
+        if not all_tools:
+            logger.warning("No tools available for ResearchAgent. It will likely be unable to function.")
+
+        # System prompt (consider loading from file); includes the YouTube tool
+        system_prompt = """\
+You are ResearchAgent, an autonomous web research assistant. Your goal is to gather information accurately and efficiently using the available tools.
+
+Available Tool Categories:
+- (Browser): Tools for direct web page interaction (visiting URLs, clicking, scrolling, extracting text/HTML, inputting text).
+- (Search): Tools for querying search engines (Google, DuckDuckGo, Tavily).
+- (Wikipedia): Tools for searching and loading Wikipedia pages.
+- (YahooFinance): Tools for retrieving financial data (balance sheets, income statements, stock info, news).
+- (ArXiv): Tool for searching academic papers on ArXiv.
+- (YouTube): Tool for fetching video transcripts (`get_youtube_transcript`).
+
+Workflow:
+1. **Thought**: Analyze the research goal. Break it down if necessary. Choose the *single best tool* for the *next immediate step*. Explain your choice. Consider the information needed and which tool provides it most directly (e.g., use YahooFinance for stock prices, Google/DDG for general web search, Tavily for document search, ArXiv for papers, Wikipedia for encyclopedic info, YouTube for video transcripts, Browser tools for specific website interaction).
+2. **Action**: Call the chosen tool with the correct arguments. Ensure inputs match the tool's requirements (e.g., URL or video ID for YouTube).
+3. **Observation**: Examine the tool's output. Extract the relevant information. Check for errors.
+4. **Reflect & Iterate**: Does the observation satisfy the immediate goal? Do you have enough information for the overall research task? If not, return to step 1 (Thought) to plan the *next* single step. If a tool failed, consider why and try an alternative tool or approach.
+5. **Synthesize**: Once all necessary information is gathered, synthesize the findings into a coherent answer to the original research goal.
+6. **Hand-Off**: Pass the synthesized findings to the appropriate next agent: **code_agent** (for coding), **math_agent** (for math), **text_analyzer_agent** (for text analysis), **planner_agent** (for planning/synthesis), or **reasoning_agent** (for logic/reasoning).
+
+Constraints:
+- Use only one tool per Action step.
+- Think step-by-step.
+- If using browser tools, start with `visit_url`.
+- Be mindful of potential errors and try alternative tools if one fails.
+- Synthesize results *before* handing off.
+"""
+
+        agent = ReActAgent(
+            name="research_agent",
+            description=(
+                "Performs web research using browser interaction, search engines (Google, DDG, Tavily), "
+                "specific data sources (Wikipedia, YahooFinance, ArXiv), and YouTube transcript fetching. Follows Thought-Action-Observation loop."
+            ),
+            tools=all_tools,
+            llm=self.llm,
+            system_prompt=system_prompt,
+            can_handoff_to=[
+                "code_agent",
+                "math_agent",
+                "text_analyzer_agent",
+                "planner_agent",
+                "reasoning_agent",
+            ],
+        )
+        logger.info("ResearchAgent ReActAgent instance created.")
+        return agent
+
+    def close_browser(self):
+        """Closes the browser instance if it was initialized."""
+        global _browser_instance, _browser_driver
+        if _browser_instance:
+            logger.info("Closing browser instance...")
+            try:
+                kill_browser()  # Use Helium's function
+                logger.info("Browser closed successfully.")
+            except Exception as e:
+                logger.error(f"Error closing browser: {e}", exc_info=True)
+            finally:
+                _browser_instance = None
+                _browser_driver = None
+        else:
+            logger.info("No active browser instance to close.")
+
+# --- Singleton Initializer Instance ---
+_research_agent_initializer_instance = None
+
+def get_research_initializer():
+    """Gets the singleton instance of ResearchAgentInitializer."""
+    global _research_agent_initializer_instance
+    if _research_agent_initializer_instance is None:
+        logger.info("Instantiating ResearchAgentInitializer for the first time.")
+        _research_agent_initializer_instance = ResearchAgentInitializer()
+    return _research_agent_initializer_instance
+
+# --- Public Initialization Function ---
+def initialize_research_agent() -> ReActAgent:
+    """Initializes and returns the Research Agent using a singleton initializer."""
+    logger.info("initialize_research_agent called.")
+    initializer = get_research_initializer()
+    return initializer.get_agent()
+
+# --- Cleanup Function (Optional but recommended) ---
+def cleanup_research_agent_resources():
+    """Cleans up resources used by the research agent, like the browser."""
+    logger.info("Cleaning up research agent resources...")
+    initializer = get_research_initializer()  # Ensure it exists
+    initializer.close_browser()
+
+# Example usage (for testing if run directly)
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger.info("Running research_agent.py directly for testing...")
+
+    # Check required keys
+    required_keys = ["GEMINI_API_KEY"]  # Others are optional depending on tools needed
+    missing_keys = [key for key in required_keys if not os.getenv(key)]
+    if missing_keys:
+        print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+    else:
+        # Warn about optional keys
+        optional_keys = ["GOOGLE_API_KEY", "GOOGLE_CSE_ID", "TAVILY_API_KEY", "WOLFRAM_ALPHA_APP_ID"]
+        missing_optional = [key for key in optional_keys if not os.getenv(key)]
+        if missing_optional:
+            print(f"Warning: Optional environment variable(s) not set: {', '.join(missing_optional)}. Some tools may be unavailable.")
+
+        test_agent = None
+        try:
+            # Test YouTube transcript tool directly
+            if YOUTUBE_TRANSCRIPT_API_AVAILABLE:
+                print("\nTesting YouTube transcript tool...")
+                # Example video: "Attention is All You Need" paper explanation
+                yt_url = "https://www.youtube.com/watch?v=TQQlZhbC5ps"
+                transcript = get_youtube_transcript(yt_url)
+                if not transcript.startswith("Error:"):
+                    print(f"Transcript fetched (first 500 chars):\n{transcript[:500]}...")
+                else:
+                    print(f"YouTube Transcript Fetch Failed: {transcript}")
+            else:
+                print("\nSkipping YouTube transcript test as youtube-transcript-api is not available.")
+
+            # Initialize agent AFTER testing standalone functions
+            test_agent = initialize_research_agent()
+            print("\nResearch Agent initialized successfully for testing.")
+
+            # Example test (requires browser tools to be available)
+            # if SELENIUM_AVAILABLE:
+            #     print("\nTesting browser visit...")
+            #     result = test_agent.chat("Visit https://example.com and tell me the main heading text using CSS selector 'h1'")
+            #     print(f"Test query result: {result}")
+            # else:
+            #     print("\nSkipping browser test as Selenium/Helium are not available.")
+
+            # Example search test (requires GOOGLE keys)
+            # if os.getenv("GOOGLE_API_KEY") and os.getenv("GOOGLE_CSE_ID"):
+            #     print("\nTesting Google Search...")
+            #     result_search = test_agent.chat("Search for 'LlamaIndex Agent Workflow'")
+            #     print(f"Search test result: {result_search}")
+            # else:
+            #     print("\nSkipping Google Search test as API keys are not set.")
+
+        except Exception as e:
+            print(f"Error during testing: {e}")
+        finally:
+            # Clean up browser if it was started
+            if test_agent:
+                print("\nCleaning up resources...")
+                cleanup_research_agent_resources()
+
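+# Recommended lifecycle sketch for embedding this agent in an application
+# (mirrors the __main__ block above):
+#
+# agent = initialize_research_agent()        # singleton: browser + tools built once
+# try:
+#     ...                                    # run queries against the agent
+# finally:
+#     cleanup_research_agent_resources()     # always close the headless browser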
agents/role_agent.py ADDED
@@ -0,0 +1,215 @@
+import os
+import logging
+from dotenv import load_dotenv
+
+import datasets
+from llama_index.core import Document, VectorStoreIndex
+from llama_index.core.agent.workflow import ReActAgent
+from llama_index.core.retrievers import QueryFusionRetriever
+from llama_index.core.retrievers.fusion_retriever import FUSION_MODES
+from llama_index.core.tools import FunctionTool
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core.postprocessor import SentenceTransformerRerank
+from llama_index.llms.google_genai import GoogleGenAI
+from llama_index.retrievers.bm25 import BM25Retriever
+
+# Load environment variables
+load_dotenv()
+
+# Setup logging
+logger = logging.getLogger(__name__)
+
+# --- Tool Function ---
+# Note: the retrieval tool relies on the retriever and reranker owned by a
+# RoleAgentInitializer instance, so it is implemented as an instance method
+# and bound when the tool is created in get_agent().
+
+# --- Initializer Class ---
+class RoleAgentInitializer:
+    def __init__(self):
+        logger.info("Initializing RoleAgent resources...")
+        # Configuration from environment variables
+        self.embed_model_name = os.getenv("ROLE_EMBED_MODEL", "Snowflake/snowflake-arctic-embed-l-v2.0")
+        self.reranker_model_name = os.getenv("ROLE_RERANKER_MODEL", "Alibaba-NLP/gte-multilingual-reranker-base")
+        self.dataset_name = os.getenv("ROLE_PROMPT_DATASET", "fka/awesome-chatgpt-prompts")
+        self.llm_model_name = os.getenv("ROLE_LLM_MODEL", "models/gemini-1.5-pro")
+        self.gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+        if not self.gemini_api_key:
+            logger.error("GEMINI_API_KEY not found in environment variables.")
+            raise ValueError("GEMINI_API_KEY must be set")
+
+        # Initialize models and components
+        try:
+            logger.info(f"Loading embed model: {self.embed_model_name}")
+            self.embed_model = HuggingFaceEmbedding(model_name=self.embed_model_name)
+
+            logger.info(f"Loading reranker model: {self.reranker_model_name}")
+            self.reranker = SentenceTransformerRerank(
+                model=self.reranker_model_name,
+                top_n=3
+            )
+
+            # Load the dataset
+            logger.info(f"Loading dataset: {self.dataset_name}")
+            prompts_dataset = datasets.load_dataset(self.dataset_name, split="train")
+
+            # Convert the dataset to a list of Documents
+            logger.info("Converting dataset to LlamaIndex Documents...")
+            documents = [
+                Document(
+                    text="\n".join([
+                        f"Act: {prompts_dataset['act'][i]}",
+                        f"Prompt: {prompts_dataset['prompt'][i]}",
+                    ]),
+                    metadata={"act": prompts_dataset["act"][i]}
+                )
+                for i in range(len(prompts_dataset))
+            ]
+
+            splitter = SentenceSplitter(chunk_size=256, chunk_overlap=20)
+
+            logger.info("Building vector index (this may take time)...")
+            index = VectorStoreIndex.from_documents(
+                documents,
+                embed_model=self.embed_model,
+                show_progress=True,
+                transformations=[splitter]
+            )
+            logger.info("Vector index built.")
+
+            logger.info("Building BM25 retriever...")
+            bm25_retriever = BM25Retriever.from_defaults(
+                docstore=index.docstore,
+                similarity_top_k=2
+            )
+            vector_retriever = index.as_retriever(similarity_top_k=2)
+
+            logger.info("Building query fusion retriever...")
+            self.retriever = QueryFusionRetriever(
+                [vector_retriever, bm25_retriever],
+                similarity_top_k=2,
+                mode=FUSION_MODES.RECIPROCAL_RANK,
+                verbose=True,
+            )
+            logger.info("RoleAgent resources initialized successfully.")
+
+        except Exception as e:
+            logger.error(f"Error during RoleAgent resource initialization: {e}", exc_info=True)
+            raise
+
+    def role_prompt_retriever_method(self, query: str) -> str:
+        """
+        Retrieve and return detailed role or task information.
+        Uses the retriever and reranker initialized on this class instance.
+        Args:
+            query (str): The user query describing the desired role, task, or prompt context.
+        Returns:
+            str: A string containing the assigned role/task description, or a message indicating no matching prompt was found.
+        """
+        logger.info(f"Role prompt retriever called with query: {query[:100]}...")
+        try:
+            results = self.retriever.retrieve(query)
+            reranked_results = self.reranker.postprocess_nodes(results, query_str=query)
+            if reranked_results:
+                # Return the top 3 results, as in the original logic
+                top_results_text = "\n\n".join([node.get_content() for node in reranked_results[:3]])
+                logger.info(f"Retrieved and reranked {len(reranked_results)} results. Returning top 3.")
+                return top_results_text
+            else:
+                logger.warning("No matching role prompt found after reranking.")
+                return "No matching role prompt found."
+        except Exception as e:
+            logger.error(f"Error during role prompt retrieval: {e}", exc_info=True)
+            return f"Error retrieving role prompt: {e}"
+
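+    # Illustrative call (hypothetical output): the retriever returns the top
+    # reranked "Act/Prompt" blocks verbatim from the dataset, e.g.:
+    #   role_prompt_retriever_method("act as a linux terminal")
+    #   -> "Act: Linux Terminal\nPrompt: I want you to act as a linux terminal..."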
+    def get_agent(self) -> ReActAgent:
+        """Creates and returns the configured ReActAgent for role selection."""
+        logger.info("Creating RoleAgent ReActAgent instance...")
+
+        # Create the tool, binding the method to this instance
+        role_prompt_retriever_tool = FunctionTool.from_defaults(
+            fn=self.role_prompt_retriever_method,  # Use the instance method
+            name="role_prompt_retriever",
+            description="Retrieve and summarize the top three role or task prompts for "
+                        "a query using BM25 and embedding retrieval with reranking.",
+        )
+
+        # System prompt (consider loading from file in future)
+        system_prompt = """\
+You are RoleAgent, an expert context‐setter that interprets user inputs and deterministically assigns the most fitting persona or task schema to guide downstream agents. For every query:
+
+1. **Interpret Intent**: Parse the user’s instruction to understand their goal, domain, and required expertise.
+2. **Retrieve & Rank**: Use the `role_prompt_retriever` tool to fetch the top role descriptions relevant to the intent.
+3. **Select Role**: Based *only* on the retrieved results, choose the single best‐matching persona (e.g. “Developer Assistant,” “SEO Strategist,” “Translation Engine,” “Terminal Emulator”) without asking the user any follow-up. If no relevant role is found, state that clearly.
+4. **Respond**: Output in plain text with:
+   - **Role**: The selected persona (or "None Found").
+   - **Reason**: Briefly explain why this role was chosen based *only* on the retrieved text.
+   - **Prompt**: The corresponding role prompt from the retrieved text to be used by downstream agents (or "N/A" if none found).
+5. **Hand-Off**: Immediately after including the chosen prompt (or N/A) in your response, invoke `planner_agent` to begin breaking down the user’s request into actionable sub-questions.
+
+Always conclude your response with the full prompt for the next agent (or "N/A") and the invocation instruction for `planner_agent`.
+"""
+
+        llm = GoogleGenAI(
+            api_key=self.gemini_api_key,
+            model=self.llm_model_name,
+        )
+
+        agent = ReActAgent(
+            name="role_agent",
+            description=(
+                "RoleAgent selects the most appropriate persona or task template based on the user’s query. "
+                "By evaluating the question’s intent and context using a specialized retriever, it chooses or refines a prompt that aligns "
+                "with the best-fitting role—whether developer, analyst, translator, planner, or otherwise—so that "
+                "subsequent agents can respond effectively under the optimal role context."
+            ),
+            tools=[role_prompt_retriever_tool],
+            llm=llm,
+            system_prompt=system_prompt,
+            can_handoff_to=["planner_agent"],
+        )
+        logger.info("RoleAgent ReActAgent instance created.")
+        return agent
+
+# --- Global Initializer Instance (Singleton Pattern) ---
+# Instantiate the initializer once when the module is loaded.
+# This ensures expensive operations (model loading, index building) happen only once.
+_role_agent_initializer_instance = None
+
+def get_initializer():
+    global _role_agent_initializer_instance
+    if _role_agent_initializer_instance is None:
+        logger.info("Instantiating RoleAgentInitializer for the first time.")
+        _role_agent_initializer_instance = RoleAgentInitializer()
+    return _role_agent_initializer_instance
+
+# --- Public Initialization Function ---
+def initialize_role_agent() -> ReActAgent:
+    """Initializes and returns the Role Agent.
+    Uses a singleton pattern to ensure resources are loaded only once.
+    """
+    logger.info("initialize_role_agent called.")
+    initializer = get_initializer()
+    return initializer.get_agent()
+
+# Example usage (for testing if run directly)
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger.info("Running role_agent.py directly for testing...")
+
+    # Ensure API key is set for testing
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
+    else:
+        try:
+            test_agent = initialize_role_agent()
+            print("Role Agent initialized successfully for testing.")
+            # A simple test query could be added here, e.g.:
+            # result = test_agent.chat("act as a linux terminal")
+            # print(f"Test query result: {result}")
+        except Exception as e:
+            print(f"Error during testing: {e}")
+
agents/text_analyzer_agent.py ADDED
@@ -0,0 +1,388 @@
+ import os
+ import json
+ import certifi
+ import logging
+ import subprocess  # For calling ffmpeg if needed
+ from typing import List, Dict, Optional
+ from dotenv import load_dotenv
+
+ from llama_index.core.agent.workflow import ReActAgent
+ from llama_index.core.tools import FunctionTool
+ from llama_index.llms.google_genai import GoogleGenAI
+ from llama_index.core.node_parser import SentenceSplitter
+ from llama_index.core import Document
+
+ # Attempt to import Whisper
+ try:
+     import whisper
+     WHISPER_AVAILABLE = True
+ except ImportError:
+     logging.warning("openai-whisper not installed. Audio transcription tool will be unavailable.")
+     WHISPER_AVAILABLE = False
+
+ # Load environment variables
+ load_dotenv()
+
+ # Setup logging
+ logger = logging.getLogger(__name__)
+
+ # Global Whisper model instance (lazy loaded)
+ _whisper_model = None
+
+ os.environ["SSL_CERT_FILE"] = certifi.where()
+
+ # Helper function to load a prompt from file
+ def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+     """Loads a prompt from a text file, falling back to a default on failure."""
+     try:
+         script_dir = os.path.dirname(__file__)
+         prompt_path = os.path.join(script_dir, filename)
+         with open(prompt_path, "r") as f:
+             prompt = f.read()
+         logger.info(f"Successfully loaded prompt from {prompt_path}")
+         return prompt
+     except FileNotFoundError:
+         logger.warning(f"Prompt file {filename} not found. Using default.")
+         return default_prompt
+     except Exception as e:
+         logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+         return default_prompt
+
+ # --- Helper function to load the Whisper model ---
+ def _load_whisper_model(model_size: str = "small") -> Optional[object]:
+     """Loads the Whisper model instance on first use (lazy loading)."""
+     global _whisper_model
+     if not WHISPER_AVAILABLE:
+         logger.error("Whisper library not available, cannot load model.")
+         return None
+
+     if _whisper_model is None:
+         # Allow model size selection via env var; default to the model_size argument ("small")
+         selected_model_size = os.getenv("WHISPER_MODEL_SIZE", model_size)
+         try:
+             logger.info(f"Loading Whisper model: {selected_model_size}...")
+             logger.debug(f"Available Whisper models: {whisper.available_models()}")
+             _whisper_model = whisper.load_model(selected_model_size)
+             logger.info(f"Whisper model {selected_model_size} loaded successfully.")
+         except Exception as e:
+             logger.error(f"Failed to load Whisper model {selected_model_size}: {e}", exc_info=True)
+             _whisper_model = None  # Ensure it remains None on failure
+
+     return _whisper_model
+
+ # --- Tool Functions ---
+
+ def summarize_text(text: str, max_length: int = 150, min_length: int = 30) -> str:
+     """Summarize the provided text using an LLM."""
+     logger.info(f"Summarizing text (length: {len(text)} chars). Max/Min length: {max_length}/{min_length} words")
+
+     # Configuration for the summarization LLM
+     summarizer_llm_model = os.getenv("SUMMARIZER_LLM_MODEL", "models/gemini-1.5-flash")  # Use flash for speed
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for summarization tool LLM.")
+         return "Error: GEMINI_API_KEY not set for summarization."
+
+     # Truncate input text if excessively long, to avoid API limits/costs
+     max_input_chars = 30000  # Example limit, adjust as needed
+     if len(text) > max_input_chars:
+         logger.warning(f"Input text truncated to {max_input_chars} chars for summarization.")
+         text = text[:max_input_chars]
+
+     prompt = (
+         f"Summarize the following text concisely. Aim for a length between {min_length} and {max_length} words. "
+         f"Focus on the main points and key information.\n\n"
+         f"TEXT:\n{text}\n\nSUMMARY:"
+     )
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=summarizer_llm_model)
+         logger.info(f"Using summarization LLM: {summarizer_llm_model}")
+         response = llm.complete(prompt)
+         summary = response.text.strip()
+         logger.info(f"Summarization successful (output length: {len(summary.split())} words).")
+         return summary
+     except Exception as e:
+         logger.error(f"LLM call failed during summarization: {e}", exc_info=True)
+         return f"Error during summarization: {e}"
+
+ def extract_entities(text: str, entity_types: List[str] = ["PERSON", "ORG", "GPE", "DATE", "EVENT"]) -> Dict[str, List[str]]:
+     """Extract named entities (people, organizations, locations, dates, events) from text using an LLM."""
+     logger.info(f"Extracting entities (types: {entity_types}) from text (length: {len(text)} chars).")
+
+     # Configuration for the entity extraction LLM
+     entity_llm_model = os.getenv("ENTITY_LLM_MODEL", "models/gemini-1.5-flash")  # Use flash for speed
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for entity extraction tool LLM.")
+         return {"error": "GEMINI_API_KEY not set for entity extraction."}
+
+     # Truncate input text if excessively long
+     max_input_chars = 30000  # Example limit
+     if len(text) > max_input_chars:
+         logger.warning(f"Input text truncated to {max_input_chars} chars for entity extraction.")
+         text = text[:max_input_chars]
+
+     # Define the desired output format clearly in the prompt.
+     # Expected shape: {"PERSON": ["Ada Lovelace"], "ORG": [], ...}
+     prompt = (
+         f"Extract named entities from the following text. Identify entities of these types: {', '.join(entity_types)}. "
+         f"Format the output as a JSON object where keys are the entity types (uppercase) and values are lists of unique strings found for that type. "
+         f"If no entities of a type are found, include the key with an empty list.\n\n"
+         f"TEXT:\n{text}\n\nJSON_OUTPUT:"
+     )
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=entity_llm_model, response_mime_type="application/json")  # Request JSON output
+         logger.info(f"Using entity extraction LLM: {entity_llm_model}")
+         response = llm.complete(prompt)
+
+         # Attempt to parse the JSON response
+         try:
+             # The response might be wrapped in ```json ... ```; strip the fences if present
+             json_str = response.text.strip()
+             if json_str.startswith("```json"):
+                 json_str = json_str[7:]
+             if json_str.endswith("```"):
+                 json_str = json_str[:-3]
+
+             entities = json.loads(json_str.strip())
+             # Validate structure (optional but good practice)
+             if not isinstance(entities, dict):
+                 raise ValueError("LLM response is not a JSON object.")
+             # Ensure all requested types are present, even if empty
+             for entity_type in entity_types:
+                 if entity_type not in entities:
+                     entities[entity_type] = []
+                 elif not isinstance(entities[entity_type], list):
+                     logger.warning(f"Entity type {entity_type} value is not a list, converting.")
+                     entities[entity_type] = [str(entities[entity_type])]  # Attempt conversion
+
+             logger.info(f"Entity extraction successful. Found entities: { {k: len(v) for k, v in entities.items()} }")
+             return entities
+         except json.JSONDecodeError as json_err:
+             logger.error(f"Failed to parse JSON response from LLM: {json_err}. Response text: {response.text}")
+             return {"error": f"Failed to parse LLM JSON response: {json_err}"}
+         except ValueError as val_err:
+             logger.error(f"Invalid JSON structure from LLM: {val_err}. Response text: {response.text}")
+             return {"error": f"Invalid JSON structure from LLM: {val_err}"}
+
+     except Exception as e:
+         logger.error(f"LLM call failed during entity extraction: {e}", exc_info=True)
+         return {"error": f"Error during entity extraction: {e}"}
+
+ def split_text_into_chunks(text: str, chunk_size: int = 1000, chunk_overlap: int = 200) -> List[str]:
+     """Split a long text into smaller chunks suitable for downstream processing."""
+     logger.info(f"Splitting text (length: {len(text)} chars) into chunks (size: {chunk_size}, overlap: {chunk_overlap}).")
+     if not text:
+         return []
+     try:
+         splitter = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+         document = Document(text=text)
+         nodes = splitter.get_nodes_from_documents([document])
+         chunks = [node.get_content() for node in nodes]
+         logger.info(f"Text split into {len(chunks)} chunks.")
+         return chunks
+     except Exception as e:
+         logger.error(f"Error splitting text: {e}", exc_info=True)
+         # Fallback to simple fixed-width splitting if SentenceSplitter fails
+         logger.warning("Falling back to simple text splitting.")
+         step = max(1, chunk_size - chunk_overlap)  # Guard against a non-positive step
+         return [text[i:i + chunk_size] for i in range(0, len(text), step)]
+
+ def transcribe_audio(audio_file_path: str, language: Optional[str] = None) -> str:
+     """Transcribes an audio file using the OpenAI Whisper model.
+     Args:
+         audio_file_path (str): The path to the audio file (e.g., mp3, wav, m4a).
+         language (Optional[str]): The language code (e.g., "en", "es") or full name ("English", "Spanish").
+             If None, Whisper will detect the language.
+     Returns:
+         str: The transcribed text or an error message.
+     """
+     logger.info(f"Attempting to transcribe audio file: {audio_file_path}, Language: {language}")
+
+     # Check if Whisper is available
+     if not WHISPER_AVAILABLE:
+         return "Error: openai-whisper library is required but not installed."
+
+     # Check if the file exists
+     if not os.path.exists(audio_file_path):
+         logger.error(f"Audio file not found: {audio_file_path}")
+         return f"Error: Audio file not found at {audio_file_path}"
+
+     # Load the Whisper model (lazy loading; default size "small" or the WHISPER_MODEL_SIZE env var)
+     model = _load_whisper_model()
+     if model is None:
+         return "Error: Failed to load Whisper model."
+
+     try:
+         # Perform transcription; transcribe() handles various audio formats via ffmpeg
+         result = model.transcribe(audio_file_path, language=language)
+         transcribed_text = result["text"]
+         detected_language = result.get("language", "unknown")  # Get the detected language if available
+         logger.info(f"Audio transcription successful. Detected language: {detected_language}. Text length: {len(transcribed_text)}")
+         return transcribed_text
+
+     except Exception as e:
+         # Check whether it might be an ffmpeg issue
+         if "ffmpeg" in str(e).lower():
+             logger.error(f"Error during transcription, possibly an ffmpeg issue: {e}", exc_info=True)
+             # Check if ffmpeg is installed using a shell command
+             try:
+                 subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
+                 # If ffmpeg is installed, the error is likely something else
+                 return f"Error during transcription (ffmpeg seems installed): {e}"
+             except (FileNotFoundError, subprocess.CalledProcessError):
+                 logger.error("ffmpeg command not found or failed. Please ensure ffmpeg is installed and in PATH.")
+                 return "Error: ffmpeg not found or not working. Please install ffmpeg."
+         else:
+             logger.error(f"Unexpected error during transcription: {e}", exc_info=True)
+             return f"Error during transcription: {e}"
+
+ # --- Tool Definitions ---
+ summarize_tool = FunctionTool.from_defaults(
+     fn=summarize_text,
+     name="summarize_text",
+     description=(
+         "Summarizes a given block of text. Useful for condensing long documents or articles. "
+         "Input: text (str), Optional: max_length (int), min_length (int). Output: summary (str) or error."
+     ),
+ )
+
+ extract_entities_tool = FunctionTool.from_defaults(
+     fn=extract_entities,
+     name="extract_entities",
+     description=(
+         "Extracts named entities (people, organizations, locations, dates, events) from text. "
+         "Input: text (str), Optional: entity_types (List[str]). Output: Dict[str, List[str]] or error dict."
+     ),
+ )
+
+ split_text_tool = FunctionTool.from_defaults(
+     fn=split_text_into_chunks,
+     name="split_text_into_chunks",
+     description=(
+         "Splits a long text document into smaller, overlapping chunks. "
+         "Input: text (str), Optional: chunk_size (int), chunk_overlap (int). Output: List[str] of chunks."
+     ),
+ )
+
+ # Conditionally create transcribe_audio_tool
+ transcribe_audio_tool = None
+ if WHISPER_AVAILABLE:
+     transcribe_audio_tool = FunctionTool.from_defaults(
+         fn=transcribe_audio,
+         name="transcribe_audio_file",
+         description=(
+             "Transcribes speech from an audio file (e.g., mp3, wav, m4a) into text using Whisper. "
+             "Input: audio_file_path (str), Optional: language (str - e.g., \"en\", \"Spanish\"). "
+             "Output: transcribed text (str) or error message."
+         ),
+     )
+     logger.info("Audio transcription tool created.")
+ else:
+     logger.warning("Audio transcription tool disabled because openai-whisper is not installed.")
+
+ # --- Agent Initialization ---
+ def initialize_text_analyzer_agent() -> ReActAgent:
+     """Initializes the Text Analyzer Agent."""
+     logger.info("Initializing TextAnalyzerAgent...")
+
+     # Configuration for the agent's main LLM
+     agent_llm_model = os.getenv("TEXT_ANALYZER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for TextAnalyzerAgent.")
+         raise ValueError("GEMINI_API_KEY must be set for TextAnalyzerAgent")
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+         logger.info(f"Using agent LLM: {agent_llm_model}")
+
+         # Load the system prompt
+         default_system_prompt = "You are TextAnalyzerAgent... [Default prompt content - replace with actual]"  # Placeholder
+         system_prompt = load_prompt_from_file("../prompts/text_analyzer_prompt.txt", default_system_prompt)
+         if system_prompt == default_system_prompt:
+             logger.warning("Using default/fallback system prompt for TextAnalyzerAgent.")
+
+         # Define the available tools, including the audio tool if available
+         tools = [summarize_tool, extract_entities_tool, split_text_tool]
+         if transcribe_audio_tool:
+             tools.append(transcribe_audio_tool)
+
+         # Update the agent description based on available tools
+         agent_description = (
+             "Analyzes text content. Can summarize text (`summarize_text`), extract named entities (`extract_entities`), "
+             "and split long texts (`split_text_into_chunks`)."
+         )
+         if transcribe_audio_tool:
+             agent_description += " Can also transcribe audio files to text (`transcribe_audio_file`)."
+
+         agent = ReActAgent(
+             name="text_analyzer_agent",
+             description=agent_description,
+             tools=tools,
+             llm=llm,
+             system_prompt=system_prompt,
+             can_handoff_to=["planner_agent", "research_agent", "reasoning_agent"],  # Example handoffs
+         )
+         logger.info("TextAnalyzerAgent initialized successfully.")
+         return agent
+
+     except Exception as e:
+         logger.error(f"Error during TextAnalyzerAgent initialization: {e}", exc_info=True)
+         raise
+
+ # Example usage (for testing if run directly)
+ if __name__ == "__main__":
+     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+     logger.info("Running text_analyzer_agent.py directly for testing...")
+
+     # Check required keys
+     required_keys = ["GEMINI_API_KEY"]
+     missing_keys = [key for key in required_keys if not os.getenv(key)]
+     if missing_keys:
+         print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+     else:
+         try:
+             # Test summarization
+             print("\nTesting summarization...")
+             long_text = """The Industrial Revolution, now also known as the First Industrial Revolution, was a period of global transition of the human economy towards more efficient and stable manufacturing processes that succeeded the Agricultural Revolution, starting from Great Britain, continental Europe and the United States, that occurred during the period from around 1760 to about 1820–1840. This transition included going from hand production methods to machines; new chemical manufacturing and iron production processes; the increasing use of water power and steam power; the development of machine tools; and the rise of the mechanized factory system. The Revolution also saw an unprecedented rise in the rate of population growth."""
+             summary = summarize_text(long_text, max_length=50)
+             print(f"Summary:\n{summary}")
+
+             # Test entity extraction
+             print("\nTesting entity extraction...")
+             entities = extract_entities(long_text, entity_types=["EVENT", "GPE", "DATE"])
+             print(f"Extracted Entities:\n{entities}")
+
+             # Test text splitting
+             print("\nTesting text splitting...")
+             chunks = split_text_into_chunks(long_text * 3, chunk_size=150, chunk_overlap=30)  # Make the text longer
+             print(f"Split into {len(chunks)} chunks. First chunk:\n{chunks[0]}")
+
+             # Test audio transcription (if available)
+             if WHISPER_AVAILABLE:
+                 print("\nTesting audio transcription...")
+                 # Create a dummy audio file for testing (requires ffmpeg)
+                 dummy_file = "dummy_audio.mp3"
+                 try:
+                     # Generate a 1-second silent MP3 using ffmpeg
+                     subprocess.run(["ffmpeg", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono", "-t", "1", "-q:a", "9", "-y", dummy_file], check=True, capture_output=True)
+                     print(f"Created dummy audio file: {dummy_file}")
+                     transcript = transcribe_audio(dummy_file)
+                     print(f"Transcription Result: '{transcript}' (Expected: empty or silence markers)")
+                     os.remove(dummy_file)  # Clean up the dummy file
+                 except Exception as ffmpeg_err:
+                     print(f"Could not create/test dummy audio file (ffmpeg required): {ffmpeg_err}")
+             else:
+                 print("\nSkipping audio transcription test as openai-whisper is not available.")
+
+             # Initialize the agent (optional)
+             # test_agent = initialize_text_analyzer_agent()
+             # print("\nText Analyzer Agent initialized successfully for testing.")
+
+         except Exception as e:
+             print(f"Error during testing: {e}")
+
agents/verifier_agent.py ADDED
@@ -0,0 +1,300 @@
+ import os
+ import logging
+ import re
+ from typing import List
+ from dotenv import load_dotenv
+
+ from llama_index.core.agent.workflow import FunctionAgent
+ from llama_index.core.tools import FunctionTool
+ from llama_index.llms.google_genai import GoogleGenAI
+
+ # Load environment variables
+ load_dotenv()
+
+ # Setup logging
+ logger = logging.getLogger(__name__)
+
+ class VerificationError(Exception):
+     """Custom exception for verification failures."""
+     pass
+
+ class Verifier:
+     """
+     Cross-checks extracted facts, identifies contradictions using an LLM,
+     and assigns a confidence score to each fact.
+     """
+     def __init__(self):
+         """Initializes the Verifier, loading configuration from environment variables."""
+         logger.info("Initializing Verifier...")
+         self.threshold = float(os.getenv("VERIFIER_CONFIDENCE_THRESHOLD", "0.7"))
+         self.verifier_llm_model = os.getenv("VERIFIER_LLM_MODEL", "models/gemini-2.0-flash")  # For scoring
+         self.agent_llm_model = os.getenv("VERIFIER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")  # For agent logic & contradiction detection
+         self.gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+         if not self.gemini_api_key:
+             logger.error("GEMINI_API_KEY not found in environment variables.")
+             raise ValueError("GEMINI_API_KEY must be set")
+
+         try:
+             self.verifier_llm = GoogleGenAI(
+                 api_key=self.gemini_api_key,
+                 model=self.verifier_llm_model,
+             )
+             self.agent_llm = GoogleGenAI(
+                 api_key=self.gemini_api_key,
+                 model=self.agent_llm_model,
+             )
+             logger.info(f"Verifier initialized with threshold {self.threshold}, verifier LLM {self.verifier_llm_model}, agent LLM {self.agent_llm_model}")
+         except Exception as e:
+             logger.error(f"Error initializing Verifier LLMs: {e}", exc_info=True)
+             raise
+
+     def verify_facts(self, facts: List[str]) -> List[str]:
+         """
+         Assign a confidence score via LLM to each fact and return formatted strings.
+
+         Args:
+             facts (List[str]): Facts to verify.
+
+         Returns:
+             List[str]: Each item is "fact: score" with score ∈ [threshold, 1.0].
+             If the LLM call for a fact fails, that item reads "fact: ERROR - Verification failed".
+         """
+         logger.info(f"Verifying {len(facts)} facts...")
+         results: List[str] = []
+         for fact in facts:
+             prompt = (
+                 "You are a fact verifier. "
+                 "On a scale from 0.00 to 1.00, where any value below "
+                 f"{self.threshold:.2f} indicates low confidence, rate the following statement's trustworthiness. "
+                 "Respond with **only** a decimal number rounded to two digits (e.g., 0.82) and no extra text.\n\n"
+                 f"Statement: \"{fact}\""
+             )
+             try:
+                 response = self.verifier_llm.complete(prompt)
+                 score_text = response.text.strip()
+                 # Try direct conversion first
+                 try:
+                     score = float(score_text)
+                 except ValueError:
+                     # Fallback: extract the first float if the model returns extra text
+                     match = re.search(r"0?\.\d+|1(?:\.0+)?", score_text)
+                     if match:
+                         score = float(match.group(0))
+                         logger.warning(f"Extracted score {score} from noisy LLM response: {score_text}")
+                     else:
+                         logger.error(f"Could not parse score from LLM response: {score_text}. Using threshold {self.threshold}.")
+                         score = self.threshold  # Fall back to the threshold if parsing fails completely
+
+                 # Enforce the threshold floor
+                 if score < self.threshold:
+                     logger.info(f"Score {score:.2f} for fact {fact} below threshold {self.threshold}, raising to threshold.")
+                     score = self.threshold
+
+                 results.append(f"{fact}: {score:.2f}")
+
+             except Exception as e:
+                 logger.error(f"LLM call failed during fact verification for {fact}: {e}", exc_info=True)
+                 # Option 1: Raise an error
+                 # raise VerificationError(f"LLM call failed for fact: {fact}") from e
+                 # Option 2: Append an error message (current approach)
+                 results.append(f"{fact}: ERROR - Verification failed")
+                 # Option 3: Assign the lowest score
+                 # results.append(f"{fact}: {self.threshold:.2f} (Verification Error)")
+
+         logger.info(f"Fact verification complete. {len(results)} results generated.")
+         return results
+
+     def find_contradictions_llm(self, facts: List[str]) -> List[str]:
+         """
+         Identify contradictions among a list of facts using an LLM.
+
+         Args:
+             facts (List[str]): List of fact strings.
+
+         Returns:
+             List[str]: Pairs of facts detected as contradictory, joined by " <> ".
+
+         Raises:
+             VerificationError: If the LLM call fails.
+         """
+         logger.info(f"Finding contradictions in {len(facts)} facts using LLM...")
+         if len(facts) < 2:
+             logger.info("Not enough facts to find contradictions.")
+             return []
+
+         facts_numbered = "\n".join([f"{i+1}. {fact}" for i, fact in enumerate(facts)])
+
+         prompt = (
+             "You are a logical reasoning assistant. Analyze the following numbered list of statements. "
+             "Identify any pairs of statements that directly contradict each other. "
+             "List *only* the numbers of the contradicting pairs, one pair per line, formatted as 'X, Y'. "
+             "If no contradictions are found, respond with 'None'. Do not include any other text or explanation.\n\n"
+             f"Statements:\n{facts_numbered}"
+         )
+
+         try:
+             response = self.agent_llm.complete(prompt)  # Use the more powerful agent LLM
+             response_text = response.text.strip()
+             logger.info(f"LLM response for contradictions: {response_text}")
+
+             if response_text.lower() == 'none':
+                 logger.info("LLM reported no contradictions.")
+                 return []
+
+             contradiction_pairs = []
+             lines = response_text.split("\n")
+             for line in lines:
+                 line = line.strip()
+                 if not line:
+                     continue
+                 try:
+                     # Expect a format like "1, 5"
+                     parts = line.split(',')
+                     if len(parts) == 2:
+                         idx1 = int(parts[0].strip()) - 1
+                         idx2 = int(parts[1].strip()) - 1
+
+                         # Validate indices
+                         if 0 <= idx1 < len(facts) and 0 <= idx2 < len(facts) and idx1 != idx2:
+                             # Sort the pair so order doesn't matter, and skip duplicates
+                             pair = tuple(sorted((idx1, idx2)))
+                             fact1 = facts[pair[0]]
+                             fact2 = facts[pair[1]]
+                             contradiction_str = f"{fact1} <> {fact2}"
+                             if contradiction_str not in contradiction_pairs:
+                                 contradiction_pairs.append(contradiction_str)
+                                 logger.info(f"Identified contradiction: {contradiction_str}")
+                         else:
+                             logger.warning(f"Invalid index pair found in LLM contradiction response: {line}")
+                     else:
+                         logger.warning(f"Could not parse contradiction pair from LLM response line: {line}")
+                 except ValueError:
+                     logger.warning(f"Non-integer index found in LLM contradiction response line: {line}")
+                 except Exception as parse_err:
+                     logger.warning(f"Error parsing LLM contradiction response line {line}: {parse_err}")
+
+             logger.info(f"Contradiction check complete. Found {len(contradiction_pairs)} pairs.")
+             return contradiction_pairs
+
+         except Exception as e:
+             logger.error(f"LLM call failed during contradiction detection: {e}", exc_info=True)
+             # Option 1: Raise an error (current approach)
+             raise VerificationError("LLM call failed during contradiction detection") from e
+             # Option 2: Return an empty list (fail silently)
+             # return []
+
+ # --- Tool Definitions ---
+ # Tools are created within the initializer below so they bind to the Verifier instance.
+
+ # --- Agent Initialization ---
+
+ # Store the initializer instance globally to ensure singleton behavior
+ _verifier_initializer_instance = None
+
+ class VerifierInitializer:
+     def __init__(self):
+         self.verifier = Verifier()  # Initialize the Verifier class
+         self._create_tools()
+
+     def _create_tools(self):
+         self.verify_facts_tool = FunctionTool.from_defaults(
+             fn=self.verifier.verify_facts,  # Bind to the instance method
+             name="verify_facts",
+             description=(
+                 "Assigns a numerical confidence score (based on plausibility and internal consistency) to each factual assertion in a list. "
+                 "Input: List[str] of statements. Output: List[str] of 'statement: score' pairs."
+             ),
+         )
+
+         self.find_contradictions_tool = FunctionTool.from_defaults(
+             fn=self.verifier.find_contradictions_llm,  # Bind to the instance method (LLM version)
+             name="find_contradictions",
+             description=(
+                 "Uses an LLM to detect logical contradictions among a list of statements. "
+                 "Input: List[str] of factual assertions. "
+                 "Output: List[str] where each entry is a conflicting pair in the format 'statement1 <> statement2'. Returns an empty list if none are found."
+             )
+         )
+
+     def get_agent(self) -> FunctionAgent:
+         """Initializes and returns the Verifier Agent."""
+         logger.info("Creating VerifierAgent FunctionAgent instance...")
+
+         # System prompt (consider loading from file)
+         system_prompt = """\
+ You are VerifierAgent, a fact verification assistant. Given a list of factual statements, you must:
+
+ 1. **Verify Facts**: Call `verify_facts` to assign a confidence score to each statement.
+ 2. **Detect Contradictions**: Call `find_contradictions` to identify logical conflicts between the statements using an LLM.
+ 3. **Present Results**: Output clear bullet points listing each fact with its confidence score, followed by a list of any detected contradictions.
+ 4. **Hand-Off**: If significant contradictions or low-confidence facts are found that require deeper analysis, hand off to **reasoning_agent**. Otherwise, pass the verified facts and contradiction summary to **planner_agent** for integration.
+ """
+
+         agent = FunctionAgent(
+             name="verifier_agent",
+             description=(
+                 "Evaluates factual statements by assigning confidence scores (`verify_facts`) "
+                 "and detecting logical contradictions using an LLM (`find_contradictions`). "
+                 "Hands off to reasoning_agent for complex issues or planner_agent for synthesis."
+             ),
+             tools=[
+                 self.verify_facts_tool,
+                 self.find_contradictions_tool,
+             ],
+             llm=self.verifier.agent_llm,  # Use the agent LLM from the Verifier instance
+             system_prompt=system_prompt,
+             can_handoff_to=["reasoning_agent", "planner_agent"],
+         )
+         logger.info("VerifierAgent FunctionAgent instance created.")
+         return agent
+
+ def get_verifier_initializer():
+     """Gets the singleton instance of VerifierInitializer."""
+     global _verifier_initializer_instance
+     if _verifier_initializer_instance is None:
+         logger.info("Instantiating VerifierInitializer for the first time.")
+         _verifier_initializer_instance = VerifierInitializer()
+     return _verifier_initializer_instance
+
+ def initialize_verifier_agent() -> FunctionAgent:
+     """Initializes and returns the Verifier Agent using a singleton initializer."""
+     logger.info("initialize_verifier_agent called.")
+     initializer = get_verifier_initializer()
+     return initializer.get_agent()
+
+ # Example usage (for testing if run directly)
+ if __name__ == "__main__":
+     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+     logger.info("Running verifier_agent.py directly for testing...")
+
+     # Ensure the API key is set for testing
+     if not os.getenv("GEMINI_API_KEY"):
+         print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
+     else:
+         try:
+             test_agent = initialize_verifier_agent()
+             print("Verifier Agent initialized successfully for testing.")
+
+             # Test contradiction detection
+             initializer = get_verifier_initializer()
+             test_facts = [
+                 "The sky is blue.",
+                 "Water boils at 100 degrees Celsius.",
+                 "The sky is not blue.",
+                 "Paris is the capital of France."
+             ]
+             print(f"\nTesting contradiction detection on: {test_facts}")
+             contradictions = initializer.verifier.find_contradictions_llm(test_facts)
+             print(f"Detected contradictions: {contradictions}")
+
+             # Test fact verification
+             print(f"\nTesting fact verification on: {test_facts}")
+             verified = initializer.verifier.verify_facts(test_facts)
+             print(f"Verified facts: {verified}")
+
+         except Exception as e:
+             print(f"Error during testing: {e}")
+
app.py ADDED
@@ -0,0 +1,421 @@
+ import os
+ import logging
+ import mimetypes
+ from dotenv import load_dotenv
+
+ from typing import Any, List
+
+ import gradio as gr
+ import requests
+ import pandas as pd
+
+ from llama_index.core.agent.workflow import AgentWorkflow, ToolCallResult, ToolCall, AgentOutput
+ from llama_index.core.base.llms.types import ChatMessage, TextBlock, ImageBlock, AudioBlock
+
+ # Agent initializers are expected in the same directory or a known path.
+ # Adjust import paths if necessary based on the deployment structure.
+ try:
+     # Existing agents
+     from agents.image_analyzer_agent import initialize_image_analyzer_agent
+     from agents.reasoning_agent import initialize_reasoning_agent
+     from agents.text_analyzer_agent import initialize_text_analyzer_agent
+     from agents.code_agent import initialize_code_agent
+     from agents.math_agent import initialize_math_agent
+     from agents.planner_agent import initialize_planner_agent
+     from agents.research_agent import initialize_research_agent
+     from agents.role_agent import initialize_role_agent
+     from agents.verifier_agent import initialize_verifier_agent
+     # New agents
+     from agents.advanced_validation_agent import initialize_advanced_validation_agent
+     from agents.figure_interpretation_agent import initialize_figure_interpretation_agent
+     from agents.long_context_management_agent import initialize_long_context_management_agent
+     AGENT_IMPORT_PATH = "local"
+ except ImportError as e:
+     # Fallback for potentially different structures (e.g., a nested folder)
+     try:
+         from final_project.image_analyzer_agent import initialize_image_analyzer_agent
+         from final_project.reasoning_agent import initialize_reasoning_agent
+         from final_project.text_analyzer_agent import initialize_text_analyzer_agent
+         from final_project.code_agent import initialize_code_agent
+         from final_project.math_agent import initialize_math_agent
+         from final_project.planner_agent import initialize_planner_agent
+         from final_project.research_agent import initialize_research_agent
+         from final_project.role_agent import initialize_role_agent
+         from final_project.verifier_agent import initialize_verifier_agent
+         from final_project.advanced_validation_agent import initialize_advanced_validation_agent
+         from final_project.figure_interpretation_agent import initialize_figure_interpretation_agent
+         from final_project.long_context_management_agent import initialize_long_context_management_agent
+         AGENT_IMPORT_PATH = "final_project"
+     except ImportError as e2:
+         print(f"Import Error: Could not find agent modules. Tried local and final_project paths. Error: {e2}")
+         # Raise to prevent the app from starting with a partially imported agent set
+         raise RuntimeError(f"Failed to import agent modules: {e2}")
+
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ load_dotenv()  # Load environment variables from .env file
+
+ # Setup logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+ # --- Constants ---
+ DEFAULT_API_URL = os.getenv("GAIA_API_URL", "https://agents-course-unit4-scoring.hf.space")
+
+ # --- Agent Initialization (Singleton Pattern) ---
+ # Initialize the agent workflow once
+ AGENT_WORKFLOW = None
+ try:
+     logger.info(f"Initializing GAIA Multi-Agent Workflow (import path: {AGENT_IMPORT_PATH})...")
+     # Existing agents
+     role_agent = initialize_role_agent()
+     code_agent = initialize_code_agent()
+     math_agent = initialize_math_agent()
+     planner_agent = initialize_planner_agent()
+     research_agent = initialize_research_agent()
+     text_analyzer_agent = initialize_text_analyzer_agent()
+     verifier_agent = initialize_verifier_agent()
+     image_analyzer_agent = initialize_image_analyzer_agent()
+     reasoning_agent = initialize_reasoning_agent()
+     # New agents
+     advanced_validation_agent = initialize_advanced_validation_agent()
+     figure_interpretation_agent = initialize_figure_interpretation_agent()
+     long_context_management_agent = initialize_long_context_management_agent()
+
+     # Check that all agents initialized successfully
+     all_agents = [
+         code_agent, role_agent, math_agent, planner_agent, research_agent,
+         text_analyzer_agent, image_analyzer_agent, verifier_agent, reasoning_agent,
+         advanced_validation_agent, figure_interpretation_agent, long_context_management_agent
+     ]
+     if not all(all_agents):
+         raise RuntimeError("One or more agents failed to initialize.")
+
+     AGENT_WORKFLOW = AgentWorkflow(
+         agents=all_agents,
+         root_agent="planner_agent"  # Keep the planner as root, as per the plan
+     )
+     logger.info("GAIA Multi-Agent Workflow initialized successfully.")
+ except Exception as e:
+     logger.error(f"FATAL: Error initializing agent workflow: {e}", exc_info=True)
+     # AGENT_WORKFLOW remains None, so BasicAgent initialization will fail
+
+ # --- Basic Agent Definition (Wrapper for Workflow) ---
+ class BasicAgent:
+     def __init__(self, workflow: AgentWorkflow):
+         if workflow is None:
+             logger.error("AgentWorkflow is None, initialization likely failed.")
+             raise RuntimeError("AgentWorkflow failed to initialize. Check logs for details.")
+         self.agent_workflow = workflow
+         logger.info("BasicAgent wrapper initialized.")
+
+     async def __call__(self, question: str | ChatMessage) -> Any:
+         if isinstance(question, ChatMessage):
+             log_question = str(question.blocks[0].text)[:100] if question.blocks and hasattr(question.blocks[0], "text") else str(question)[:100]
+             logger.info(f"Agent received question (first 100 chars): {log_question}...")
+         else:
+             logger.info(f"Agent received question (first 100 chars): {question[:100]}...")
+
+         handler = self.agent_workflow.run(user_msg=question)
+
+         current_agent = None
+         async for event in handler.stream_events():
+             if (
+                 hasattr(event, "current_agent_name")
+                 and event.current_agent_name != current_agent
+             ):
+                 current_agent = event.current_agent_name
+                 logger.info(f"{'=' * 50}")
+                 logger.info(f"🤖 Active agent: {current_agent}")
+                 logger.info(f"{'=' * 50}")
+
+             # Optional detailed logging (uncomment if needed)
+             # from llama_index.core.agent.runner.base import AgentStream, AgentInput
+             # if isinstance(event, AgentStream):
+             #     if event.delta:
+             #         logger.debug(f"STREAM: {event.delta}")  # Use debug level
+             # elif isinstance(event, AgentInput):
+             #     logger.debug(f"📥 Input: {event.input}")  # Use debug level
+             elif isinstance(event, AgentOutput):
+                 if event.response and hasattr(event.response, 'content') and event.response.content:
+                     logger.info(f"📤 Output: {event.response.content}")
+                 if event.tool_calls:
+                     logger.info(
+                         f"🛠️ Planning to use tools: {[call.tool_name for call in event.tool_calls]}"
+                     )
+             elif isinstance(event, ToolCallResult):
+                 logger.info(f"🔧 Tool Result ({event.tool_name}):")
+                 logger.info(f"  Arguments: {event.tool_kwargs}")
+                 # Limit output logging length, since results can be very long
+                 output_str = str(event.tool_output)
+                 logger.info(f"  Output: {output_str[:500]}{'...' if len(output_str) > 500 else ''}")
+             elif isinstance(event, ToolCall):
+                 logger.info(f"🔨 Calling Tool: {event.tool_name}")
+                 logger.info(f"  With arguments: {event.tool_kwargs}")
+
+         answer = await handler
+         final_content = answer.response.content if hasattr(answer, 'response') and hasattr(answer.response, 'content') else str(answer)
+         logger.info(f"Agent returning final answer: {final_content[:500]}{'...' if len(final_content) > 500 else ''}")
+         return answer.response  # Return the actual response object expected by Gradio
+
+ # --- Helper Functions for run_and_submit_all ---
+
+ async def fetch_questions(questions_url: str) -> List[dict] | None:
+     """Fetches questions from the GAIA benchmark API."""
+     logger.info(f"Fetching questions from: {questions_url}")
+     try:
+         response = requests.get(questions_url, timeout=30)  # Increased timeout
+         response.raise_for_status()
+         questions_data = response.json()
+         if not questions_data:
+             logger.warning("Fetched questions list is empty.")
+             return None
+         logger.info(f"Fetched {len(questions_data)} questions.")
+         return questions_data
+     except requests.exceptions.JSONDecodeError as e:
+         # Caught before RequestException: requests' JSONDecodeError is a RequestException subclass,
+         # so the reverse ordering would make this branch unreachable.
+         logger.error(f"Error decoding JSON response from questions endpoint: {e}", exc_info=True)
+         logger.error(f"Response text: {response.text[:500]}")
+         return None
+     except requests.exceptions.RequestException as e:
+         logger.error(f"Error fetching questions: {e}", exc_info=True)
+         return None
+     except Exception as e:
+         logger.error(f"An unexpected error occurred fetching questions: {e}", exc_info=True)
+         return None
+
+ async def process_question(agent: BasicAgent, item: dict, base_fetch_file_url: str) -> dict | None:
+     """Processes a single question item using the agent."""
+     task_id = item.get("task_id")
+     question_text = item.get("question")
+     file_name = item.get("file_name")
+
+     if not task_id or question_text is None:
+         logger.warning(f"Skipping item with missing task_id or question: {item}")
+         return None
+
+     message: ChatMessage
+     if file_name:
+         fetch_file_url = f"{base_fetch_file_url}/{task_id}"
+         logger.info(f"Fetching file '{file_name}' for task {task_id} from {fetch_file_url}")
+         try:
+             response = requests.get(fetch_file_url, timeout=60)  # Increased timeout for files
+             response.raise_for_status()
+             mime_type, _ = mimetypes.guess_type(file_name)
+             logger.info(f"File '{file_name}' MIME type guessed as: {mime_type}")
+
+             file_block: TextBlock | ImageBlock | AudioBlock | None = None
+             if mime_type:
+                 # Prioritize specific extensions for text-like content
+                 text_extensions = (
+                     ".txt", ".csv", ".json", ".xml", ".yaml", ".yml", ".ini", ".cfg", ".toml", ".log", ".properties",
+                     ".html", ".htm", ".xhtml", ".css", ".scss", ".sass", ".less", ".svg", ".md", ".rst",
+                     ".py", ".js", ".java", ".c", ".cpp", ".h", ".hpp", ".cs", ".go", ".php", ".rb", ".swift", ".kt",
+                     ".sh", ".bat", ".ipynb", ".Rmd", ".tex"  # Added more code/markup types
+                 )
+                 if mime_type.startswith('text/') or file_name.lower().endswith(text_extensions):
+                     try:
+                         file_content = response.content.decode('utf-8')  # Try UTF-8 first
+                     except UnicodeDecodeError:
+                         try:
+                             file_content = response.content.decode('latin-1')  # Fallback
+                             logger.warning(f"Decoded file {file_name} using latin-1 fallback.")
+                         except Exception as decode_err:
+                             logger.error(f"Could not decode file {file_name}: {decode_err}")
+                             file_content = f"[Error: Could not decode file content for {file_name}]"
+                     file_block = TextBlock(block_type="text", text=file_content)
+                 elif mime_type.startswith('image/'):
+                     # Pass image content directly for multi-modal models
+                     file_block = ImageBlock(url=fetch_file_url, image=response.content)
+                 elif mime_type.startswith('audio/'):
+                     # Pass audio content directly
+                     file_block = AudioBlock(url=fetch_file_url, audio=response.content)
+                 elif mime_type == 'application/pdf':
+                     # PDF: pass a text block referencing the URL for agents to handle
+                     logger.info(f"PDF file detected: {file_name}. Passing reference URL.")
+                     file_block = TextBlock(text=f"[Reference PDF file available at: {fetch_file_url}]")
+                 # Add handling for other types like video if needed
+                 # elif mime_type.startswith('video/'):
+                 #     logger.info(f"Video file detected: {file_name}. Passing reference URL.")
+                 #     file_block = TextBlock(text=f"[Reference Video file available at: {fetch_file_url}]")
+
+             if file_block:
+                 blocks = [TextBlock(text=question_text), file_block]
+                 message = ChatMessage(role="user", blocks=blocks)
+             else:
+                 logger.warning(f"File type for '{file_name}' (MIME: {mime_type}) is not supported for block creation. Passing the text question only.")
+                 message = ChatMessage(role="user", blocks=[TextBlock(text=question_text)])
+
+         except requests.exceptions.RequestException as e:
+             logger.error(f"Error fetching file for task {task_id}: {e}", exc_info=True)
+             return {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: Failed to fetch file {file_name} - {e}"}
+         except Exception as e:
+             logger.error(f"Error processing file for task {task_id}: {e}", exc_info=True)
+             return {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: Failed to process file {file_name} - {e}"}
+     else:
+         # No file associated with the question
+         message = ChatMessage(role="user", blocks=[TextBlock(text=question_text)])
+
+     # Run the agent on the prepared message
+     try:
+         logger.info(f"Running agent on task {task_id}...")
+         submitted_answer_response = await agent(message)
+         # Extract content safely
+         submitted_answer = submitted_answer_response.content if hasattr(submitted_answer_response, 'content') else str(submitted_answer_response)
+
+         logger.info(f"👍 Agent submitted answer for task {task_id}: {submitted_answer[:200]}{'...' if len(submitted_answer) > 200 else ''}")
+         return {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
+     except Exception as e:
+         logger.error(f"Error running agent on task {task_id}: {e}", exc_info=True)
+         return {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
+
+ async def submit_answers(submit_url: str, username: str, agent_code: str, results: List[dict]) -> tuple[str, pd.DataFrame]:
+     """Submits the collected answers to the GAIA benchmark API."""
+     answers_payload = [
+         {"task_id": r["Task ID"], "submitted_answer": r["Submitted Answer"]}
+         for r in results if "Submitted Answer" in r and not str(r["Submitted Answer"]).startswith("AGENT ERROR:")
+     ]
+
+     if not answers_payload:
+         logger.warning("Agent did not produce any valid answers to submit.")
+         results_df = pd.DataFrame(results)
+         return "Agent did not produce any valid answers to submit.", results_df
+
+     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+     logger.info(status_update)
+     logger.info(f"Submitting to: {submit_url}")
+
+     try:
+         response = requests.post(submit_url, json=submission_data, timeout=120)  # Increased timeout
+         response.raise_for_status()
+         result_data = response.json()
+         final_status = (
+             f"Submission Successful!\n"
+             f"User: {result_data.get('username')}\n"
+             f"Overall Score: {result_data.get('score', 'N/A')}% "
+             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+             f"Message: {result_data.get('message', 'No message received.')}"
+         )
+         logger.info("Submission successful.")
+         results_df = pd.DataFrame(results)
+         return final_status, results_df
+     except requests.exceptions.HTTPError as e:
+         error_detail = f"Server responded with status {e.response.status_code}."
+         try:
+             error_json = e.response.json()
+             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+         except requests.exceptions.JSONDecodeError:
+             error_detail += f" Response: {e.response.text[:500]}"
+         status_message = f"Submission Failed: {error_detail}"
+         logger.error(status_message)
+         results_df = pd.DataFrame(results)
+         return status_message, results_df
+     except requests.exceptions.Timeout:
+         status_message = "Submission Failed: The request timed out."
+         logger.error(status_message)
+         results_df = pd.DataFrame(results)
+         return status_message, results_df
+     except requests.exceptions.RequestException as e:
+         status_message = f"Submission Failed: Network error - {e}"
+         logger.error(status_message)
+         results_df = pd.DataFrame(results)
+         return status_message, results_df
+     except Exception as e:
+         status_message = f"Submission Failed: An unexpected error occurred during submission - {e}"
+         logger.error(status_message, exc_info=True)
+         results_df = pd.DataFrame(results)
+         return status_message, results_df
+
+ # --- Main Function for Batch Processing ---
+ async def run_and_submit_all(
+     username: str,
+     agent_code: str,
+     api_url: str = DEFAULT_API_URL,
+     level: int = 1,
+     max_questions: int = 0,  # 0 means all questions for the level
+     progress=gr.Progress(track_tqdm=True)
+ ) -> tuple[str, pd.DataFrame]:
+     """Fetches all questions for a level, runs the agent, and submits the answers."""
+     if not AGENT_WORKFLOW:
+         error_msg = "Agent Workflow is not initialized. Cannot run benchmark."
+         logger.error(error_msg)
+         return error_msg, pd.DataFrame()
+
+     if not username or not username.strip():
+         error_msg = "Username cannot be empty."
+         logger.error(error_msg)
+         return error_msg, pd.DataFrame()
+
+     questions_url = f"{api_url}/questions?level={level}"
+     submit_url = f"{api_url}/submit"
+     base_fetch_file_url = f"{api_url}/get_file"
+
+     questions = await fetch_questions(questions_url)
+     if questions is None:
+         error_msg = f"Failed to fetch questions for level {level}. Check logs."
+         return error_msg, pd.DataFrame()
+
+     # Limit the number of questions if max_questions is set
+     if max_questions > 0:
+         questions = questions[:max_questions]
+         logger.info(f"Processing a maximum of {max_questions} questions for level {level}.")
+     else:
+         logger.info(f"Processing all {len(questions)} questions for level {level}.")
+
+     agent = BasicAgent(AGENT_WORKFLOW)
+     results = []
+
+     for item in progress.tqdm(questions, desc=f"Processing Level {level} Questions"):
+         result = await process_question(agent, item, base_fetch_file_url)
+         if result:
+             results.append(result)
+         # Optional: add a small delay between questions if needed
+         # await asyncio.sleep(0.1)
+
+     # Submit answers
+     final_status, results_df = await submit_answers(submit_url, username, agent_code, results)
+     return final_status, results_df
+
+ # --- Gradio Interface ---
+ def create_gradio_interface():
+     """Creates and returns the Gradio interface."""
+     logger.info("Creating Gradio interface...")
+     with gr.Blocks(theme=gr.themes.Soft()) as demo:
+         gr.Markdown("# GAIA Benchmark Agent Runner")
+         gr.Markdown("Run the initialized multi-agent system against the GAIA benchmark questions and submit the results.")
+
+         with gr.Row():
+             username = gr.Textbox(label="Username", placeholder="Enter your username (e.g., [email protected])")
+             agent_code = gr.Textbox(label="Agent Code", placeholder="Enter a short code for your agent (e.g., v1.0)")
+         with gr.Row():
+             level = gr.Dropdown(label="Benchmark Level", choices=[1, 2, 3], value=1)
+             max_questions = gr.Number(label="Max Questions (0 for all)", value=0, minimum=0, step=1)
+             api_url = gr.Textbox(label="GAIA API URL", value=DEFAULT_API_URL)
+
+         run_button = gr.Button("Run Benchmark and Submit", variant="primary")
+
+         with gr.Accordion("Results", open=False):
+             status_output = gr.Textbox(label="Submission Status", lines=5)
+             results_dataframe = gr.DataFrame(label="Detailed Results")
+
+         run_button.click(
+             fn=run_and_submit_all,
+             inputs=[username, agent_code, api_url, level, max_questions],
+             outputs=[status_output, results_dataframe]
+         )
+     logger.info("Gradio interface created.")
+     return demo
+
+ # --- Main Execution ---
+ if __name__ == "__main__":
+     if not AGENT_WORKFLOW:
+         print("ERROR: Agent Workflow failed to initialize. Cannot start Gradio app.")
+         print("Please check logs for initialization errors (e.g., missing API keys, import issues).")
+     else:
+         gradio_app = create_gradio_interface()
+         # Launch the Gradio app.
+         # share=True would create a public link (use with caution);
+         # server_name="0.0.0.0" allows access from the network.
+         gradio_app.launch(server_name="0.0.0.0", server_port=7860)
+
current_architecture.md ADDED
@@ -0,0 +1,91 @@
1
+ # Current GAIA Multi-Agent Framework Architecture
+
+ This document summarizes the architecture of the GAIA multi-agent framework based on the provided Python source files.
+
+ ## Core Framework
+
+ * **Technology:** The system is built using the `llama_index.core.agent.workflow.AgentWorkflow` class from the LlamaIndex library.
+ * **Orchestration:** `app.py` serves as the main entry point. It initializes a Gradio web interface, fetches benchmark questions from a specified API endpoint, manages file handling (text, image, audio) associated with questions, runs the agent workflow for each question, and submits the answers back to the API.
+ * **Root Agent:** The workflow designates `planner_agent` as the `root_agent`, meaning it receives the initial user request (question) and orchestrates the subsequent steps.
+
+ ## Agent Roster and Capabilities
+
+ The framework comprises several specialized agents, each designed for specific tasks:
+
+ 1. **`planner_agent` (Root):**
+     * **Purpose:** Strategic planning, task decomposition, and final synthesis.
+     * **Tools:** `generate_substeps` (breaks down objectives using an LLM), `synthesize_and_respond` (aggregates results into a final report using an LLM).
+     * **Workflow:** Receives the initial objective, breaks it into sub-steps, delegates these steps to appropriate specialist agents, and finally synthesizes the collected results into a coherent answer.
+     * **Handoffs:** Can delegate to `code_agent`, `research_agent`, `math_agent`, `role_agent`, `image_analyzer_agent`, `text_analyzer_agent`, `verifier_agent`, `reasoning_agent`.
+
+ 2. **`role_agent`:**
+     * **Purpose:** Determines and sets the appropriate persona or context for the task.
+     * **Tools:** `role_prompt_retriever` (uses a combination of vector search and BM25 retrieval on the `fka/awesome-chatgpt-prompts` dataset, followed by reranking, to find the best role/prompt).
+     * **Workflow:** Interprets user intent, retrieves relevant role descriptions, selects the best fit, and provides the role/prompt.
+     * **Handoffs:** Hands off to `planner_agent` after setting the role.
+
+ 3. **`code_agent`:**
+     * **Purpose:** Generates and executes Python code.
+     * **Tools:** `python_code_generator` (uses an OpenAI model `o4-mini` to generate code from a prompt), `code_interpreter` (uses LlamaIndex's tool spec, likely for sandboxed execution), and a custom `SimpleCodeExecutor` (executes Python code via `subprocess`, **not safe for production**).
+     * **Workflow:** Takes a description, generates code, executes/tests it, and returns the result or final code.
+     * **Handoffs:** Hands off to `planner_agent` or `reasoning_agent`.
+
+ 4. **`math_agent`:**
+     * **Purpose:** Performs mathematical computations.
+     * **Tools:** A large suite of functions covering symbolic math (SymPy), matrix operations (NumPy), statistics (NumPy), numerical methods (NumPy, SciPy), vector math (NumPy), probability (SciPy), and potentially more (file was truncated). Also includes WolframAlpha integration.
+     * **Workflow:** Executes specific mathematical operations based on requests.
+     * **Handoffs:** (Inferred) Likely hands off to `planner_agent` or `reasoning_agent`.
+
+ 5. **`research_agent`:**
+     * **Purpose:** Gathers information from the web and specialized sources.
+     * **Tools:** Web search (Google, DuckDuckGo, Tavily), web browsing/interaction (Helium/Selenium: `visit`, `get_text_by_css`, `get_page_html`, `click_element`, `search_item_ctrl_f`, `go_back`, `close_popups`), Wikipedia search/loading, Yahoo Finance data retrieval, ArXiv paper search.
+     * **Workflow:** Executes a plan-act-observe loop to find and extract information from various online sources.
+     * **Handoffs:** Can delegate to `code_agent`, `math_agent`, `analyzer_agent` (likely meant `text_analyzer_agent` or `image_analyzer_agent`), `planner_agent`, `reasoning_agent`.
+
+ 6. **`text_analyzer_agent`:**
+     * **Purpose:** Extracts text from PDFs and analyzes text content.
+     * **Tools:** `extract_text_from_pdf` (uses PyPDF2, handles URLs and local files), `analyze_text` (uses an LLM to generate a summary and key facts).
+     * **Workflow:** If the input is a PDF, extracts text; then analyzes the text to produce a summary and list of facts.
+     * **Handoffs:** Hands off to `verifier_agent`.
+
+ 7. **`image_analyzer_agent`:**
+     * **Purpose:** Analyzes image content factually.
+     * **Tools:** Relies directly on the multimodal capabilities of its underlying LLM (Gemini 1.5 Pro) to process image inputs provided via `ChatMessage` blocks. No specific image analysis *tool* is defined, but the system prompt dictates a detailed, structured analysis format.
+     * **Workflow:** Receives an image, performs analysis according to a strict factual template.
+     * **Handoffs:** Hands off to `planner_agent`, `research_agent`, or `reasoning_agent`.
+
+ 8. **`verifier_agent`:**
+     * **Purpose:** Assesses the confidence of factual statements and detects contradictions.
+     * **Tools:** `verify_facts` (uses an LLM - Gemini 2.0 Flash - to assign confidence scores), `find_contradictions` (uses simple string matching for negation pairs).
+     * **Workflow:** Takes a list of facts, scores them, checks for contradictions, and reports results.
+     * **Handoffs:** Hands off to `reasoning_agent` or `planner_agent`.
+
+ 9. **`reasoning_agent`:**
+     * **Purpose:** Performs explicit chain-of-thought reasoning.
+     * **Tools:** `reasoning_tool` (uses an OpenAI model `o4-mini` with a detailed prompt to perform CoT reasoning over the provided context).
+     * **Workflow:** Takes context, applies reasoning via the tool, and provides the structured reasoning output.
+     * **Handoffs:** Hands off to `planner_agent`.
+
+ ## Workflow and Data Flow
+
+ 1. A question (potentially with associated files) arrives at `app.py`.
+ 2. `app.py` formats the input (e.g., `ChatMessage` with `TextBlock`, `ImageBlock`, `AudioBlock`) and passes it to the `AgentWorkflow` starting with `planner_agent` (see the sketch below).
+ 3. `planner_agent` breaks down the task.
+ 4. It may call `role_agent` to set context.
+ 5. It delegates sub-tasks to specialized agents (`research`, `code`, `math`, `text_analyzer`, `image_analyzer`).
+ 6. Agents execute their tasks, potentially calling tools or other agents (e.g., `text_analyzer` calls `verifier_agent`).
+ 7. `reasoning_agent` might be called for complex logical steps or verification.
+ 8. Results flow back up, eventually reaching `planner_agent`.
+ 9. `planner_agent` synthesizes the final answer using `synthesize_and_respond`.
+ 10. `app.py` receives the final answer and submits it.
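+
+ A minimal sketch of step 2, assuming LlamaIndex's `ChatMessage`/`AgentWorkflow` APIs (the exact invocation in `app.py` may differ; `AGENT_WORKFLOW` refers to the workflow object built there):
+
+ ```python
+ from llama_index.core.llms import ChatMessage, ImageBlock, TextBlock
+
+ async def answer_question() -> str:
+     # Multimodal user message: the question text plus an attached image file.
+     msg = ChatMessage(
+         role="user",
+         blocks=[
+             TextBlock(text="What does the attached chart show?"),
+             ImageBlock(path="question_attachment.png"),
+         ],
+     )
+     # AGENT_WORKFLOW is the AgentWorkflow whose root_agent is planner_agent.
+     result = await AGENT_WORKFLOW.run(user_msg=msg)
+     return str(result)
+ ```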
+
+ ## Technology Stack Summary
+
+ * **Core:** Python, LlamaIndex
+ * **LLMs:** Google Gemini (1.5 Pro, 2.0 Flash), OpenAI (o4-mini)
+ * **UI:** Gradio
+ * **Web Interaction:** Selenium, Helium
+ * **Data Handling:** Pandas, PyPDF2, Requests
+ * **Search/Retrieval:** HuggingFace Embeddings/Rerankers, Datasets, LlamaIndex Tool Specs (Google, Tavily, Wikipedia, DuckDuckGo, Yahoo Finance, ArXiv)
+ * **Math:** SymPy, NumPy, SciPy, WolframAlpha
+ * **Code Execution:** Subprocess (basic executor), LlamaIndex Code Interpreter
gaia_improvement_plan.md ADDED
@@ -0,0 +1,943 @@
+
+ ### 3.5. `research_agent.py` Refactoring
+
+ * **Rationale:** To improve browser instance management, error handling, and configuration.
+ * **Proposals:**
+     1. **Browser Lifecycle Management:** Instead of initializing the browser (`start_chrome`) at the module level, manage its lifecycle explicitly (see the sketch after this list). Options:
+         * Initialize the browser within the agent's initialization and provide a method or tool to explicitly close it (`kill_browser`) when the agent's task is done or the application shuts down.
+         * Use a context manager (`with start_chrome(...) as browser:`) if the browser is only needed for a specific scope within a tool call (less likely for a persistent agent).
+         * Ensure `kill_browser` is reliably called. Perhaps the `planner_agent` could invoke a cleanup tool/method on the `research_agent` after its tasks are complete.
+     2. **Configuration:** Move hardcoded Chrome options to configuration. Externalize API keys/IDs if not already done (they seem to be using `os.getenv`, which is good).
+     3. **Robust Error Handling:** For browser interaction tools (`visit`, `get_text_by_css`, `click_element`), raise specific custom exceptions instead of returning error strings. This allows for more structured error handling by the agent or workflow.
+     4. **Tool Consolidation (Optional):** The agent has many tools. Consider whether some related tools (e.g., different search APIs) could be consolidated behind a single tool that internally chooses the best source, or whether the LLM handles the large toolset effectively.
+
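+ A minimal sketch of the first option, assuming Helium's `start_chrome`/`kill_browser` API; the `ManagedBrowser` name and wiring are illustrative:
+
+ ```python
+ import logging
+ from helium import start_chrome, kill_browser
+ from selenium import webdriver
+
+ logger = logging.getLogger(__name__)
+
+ class ManagedBrowser:
+     """Owns the Helium browser for the research agent's lifetime (illustrative)."""
+
+     def __init__(self, options: webdriver.ChromeOptions):
+         self._browser = start_chrome(headless=True, options=options)
+
+     def close(self) -> None:
+         try:
+             kill_browser()
+             logger.info("Browser closed.")
+         except Exception:
+             logger.exception("Error closing browser.")
+
+     def __enter__(self):
+         return self._browser
+
+     def __exit__(self, *exc):
+         self.close()
+ ```
+
+ The agent's initializer would create one `ManagedBrowser` and expose its `close()` via a cleanup tool, so `planner_agent` can reliably tear the browser down when research is finished.
+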
+ * **Diff Patch (Illustrative - Configuration & Browser Init):**
+
+ ```diff
+ --- a/research_agent.py
+ +++ b/research_agent.py
+ @@ -1,5 +1,6 @@
+  import os
+  import time
+ +import logging
+  from typing import List
+
+  from llama_index.core.agent.workflow import ReActAgent
+ @@ -15,17 +16,21 @@
+  from helium import start_chrome, go_to, find_all, Text, kill_browser
+  from helium import get_driver
+
+ +logger = logging.getLogger(__name__)
+ +
+  # 1. Helium
+ -chrome_options = webdriver.ChromeOptions()
+ -chrome_options.add_argument("--no-sandbox")
+ -chrome_options.add_argument("--disable-dev-shm-usage")
+ -chrome_options.add_experimental_option("prefs", {
+ -    "download.prompt_for_download": False,
+ -    "plugins.always_open_pdf_externally": True,
+ -    "profile.default_content_settings.popups": 0
+ -})
+ -
+ -browser = start_chrome(headless=True, options=chrome_options)
+ +# Browser instance should be managed, not global at module level
+ +# browser = start_chrome(headless=True, options=chrome_options)
+ +
+ +def get_chrome_options():
+ +    options = webdriver.ChromeOptions()
+ +    if os.getenv("RESEARCH_AGENT_CHROME_NO_SANDBOX", "true").lower() == "true":
+ +        options.add_argument("--no-sandbox")
+ +    if os.getenv("RESEARCH_AGENT_CHROME_DISABLE_DEV_SHM", "true").lower() == "true":
+ +        options.add_argument("--disable-dev-shm-usage")
+ +    # Add other options from config as needed
+ +    # options.add_experimental_option(...)  # Example
+ +    return options
+
+  def visit(url: str, wait_seconds: float = 2.0) -> str | None:
+      """
+ @@ -36,10 +41,11 @@
+          wait_seconds (float): Time to wait after navigation.
+      """
+      try:
+ +        # Assumes browser is available in context (e.g., class member)
+          go_to(url)
+          time.sleep(wait_seconds)
+          return f"Visited: {url}"
+      except Exception as e:
+ +        logger.error(f"Error visiting {url}: {e}", exc_info=True)
+          return f"Error visiting {url}: {e}"
+
+  def get_text_by_css(selector: str) -> List[str] | str:
+ @@ -52,13 +58,15 @@
+          List[str]: List of text contents.
+      """
+      try:
+ +        # Assumes browser/helium context is active
+          if selector.lower() == 'body':
+              elements = find_all(Text())
+          else:
+              elements = find_all(selector)
+          texts = [elem.web_element.text for elem in elements]
+ -        print(f"Extracted {len(texts)} elements for selector \'{selector}\'")
+ +        logger.info(f"Extracted {len(texts)} elements for selector \'{selector}\'")
+          return texts
+      except Exception as e:
+ +        logger.error(f"Error extracting text for selector {selector}: {e}", exc_info=True)
+          return f"Error extracting text for selector {selector}: {e}"
+
+  def get_page_html() -> str:
+ @@ -70,9 +78,11 @@
+          str: HTML content, or empty string on error.
+      """
+      try:
+ +        # Assumes browser/helium context is active
+          driver = get_driver()
+          html = driver.page_source
+          return html
+      except Exception as e:
+ +        logger.error(f"Error extracting HTML: {e}", exc_info=True)
+          return f"Error extracting HTML: {e}"
+
+  def click_element(selector: str, index_element: int = 0) -> str:
+ @@ -83,10 +93,12 @@
+          selector (str): CSS selector of the element to click.
+      """
+      try:
+ +        # Assumes browser/helium context is active
+          element = find_all(selector)[index_element]
+          element.click()
+          time.sleep(1)
+          return f"Clicked element matching selector \'{selector}\'"
+      except Exception as e:
+ +        logger.error(f"Error clicking element {selector}: {e}", exc_info=True)
+          return f"Error clicking element {selector}: {e}"
+
+  def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
+ @@ -97,6 +109,7 @@
+          nth_result: Which occurrence to jump to (default: 1)
+      """
+      elements = browser.find_elements(By.XPATH, f"//*[contains(text(), \'{text}\')]")
+ +    # Assumes browser is available in context
+      if nth_result > len(elements):
+          return f"Match n°{nth_result} not found (only {len(elements)} matches found)"
+      result = f"Found {len(elements)} matches for \'{text}\'."
+ @@ -107,19 +120,22 @@
+
+  def go_back() -> None:
+      """Goes back to previous page."""
+      browser.back()
+ +    # Assumes browser is available in context
+
+  def close_popups() -> None:
+      """
+      Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows! This does not work on cookie consent banners.
+      """
+      webdriver.ActionChains(browser).send_keys(Keys.ESCAPE).perform()
+ +    # Assumes browser is available in context
+
+  def close() -> None:
+      """
+      Close the browser instance.
+      """
+      try:
+ +        # Assumes kill_browser is appropriate here
+          kill_browser()
+ -        print("Browser closed")
+ +        logger.info("Browser closed via kill_browser()")
+      except Exception as e:
+ -        print(f"Error closing browser: {e}")
+ +        logger.error(f"Error closing browser: {e}", exc_info=True)
+
+  visit_tool = FunctionTool.from_defaults(
+      fn=visit,
+ @@ -240,9 +256,14 @@
+
+
+  def initialize_research_agent() -> ReActAgent:
+ +    # Browser initialization should happen here or be managed externally
+ +    # Example: browser = start_chrome(headless=True, options=get_chrome_options())
+ +    # Ensure browser instance is passed to tools or accessible via agent state/class
+ +
+ +    llm_model_name = os.getenv("RESEARCH_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=llm_model_name,
+      )
+
+      system_prompt = """\
+ ```
+
+
+ ### 3.6. `text_analyzer_agent.py` Refactoring
+
+ * **Rationale:** To improve configuration management and error handling.
+ * **Proposals:**
+     1. **Configuration:** Move the hardcoded LLM model name (`models/gemini-1.5-pro`) to environment variables or a configuration file.
+     2. **Prompt Management:** Move the `analyze_text` prompt to a separate template file.
+     3. **Error Handling:** In `extract_text_from_pdf`, consider raising specific exceptions (e.g., `PDFDownloadError`, `PDFParsingError`) instead of returning error strings, allowing the agent to handle failures more gracefully.
+
+ * **Diff Patch (Illustrative - Configuration & Error Handling):**
+
+ ```diff
+ --- a/text_analyzer_agent.py
+ +++ b/text_analyzer_agent.py
+ @@ -6,6 +6,14 @@
+
+  logger = logging.getLogger(__name__)
+
+ +class PDFExtractionError(Exception):
+ +    """Custom exception for PDF extraction failures."""
+ +    pass
+ +
+ +class PDFDownloadError(PDFExtractionError):
+ +    """Custom exception for PDF download failures."""
+ +    pass
+ +
+  def extract_text_from_pdf(source: str) -> str:
+      """
+      Extract raw text from a PDF file on disk or at a URL.
+ @@ -19,21 +27,21 @@
+          try:
+              resp = requests.get(source, timeout=10)
+              resp.raise_for_status()
+ -        except Exception as e:
+ -            return f"Error downloading PDF from {source}: {e}"
+ +        except requests.exceptions.RequestException as e:
+ +            raise PDFDownloadError(f"Error downloading PDF from {source}: {e}") from e
+
+          try:
+              tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
+              tmp.write(resp.content)
+              tmp.flush()
+              tmp_path = tmp.name
+              tmp.close()
+ -        except Exception as e:
+ -            return f"Error writing temp PDF file: {e}"
+ +        except IOError as e:
+ +            raise PDFExtractionError(f"Error writing temp PDF file: {e}") from e
+          path = tmp_path
+      else:
+          path = source
+
+      # Now extract text from the PDF on disk
+      if not os.path.isfile(path):
+ -        return f"PDF not found: {path}"
+ +        raise PDFExtractionError(f"PDF not found: {path}")
+
+      text = ""
+
+ @@ -41,10 +49,10 @@
+          reader = PdfReader(path)
+          pages = [page.extract_text() or "" for page in reader.pages]
+          text = "\n".join(pages)
+ -        print(f"Extracted {len(pages)} pages of text from PDF")
+ +        logger.info(f"Extracted {len(pages)} pages of text from PDF: {path}")
+      except Exception as e:
+          # Catch specific PyPDF2 errors if possible, otherwise general Exception
+ -        return f"Error reading PDF: {e}"
+ +        raise PDFExtractionError(f"Error reading PDF {path}: {e}") from e
+
+      # Clean up temporary file if one was created
+      if source.lower().startswith(("http://", "https://")):
+ @@ -67,6 +75,14 @@
+          str: A plain-text string containing:
+          • A “Summary:” section with bullet points.
+          • A “Facts:” section with bullet points.
+ +    """
+ +    # Load prompt from file ideally
+ +    prompt_template = """You are an expert analyst.
+ +
+ +Please analyze the following text and produce a plain-text response
+ +with two sections:
+ +
+ +Summary:
+ +• Provide 2–3 concise bullet points summarizing the main ideas.
+ +
+ +Facts:
+ +• List each verifiable fact found in the text as a bullet point.
+ +
+ +Respond with exactly that format—no JSON, no extra commentary.
+ +
+ +Text to analyze:
+ +\"\"\"
+ +{text}
+ +\"\"\"
+  """
+      # Build the prompt to guide the LLM’s output format
+      input_prompt = f"""You are an expert analyst.
+ @@ -84,13 +100,14 @@
+  {text}
+  \"\"\"
+  """
+ +    input_prompt = prompt_template.format(text=text)
+
+      # Use the LLM to generate the analysis
+ +    llm_model_name = os.getenv("TEXT_ANALYZER_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=llm_model_name,
+      )
+
+      generated = llm.complete(input_prompt)
+ @@ -124,9 +141,10 @@
+          FunctionAgent: Configured analysis agent.
+      """
+
+ +    llm_model_name = os.getenv("TEXT_ANALYZER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=llm_model_name,
+      )
+
+      system_prompt = """\
+ ```
+
+
+ ### 3.7. `reasoning_agent.py` Refactoring
+
+ * **Rationale:** To simplify the agent structure, improve configuration, and potentially optimize LLM usage.
+ * **Proposals:**
+     1. **Configuration:** Move hardcoded LLM model names (`models/gemini-1.5-pro`, `o4-mini`) and the API key environment variable name (`ALPAFLOW_OPENAI_API_KEY`) to configuration.
+     2. **Prompt Management:** Move the detailed CoT prompt from `reasoning_tool_fn` to a separate template file.
+     3. **Agent Structure Simplification:** Given the rigid workflow (call tool -> handoff), consider replacing the `ReActAgent` with a simpler `FunctionAgent` that directly calls the `reasoning_tool` and formats the output before handing off. Alternatively, evaluate whether the `reasoning_tool` logic could be integrated as a direct LLM call within agents that need CoT (like `planner_agent`), potentially removing the need for a separate `reasoning_agent` altogether, unless its specific CoT prompt/model (`o4-mini`) is crucial.
+
+ * **Diff Patch (Illustrative - Configuration & Prompt Loading):**
+
+ ```diff
+ --- a/reasoning_agent.py
+ +++ b/reasoning_agent.py
+ @@ -1,10 +1,19 @@
+  import os
+ +import logging
+
+  from llama_index.core.agent.workflow import ReActAgent
+  from llama_index.llms.google_genai import GoogleGenAI
+  from llama_index.core.tools import FunctionTool
+  from llama_index.llms.openai import OpenAI
+
+ +logger = logging.getLogger(__name__)
+ +
+ +def load_prompt_from_file(filename="reasoning_tool_prompt.txt") -> str:
+ +    try:
+ +        with open(filename, "r") as f:
+ +            return f.read()
+ +    except FileNotFoundError:
+ +        logger.error(f"Prompt file {filename} not found.")
+ +        return "Perform chain-of-thought reasoning on the context: {context}"
+ +
+  def reasoning_tool_fn(context: str) -> str:
+      """
+      Perform end-to-end chain-of-thought reasoning over the full multi-agent workflow context,
+ @@ -17,45 +26,12 @@
+          str: A structured reasoning trace with numbered thought steps, intermediate checks,
+          and a concise final recommendation or conclusion.
+      """
+ -    prompt = f"""You are an expert reasoning engine. You have the following full context of a multi-agent workflow:
+ -
+ -{context}
+ -
+ -Your job is to:
+ -1. **Comprehension**
+ -   - Read the entire question or problem statement carefully.
+ -   - Identify key terms, constraints, and desired outcomes.
+ -
+ -2. **Decomposition**
+ -   - Break down the problem into logical sub-steps or sub-questions.
+ -   - Ensure each sub-step is necessary and sufficient to progress toward a solution.
+ -
+ -3. **Chain-of-Thought**
+ -   - Articulate your internal reasoning in clear, numbered steps.
+ -   - At each step, state your assumptions, derive implications, and check for consistency.
+ -
+ -4. **Intermediate Verification**
+ -   - After each reasoning step, validate your conclusion against the problem’s constraints.
+ -   - If a contradiction or uncertainty arises, revisit and refine the previous step.
+ -
+ -5. **Synthesis**
+ -   - Once all sub-steps are resolved, integrate the intermediate results into a cohesive answer.
+ -   - Ensure the final answer directly addresses the user’s request and all specified criteria.
+ -
+ -6. **Clarity & Precision**
+ -   - Use formal, precise language.
+ -   - Avoid ambiguity: define any technical terms you introduce.
+ -   - Provide just enough detail to justify each conclusion without digression.
+ -
+ -7. **Final Answer**
+ -   - Present a concise, well-structured response.
+ -   - If appropriate, include a brief summary of your reasoning steps.
+ -
+ -Respond with your reasoning steps followed by the final recommendation.
+ -"""
+ +    prompt_template = load_prompt_from_file()
+ +    prompt = prompt_template.format(context=context)
+
+ +    reasoning_llm_model = os.getenv("REASONING_TOOL_LLM_MODEL", "o4-mini")
+ +    # Use specific API key if needed, e.g., ALPAFLOW_OPENAI_API_KEY
+ +    reasoning_api_key_env = os.getenv("REASONING_TOOL_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")
+ +    reasoning_api_key = os.getenv(reasoning_api_key_env)
+      llm = OpenAI(
+ -        model="o4-mini",
+ -        api_key=os.getenv("ALPAFLOW_OPENAI_API_KEY"),
+ +        model=reasoning_llm_model,
+ +        api_key=reasoning_api_key,
+          reasoning_effort="high"
+      )
+      response = llm.complete(prompt)
+ @@ -74,9 +50,10 @@
+      """
+      Create a pure reasoning agent with no tools, relying solely on chain-of-thought.
+      """
+ +    agent_llm_model = os.getenv("REASONING_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=agent_llm_model,
+      )
+
+      system_prompt = """\
+ ```
+
+
404
+ ### 3.8. `planner_agent.py` Refactoring
405
+
406
+ * **Rationale:** To improve configuration management and prompt handling.
407
+ * **Proposals:**
408
+ 1. **Configuration:** Move the hardcoded LLM model name (`models/gemini-1.5-pro`) to environment variables or a configuration file.
409
+ 2. **Prompt Management:** Move the system prompt and the prompts within the `plan` and `synthesize_and_respond` functions to separate template files for better readability and maintainability.
410
+
411
+ * **Diff Patch (Illustrative - Configuration & Prompt Loading):**
412
+
413
+ ```diff
414
+ --- a/planner_agent.py
415
+ +++ b/planner_agent.py
416
+ @@ -1,10 +1,19 @@
417
+ import os
418
+ + import logging
419
+ from typing import List, Any
420
+
421
+ from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
422
+ from llama_index.core.tools import FunctionTool
423
+ from llama_index.llms.google_genai import GoogleGenAI
424
+
425
+ + logger = logging.getLogger(__name__)
426
+ +
427
+ + def load_prompt_from_file(filename: str, default_prompt: str) -> str:
428
+ + try:
429
+ + with open(filename, "r") as f:
430
+ + return f.read()
431
+ + except FileNotFoundError:
432
+ + logger.warning(f"Prompt file {filename} not found. Using default.")
433
+ + return default_prompt
434
+ +
435
+ def plan(objective: str) -> List[str]:
436
+ """
437
+ Generate a list of sub-questions from the given objective.
438
+ @@ -15,14 +24,16 @@
439
+ Returns:
440
+ List[str]: A list of sub-steps as strings.
441
+ """
442
+ - input_prompt: str = (
443
+ + default_plan_prompt = (
444
+ "You are a research assistant. "
445
+ "Given an objective, break it down into a list of concise, actionable sub-steps.\n"
446
+ f"Objective: {objective}\n"
447
+ "Sub-steps (one per line):"
448
+ )
449
+ + plan_prompt_template = load_prompt_from_file("planner_plan_prompt.txt", default_plan_prompt)
450
+ + input_prompt = plan_prompt_template.format(objective=objective)
451
+
452
+ + llm_model_name = os.getenv("PLANNER_TOOL_LLM_MODEL", "models/gemini-1.5-pro")
453
+ llm = GoogleGenAI(
454
+ api_key=os.getenv("GEMINI_API_KEY"),
455
+ - model="models/gemini-1.5-pro",
456
+ + model=llm_model_name,
457
+ )
458
+
459
+
460
+ @@ -44,13 +55,16 @@
461
+ Returns:
462
+ str: A unified, well-structured response addressing the original objective.
463
+ """
464
+ - # Join each ready-made QA block directly
465
+ summary_blocks = "\n".join(results)
466
+ - input_prompt = f"""You are an expert synthesizer. Given the following sub-questions and their answers,
467
+ + default_synth_prompt = f"""You are an expert synthesizer. Given the following sub-questions and their answers,
468
+ produce a single, coherent, comprehensive report that addresses the original objective:
469
+
470
+ {summary_blocks}
471
+
472
+ Final Report:
473
+ """
474
+ + synth_prompt_template = load_prompt_from_file("planner_synthesize_prompt.txt", default_synth_prompt)
475
+ + input_prompt = synth_prompt_template.format(summary_blocks=summary_blocks)
476
+ +
477
+ + llm_model_name = os.getenv("PLANNER_TOOL_LLM_MODEL", "models/gemini-1.5-pro") # Can use same model as plan
478
+ llm = GoogleGenAI(
479
+ api_key=os.getenv("GEMINI_API_KEY"),
480
+ - model="models/gemini-1.5-pro",
481
+ + model=llm_model_name,
482
+ )
483
+ response = llm.complete(input_prompt)
484
+ return response.text
485
+ @@ -77,9 +91,10 @@
486
+ """
487
+ Initialize a LlamaIndex agent specialized in research planning and question engineering.
488
+ """
489
+ + agent_llm_model = os.getenv("PLANNER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
490
+ llm = GoogleGenAI(
491
+ api_key=os.getenv("GEMINI_API_KEY"),
492
+ - model="models/gemini-1.5-pro",
493
+ + model=agent_llm_model,
494
+ )
495
+
496
+ system_prompt = """\
497
+ @@ -108,6 +123,7 @@
498
+ **Completion & Synthesis**
499
+ If the final result fully completes the original objective, produce a consolidated synthesis of the roadmap and send it as your concluding output.
500
+ """
501
+ + system_prompt = load_prompt_from_file("planner_system_prompt.txt", system_prompt) # Load from file if exists
502
+
503
+ agent = ReActAgent(
504
+ name="planner_agent",
505
+ ```
506
+
507
+
+ ### 3.9. `code_agent.py` Refactoring
+
+ * **Rationale:** To address the critical security vulnerability of the `SimpleCodeExecutor`, improve configuration management, and align code execution with safer practices.
+ * **Proposals:**
+     1. **Remove `SimpleCodeExecutor`:** This class and its `execute` method using `subprocess` with raw code strings are fundamentally insecure and **must be removed entirely**.
+     2. **Use `CodeInterpreterToolSpec`:** Rely *exclusively* on the `code_interpreter` tool derived from LlamaIndex's `CodeInterpreterToolSpec` for code execution. This tool is designed for safer, sandboxed execution (see the sketch after this list).
+     3. **Update `CodeActAgent` Initialization:** Remove the `code_execute_fn` parameter when initializing `CodeActAgent`, as the agent should use the provided `code_interpreter` tool for execution via the standard ReAct/Act loop, not a direct execution function.
+     4. **Configuration:** Move hardcoded LLM model names (`o4-mini`, `models/gemini-1.5-pro`) and the API key environment variable name (`ALPAFLOW_OPENAI_API_KEY`) to configuration.
+     5. **Prompt Management:** Move the `generate_python_code` prompt to a separate template file.
+
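+ A minimal sketch of proposals 2 and 3: building the sandboxed tool list that replaces `code_execute_fn` (assuming the `llama-index-tools-code-interpreter` package):
+
+ ```python
+ from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
+
+ # Build the sandboxed execution tool list once; the agent receives these
+ # tools instead of a raw code_execute_fn.
+ code_interpreter_tools = CodeInterpreterToolSpec().to_tool_list()
+ print([tool.metadata.name for tool in code_interpreter_tools])  # e.g. ['code_interpreter']
+ ```
+
+ The agent factory then passes these tools into the `tools=` list, as the diff below illustrates.
+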
+ * **Diff Patch (Illustrative - Security Fix & Configuration):**
+
+ ```diff
+ --- a/code_agent.py
+ +++ b/code_agent.py
+ @@ -1,5 +1,6 @@
+  import os
+  import subprocess
+ +import logging
+
+  from llama_index.core.agent.workflow import ReActAgent, CodeActAgent
+  from llama_index.core.tools import FunctionTool
+ @@ -7,6 +8,16 @@
+  from llama_index.llms.openai import OpenAI
+  from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
+
+ +logger = logging.getLogger(__name__)
+ +
+ +def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+ +    try:
+ +        with open(filename, "r") as f:
+ +            return f.read()
+ +    except FileNotFoundError:
+ +        logger.warning(f"Prompt file {filename} not found. Using default.")
+ +        return default_prompt
+ +
+  def generate_python_code(prompt: str) -> str:
+      """
+      Generate valid Python code from a natural language description.
+ @@ -27,7 +38,7 @@
+      it before execution.
+      - This function only generates code and does not execute it.
+      """
+ -
+ -    input_prompt = f"""You are also a helpful assistant that writes Python code.
+ +    default_gen_prompt = f"""You are also a helpful assistant that writes Python code.
+  You will be given a prompt and you must generate Python code based on that prompt.
+  You must only generate Python code and nothing else.
+  Do not include any explanations or any other text.
+ @@ -40,10 +51,14 @@
+  Code:\n
+  """
+
+ +    gen_prompt_template = load_prompt_from_file("code_gen_prompt.txt", default_gen_prompt)
+ +    input_prompt = gen_prompt_template.format(prompt=prompt)
+ +
+ +    gen_llm_model = os.getenv("CODE_GEN_LLM_MODEL", "o4-mini")
+ +    gen_api_key_env = os.getenv("CODE_GEN_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")
+ +    gen_api_key = os.getenv(gen_api_key_env)
+      llm = OpenAI(
+ -        model="o4-mini",
+ -        api_key=os.getenv("ALPAFLOW_OPENAI_API_KEY")
+ +        model=gen_llm_model,
+ +        api_key=gen_api_key
+      )
+
+      generated_code = llm.complete(input_prompt)
+ @@ -74,60 +89,11 @@
+      ),
+  )
+
+ -from typing import Any, Dict, Tuple
+ -import io
+ -import contextlib
+ -import ast
+ -import traceback
+ -
+ -
+ -class SimpleCodeExecutor:
+ -    """
+ -    A simple code executor that runs Python code with state persistence.
+ -
+ -    This executor maintains a global and local state between executions,
+ -    allowing for variables to persist across multiple code runs.
+ -
+ -    NOTE: not safe for production use! Use with caution.
+ -    """
+ -
+ -    def __init__(self):
+ -        pass
+ -
+ -    def execute(self, code: str) -> str:
+ -        """
+ -        Execute Python code and capture output and return values.
+ -
+ -        Args:
+ -            code: Python code to execute
+ -
+ -        Returns:
+ -            Dict with keys `success`, `output`, and `return_value`
+ -        """
+ -        print(f"Executing code: {code}")
+ -        try:
+ -            result = subprocess.run(
+ -                ["python", code],
+ -                stdout=subprocess.PIPE,
+ -                stderr=subprocess.PIPE,
+ -                text=True,
+ -                timeout=60
+ -            )
+ -            if result.returncode != 0:
+ -                print(f"Execution failed with error: {result.stderr.strip()}")
+ -                return f"Error: {result.stderr.strip()}"
+ -            else:
+ -                output = result.stdout.strip()
+ -                print(f"Captured Output: {output}")
+ -                return output
+ -        except subprocess.TimeoutExpired:
+ -            print("Execution timed out.")
+ -            return "Error: Timeout"
+ -        except Exception as e:
+ -            print(f"Execution failed with error: {e}")
+ -            return f"Error: {e}"
+ -
+  def initialize_code_agent() -> CodeActAgent:
+ -    code_executor = SimpleCodeExecutor()
+ +    # DO NOT USE SimpleCodeExecutor - it is insecure.
+ +    # Rely on the code_interpreter tool provided below.
+
+ +    agent_llm_model = os.getenv("CODE_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=agent_llm_model,
+      )
+
+      system_prompt = """\
+ @@ -151,6 +117,7 @@
+  - If further logical reasoning or verification is needed, delegate to **reasoning_agent**.
+  - Otherwise, once you have the final code or execution result, pass your output to **planner_agent** for overall synthesis and presentation.
+  """
+ +    system_prompt = load_prompt_from_file("code_agent_system_prompt.txt", system_prompt)
+
+      agent = CodeActAgent(
+          name="code_agent",
+ @@ -161,7 +128,7 @@
+              "pipelines, and library development, CodeAgent delivers production-ready Python solutions."
+          ),
+ +        # REMOVED: code_execute_fn=code_executor.execute, # Use code_interpreter tool instead
+ -        code_execute_fn=code_executor.execute,
+          tools=[
+              python_code_generator_tool,
+              code_interpreter_tool,
+ ```
+
+
+ ### 3.10. `math_agent.py` Refactoring
+
+ * **Rationale:** To improve configuration management and potentially simplify the tool interface for the LLM.
+ * **Proposals:**
+     1. **Configuration:** Move the hardcoded agent LLM model name (`models/gemini-1.5-pro`) to configuration. Ensure the WolframAlpha App ID is configured via environment variable (`WOLFRAM_ALPHA_APP_ID`) as intended.
+     2. **Tool Granularity:** The current approach creates a separate tool for almost every single math function (solve, derivative, integral, add, multiply, inverse, mean, median, etc.). While explicit, this results in a very large number of tools for the `ReActAgent` to manage. Consider:
+         * **Grouping:** Group related functions under fewer tools. For example, a `symbolic_math_tool` that takes the operation type (solve, diff, integrate) as a parameter, or a `matrix_ops_tool` (see the sketch after this list).
+         * **Natural Language Interface:** Create a single `calculate` tool that takes a natural language math query (e.g., "solve x**2 - 4 = 0 for x", "mean of [1, 2, 3]") and uses an LLM (or rule-based parsing) internally to dispatch to the appropriate NumPy/SciPy/SymPy function. This simplifies the interface for the main agent LLM but adds complexity within the tool.
+         * **WolframAlpha Prioritization:** Evaluate whether WolframAlpha can handle many of these requests directly, potentially reducing the need for numerous specific SymPy/NumPy tools, especially for symbolic tasks.
+     3. **Truncated File:** Since the original file was truncated, ensure the full file is reviewed if possible, as there might be other issues or tools not seen.
+
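+ A minimal sketch of the grouping option, assuming SymPy; the `symbolic_math` dispatcher and its operation names are illustrative:
+
+ ```python
+ import sympy as sp
+
+ def symbolic_math(operation: str, expression: str, symbol: str = "x") -> str:
+     """One tool covering solve/diff/integrate instead of three separate tools."""
+     x = sp.Symbol(symbol)
+     expr = sp.sympify(expression)
+     if operation == "solve":
+         return str(sp.solve(expr, x))
+     if operation == "diff":
+         return str(sp.diff(expr, x))
+     if operation == "integrate":
+         return str(sp.integrate(expr, x))
+     raise ValueError(f"Unsupported operation: {operation}")
+
+ # Example: symbolic_math("solve", "x**2 - 4") -> "[-2, 2]"
+ ```
+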
+ * **Diff Patch (Illustrative - Configuration):**
+
+ ```diff
+ --- a/math_agent.py
+ +++ b/math_agent.py
+ @@ -1,5 +1,6 @@
+  import os
+  from typing import List, Optional, Union
+ +import logging
+  import sympy as sp
+  import numpy as np
+  from llama_index.core.agent.workflow import ReActAgent
+ @@ -12,6 +13,8 @@
+  from scipy.integrate import odeint
+  import numpy.fft as fft
+
+ +logger = logging.getLogger(__name__)
+ +
+  # --- Symbolic math functions ---
+
+
+ @@ -451,10 +454,11 @@
+
+
+  def initialize_math_agent() -> ReActAgent:
+ +    agent_llm_model = os.getenv("MATH_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=agent_llm_model,
+      )
+
+      # Ensure WolframAlpha App ID is set
+ ```
+
+ *(Refactoring proposals section complete)*
+
+
+ ## 4. New Feature Designs
+
+ This section outlines the design for the new features requested: YouTube Ingestion and Generic Audio Transcription.
+
+ ### 4.1. YouTube Ingestion
+
+ * **Rationale:** To enable the framework to process YouTube videos by extracting audio, transcribing it, and summarizing the content, as requested by the user.
+ * **Design Proposal:**
+     * **Implementation:** Introduce a new dedicated agent, `youtube_agent`, or add tools to the existing `research_agent` or `text_analyzer_agent`. A dedicated agent seems cleaner given the specific multi-step workflow.
+     * **Agent (`youtube_agent`):**
+         * **Purpose:** Manages the end-to-end process of downloading YouTube audio, chunking, transcribing, and summarizing.
+         * **Tools:** (a sketch of the first two follows at the end of this section)
+             1. `download_youtube_audio`: Takes a YouTube URL, uses a library like `yt-dlp` (or potentially `pytube`) to download the audio stream into a temporary file (e.g., `.mp3` or `.opus`). Returns the path to the audio file.
+             2. `chunk_audio_file`: Takes an audio file path and a maximum chunk duration (e.g., 60 seconds). Uses a library like `pydub` or `librosa`+`soundfile` to split the audio into smaller, sequentially numbered temporary files. Returns a list of chunk file paths.
+             3. `transcribe_audio_chunk_gemini`: Takes an audio file path (representing a chunk). Uses the Google Generative AI SDK (`google.generativeai`) to call the Gemini 1.5 Pro model with the audio file for transcription. Returns the transcribed text.
+             4. `summarize_transcript`: Takes the full concatenated transcript text. Uses a Gemini model (e.g., 1.5 Pro or Flash) with a specific prompt to generate a one-paragraph summary. Returns the summary text.
+         * **Workflow (ReAct or Function sequence):**
+             1. Receive YouTube URL.
+             2. Call `download_youtube_audio`.
+             3. Call `chunk_audio_file` with the downloaded audio path.
+             4. Iterate through the list of chunk paths:
+                 * Call `transcribe_audio_chunk_gemini` for each chunk.
+                 * Collect transcribed text segments.
+             5. Concatenate all transcribed text segments into a full transcript.
+             6. Call `summarize_transcript` with the full transcript.
+             7. Return the full transcript and the summary.
+             8. Clean up temporary audio files (downloaded and chunks).
+         * **Handoff:** Could hand off the transcript and summary to `planner_agent` or `text_analyzer_agent` for further processing or integration.
+     * **Dependencies:** `yt-dlp`, `pydub` (requires `ffmpeg` or `libav`), `google-generativeai`.
+     * **Configuration:** Gemini API Key, chunk duration.
+
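+ A minimal sketch of the first two tools, assuming `yt-dlp`'s Python API and `pydub`; the output template, codec, and chunk-naming choices are illustrative:
+
+ ```python
+ import os
+ import yt_dlp
+ from pydub import AudioSegment
+
+ def download_youtube_audio(url: str, out_dir: str = "/tmp") -> str:
+     """Download the best audio stream and convert it to mp3 via ffmpeg."""
+     opts = {
+         "format": "bestaudio/best",
+         "outtmpl": os.path.join(out_dir, "%(id)s.%(ext)s"),
+         "postprocessors": [{"key": "FFmpegExtractAudio", "preferredcodec": "mp3"}],
+     }
+     with yt_dlp.YoutubeDL(opts) as ydl:
+         info = ydl.extract_info(url, download=True)
+     return os.path.join(out_dir, f"{info['id']}.mp3")
+
+ def chunk_audio_file(path: str, max_chunk_seconds: int = 60) -> list[str]:
+     """Split the audio into sequentially numbered chunks of at most max_chunk_seconds."""
+     audio = AudioSegment.from_file(path)
+     chunk_ms = max_chunk_seconds * 1000
+     chunk_paths = []
+     for i, start in enumerate(range(0, len(audio), chunk_ms)):
+         chunk_path = f"{path}.chunk{i:03d}.mp3"
+         audio[start:start + chunk_ms].export(chunk_path, format="mp3")
+         chunk_paths.append(chunk_path)
+     return chunk_paths
+ ```
+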
+
+ ### 4.2. Generic Audio Transcription
+
+ * **Rationale:** To provide a flexible audio transcription capability for local files or remote URLs, using Gemini Pro for quality/latency tolerance and Whisper.cpp as a fallback, exposing it via a Python API as requested.
+ * **Design Proposal:**
+     * **Implementation:** Introduce a new dedicated agent, `transcription_agent`, or add tools to `text_analyzer_agent`. A dedicated agent allows for clearer separation of concerns, especially managing the Whisper.cpp dependency and logic.
+     * **Agent (`transcription_agent`):**
+         * **Purpose:** Transcribes audio from various sources (local path, URL) using either Gemini or Whisper.cpp based on latency requirements or availability.
+         * **Tools:**
+             1. `prepare_audio_source`: Takes a source string (URL or local path). If it's a URL, downloads it to a temporary file using `requests`. Validates the local file path. Returns the path to the local audio file.
+             2. `transcribe_gemini`: Takes an audio file path. Uses the `google-generativeai` SDK to call Gemini 1.5 Pro for transcription. Returns the transcribed text. This is the preferred method when latency is acceptable.
+             3. `transcribe_whisper_cpp`: Takes an audio file path. Uses a Python wrapper around `whisper.cpp` (e.g., installing `whisper.cpp` via `apt` or compiling from source, then using `subprocess` or a dedicated Python binding if available) to perform local transcription. Returns the transcribed text. This is the fallback or low-latency option (see the subprocess sketch at the end of this section).
+             4. `choose_transcription_method`: (Internal logic or a simple tool) Takes latency preference (e.g., 'high_quality' vs 'low_latency') or checks Gemini availability/quota. Decides whether to use `transcribe_gemini` or `transcribe_whisper_cpp`.
+         * **Workflow (ReAct or Function sequence):**
+             1. Receive audio source (URL/path) and potentially a latency preference.
+             2. Call `prepare_audio_source` to get a local file path.
+             3. Call `choose_transcription_method` (or execute internal logic) to decide between Gemini and Whisper.
+             4. If Gemini: Call `transcribe_gemini`.
+             5. If Whisper: Call `transcribe_whisper_cpp`.
+             6. Return the resulting transcript.
+             7. Clean up the temporary downloaded audio file if applicable.
+         * **Handoff:** Could hand off the transcript to `planner_agent` or `text_analyzer_agent`.
+     * **Python API:**
+         * Define a simple Python function (e.g., in a `transcription_api.py` module) that encapsulates the agent's logic or directly calls the underlying transcription functions.
+
+ ```python
+ # Example API function in transcription_api.py
+ from .transcription_agent import transcribe_audio  # Assuming agent logic is refactored
+
+ class TranscriptionError(Exception):
+     pass
+
+ def get_transcript(source: str, prefer_gemini: bool = True) -> str:
+     """Transcribes audio from a local path or URL.
+
+     Args:
+         source: Path to the local audio file or URL.
+         prefer_gemini: If True, attempts to use Gemini Pro first.
+             If False or Gemini fails, falls back to Whisper.cpp.
+
+     Returns:
+         The transcribed text.
+
+     Raises:
+         TranscriptionError: If transcription fails.
+     """
+     # Implementation would call the agent or its refactored functions
+     try:
+         # Simplified logic - the actual implementation needs error handling and
+         # Gemini/Whisper selection based on preference/availability
+         transcript = transcribe_audio(source, prefer_gemini)
+         return transcript
+     except Exception as e:
+         # Log error
+         raise TranscriptionError(f"Failed to transcribe {source}: {e}") from e
+ ```
+
+     * **Dependencies:** `requests`, `google-generativeai`, `whisper.cpp` (requires separate installation/compilation), potentially Python bindings for `whisper.cpp`.
+     * **Configuration:** Gemini API Key, path to `whisper.cpp` executable or library, Whisper model selection.
+
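+ A minimal sketch of `transcribe_whisper_cpp` via `subprocess`, assuming a compiled whisper.cpp binary and its `-m`/`-f`/`-nt` flags; the binary and model paths are illustrative:
+
+ ```python
+ import subprocess
+
+ def transcribe_whisper_cpp(
+     audio_path: str,
+     whisper_bin: str = "./whisper.cpp/main",                       # illustrative path
+     model_path: str = "./whisper.cpp/models/ggml-base.en.bin",     # illustrative path
+ ) -> str:
+     """Run whisper.cpp locally; -nt suppresses timestamps so stdout is plain text."""
+     result = subprocess.run(
+         [whisper_bin, "-m", model_path, "-f", audio_path, "-nt"],
+         capture_output=True, text=True, check=True, timeout=600,
+     )
+     return result.stdout.strip()
+ ```
+
+ Note that whisper.cpp expects 16 kHz WAV input, so `prepare_audio_source` would also need to resample (e.g., via `ffmpeg`).
+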
+
+ ## 5. Extra Agent Designs
+
+ This section proposes three additional specialized agents designed to enhance performance on the GAIA benchmark by addressing common challenges like complex fact verification, interpreting visual data representations, and handling long contexts.
+
+ ### 5.1. Agent Design 1: Advanced Validation Agent (`validation_agent`)
+
+ * **Purpose:** To perform rigorous validation of factual claims or intermediate results generated by other agents, going beyond the simple contradiction check of the current `verifier_agent`. This agent aims to improve the accuracy and trustworthiness of the final answer by cross-referencing information and performing checks.
+ * **Key Tool Calls:**
+     * `web_search` (from `research_agent` or similar): To find external evidence supporting or refuting a claim.
+     * `browse_and_extract` (from `research_agent` or similar): To access specific URLs found during search and extract relevant text snippets.
+     * `code_interpreter` (from `code_agent`): To perform calculations or simple data manipulations needed for verification (e.g., checking unit conversions, calculating percentages).
+     * `knowledge_base_lookup` (New Tool - Optional): Interface with a structured knowledge base (e.g., Wikidata, internal DB) to verify entities, relationships, or properties.
+     * `llm_check_consistency` (New Tool or LLM call): Use a powerful LLM with a specific prompt to assess the logical consistency between a claim and a set of provided evidence snippets or existing context (see the sketch at the end of this section).
+ * **Agent Loop Sketch (ReAct style):**
+     1. **Input:** A specific claim or statement to validate, along with relevant context or source information.
+     2. **Thought:** Identify the core assertion in the claim. Determine the best validation strategy (e.g., web search for current events, calculation for numerical claims, consistency check for logical statements).
+     3. **Action:** Call the appropriate tool (`web_search`, `code_interpreter`, `llm_check_consistency`).
+     4. **Observation:** Analyze the tool's output (search results, calculation result, consistency assessment).
+     5. **Thought:** Does the observation confirm, refute, or remain inconclusive about the claim? Is more information needed (e.g., to browse a specific search result)?
+     6. **Action (if needed):** Call another tool (`browse_and_extract`, `llm_check_consistency` with new evidence).
+     7. **Observation:** Analyze new output.
+     8. **Thought:** Synthesize findings. Assign a final validation status (e.g., Confirmed, Refuted, Uncertain) and provide supporting evidence or reasoning.
+     9. **Output:** Validation status and justification.
+     10. **Handoff:** Return result to `planner_agent` or `verifier_agent` (if this agent replaces the contradiction part).
+
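+ A minimal sketch of `llm_check_consistency`, assuming the `GoogleGenAI` wrapper used elsewhere in the repo; the prompt wording and verdict labels are illustrative:
+
+ ```python
+ import os
+ from llama_index.llms.google_genai import GoogleGenAI
+
+ def llm_check_consistency(claim: str, evidence: list[str]) -> str:
+     """Ask an LLM whether the evidence supports, refutes, or is neutral toward the claim."""
+     joined = "\n".join(f"- {snippet}" for snippet in evidence)
+     prompt = (
+         "Assess the logical consistency between the claim and the evidence.\n"
+         f"Claim: {claim}\n"
+         f"Evidence:\n{joined}\n"
+         "Answer with one of: Confirmed, Refuted, Uncertain, followed by a one-sentence justification."
+     )
+     llm = GoogleGenAI(api_key=os.getenv("GEMINI_API_KEY"), model="models/gemini-1.5-pro")
+     return llm.complete(prompt).text
+ ```
+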
+ ### 5.2. Agent Design 2: Figure Interpretation Agent (`figure_interpretation_agent`)
+
+ * **Purpose:** To specialize in extracting structured data and meaning from figures, charts, graphs, and tables embedded within images or documents, which are common in GAIA tasks and often require more than just a textual description.
+ * **Key Tool Calls:**
+     * `image_ocr` (New Tool or enhanced `image_analyzer_agent` capability): High-precision OCR focused on extracting text specifically from figures, including axis labels, legends, titles, and data points.
+     * `chart_data_extractor` (New Tool): Utilizes specialized vision models (e.g., DePlot, ChartOCR, or similar fine-tuned models) designed to parse chart types (bar, line, pie) and extract underlying data series or key values (see the DePlot sketch at the end of this section).
+     * `table_parser` (New Tool): Uses vision or document AI models to detect table structures in images/PDFs and extract cell content into a structured format (e.g., list of lists, Pandas DataFrame via code execution).
+     * `code_interpreter` (from `code_agent`): To process extracted data (e.g., load into a DataFrame, perform simple analysis, re-plot for verification).
+     * `llm_interpret_figure` (New Tool or LLM call): Takes extracted text, data, and potentially the image itself (multimodal) to provide a semantic interpretation of the figure's message or trends.
+ * **Agent Loop Sketch (Function sequence or ReAct):**
+     1. **Input:** An image or document page containing a figure/table, potentially with context or a specific question about it.
+     2. **Action:** Call `image_ocr` to get all text elements.
+     3. **Action:** Call `chart_data_extractor` or `table_parser` based on visual analysis (or try both) to get structured data.
+     4. **Action (Optional):** Call `code_interpreter` to load structured data into a DataFrame for easier handling.
+     5. **Action:** Call `llm_interpret_figure`, providing the extracted text, data (raw or DataFrame), and potentially the original image, asking it to answer the specific question or summarize the figure's key insights.
+     6. **Output:** Structured data (if requested) and/or the semantic interpretation/answer.
+     7. **Handoff:** Return results to `planner_agent` or `reasoning_agent`.
+
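+ A minimal sketch of `chart_data_extractor` using DePlot, assuming the `transformers` Pix2Struct API; the model choice follows the examples named above:
+
+ ```python
+ from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
+ from PIL import Image
+
+ processor = Pix2StructProcessor.from_pretrained("google/deplot")
+ model = Pix2StructForConditionalGeneration.from_pretrained("google/deplot")
+
+ def chart_data_extractor(image_path: str) -> str:
+     """Return the chart's underlying data as a linearized table string."""
+     image = Image.open(image_path)
+     inputs = processor(
+         images=image,
+         text="Generate underlying data table of the figure below:",
+         return_tensors="pt",
+     )
+     predictions = model.generate(**inputs, max_new_tokens=512)
+     return processor.decode(predictions[0], skip_special_tokens=True)
+ ```
+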
+ ### 5.3. Agent Design 3: Long Context Management Agent (`long_context_agent`)
+
+ * **Purpose:** To effectively manage and query information from very long documents or conversation histories that exceed the context window limits of standard models or require efficient information retrieval techniques.
+ * **Key Tool Calls:**
+     * `document_chunker` (New Tool): Splits long text into semantically meaningful chunks (e.g., using `SentenceSplitter` from LlamaIndex or more advanced methods).
+     * `vector_store_builder` (New Tool): Takes text chunks and builds an in-memory or persistent vector index (using libraries like `llama-index`, `langchain`, `faiss`, `chromadb`).
+     * `vector_retriever` (New Tool): Queries the built vector index with a specific question to find the most relevant chunks.
+     * `summarizer_tool` (New Tool or LLM call): Generates summaries of long text or selected chunks, potentially using different levels of detail.
+     * `contextual_synthesizer` (New Tool or LLM call): Takes retrieved relevant chunks and the original query, then uses an LLM to synthesize an answer grounded in the retrieved context (RAG pattern; see the sketch at the end of this section).
+ * **Agent Loop Sketch (Can be stateful):**
+     1. **Input:** A long document (text or path) or a long conversation history, and a specific query or task related to it.
+     2. **(Initialization/First Use):**
+         * **Action:** Call `document_chunker`.
+         * **Action:** Call `vector_store_builder` to create an index from the chunks. Store the index reference.
+     3. **(Querying):**
+         * **Action:** Call `vector_retriever` with the user's query to get relevant chunks.
+         * **Action:** Call `contextual_synthesizer`, providing the query and retrieved chunks, to generate the final answer.
+     4. **(Alternative: Summarization Task):**
+         * **Action:** Call `summarizer_tool` on the full text (if feasible for the tool) or on retrieved chunks based on a high-level query.
+     5. **Output:** The synthesized answer or the summary.
+     6. **Handoff:** Return results to `planner_agent`.
+
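+ A minimal sketch of the chunk-index-retrieve-synthesize loop, assuming LlamaIndex's in-memory `VectorStoreIndex`; chunk sizes and `top_k` are illustrative:
+
+ ```python
+ from llama_index.core import Document, VectorStoreIndex
+ from llama_index.core.node_parser import SentenceSplitter
+
+ def build_long_context_index(long_text: str) -> VectorStoreIndex:
+     """document_chunker + vector_store_builder: split and index in memory."""
+     splitter = SentenceSplitter(chunk_size=512, chunk_overlap=64)
+     nodes = splitter.get_nodes_from_documents([Document(text=long_text)])
+     return VectorStoreIndex(nodes)
+
+ def query_long_context(index: VectorStoreIndex, query: str) -> str:
+     """vector_retriever + contextual_synthesizer: retrieve top chunks, ground the answer."""
+     query_engine = index.as_query_engine(similarity_top_k=5)
+     return str(query_engine.query(query))
+ ```
+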
+
+ ## 6. Migration Plan
+
+ This section details the recommended steps for applying the proposed changes, lists new dependencies, and outlines minimal validation tests.
+
+ ### 6.1. Order of Implementation
+
+ It is recommended to apply changes in the following order to minimize disruption and build upon stable foundations:
+
+ 1. **Core Refactoring (`app.py`, Configuration, Logging):**
+     * Implement centralized configuration (e.g., a `.env` file) and update all agents to use it for API keys, model names, etc. (see the dotenv sketch at the end of this subsection).
+     * Integrate Python's `logging` module throughout `app.py` and all agent files, replacing `print` statements.
+     * Refactor `app.py`: Implement singleton agent initialization and break down `run_and_submit_all`.
+     * Apply structural refactors to agents (class-based structure, avoiding globals) like `role_agent`, `verifier_agent`, `research_agent`.
+ 2. **Critical Security Fix (`code_agent`):**
+     * Immediately remove the `SimpleCodeExecutor` and modify `code_agent` to rely solely on the `code_interpreter` tool.
+ 3. **Core Functionality Refactoring (`verifier_agent`, `math_agent`):**
+     * Improve `verifier_agent`'s contradiction detection (e.g., using an LLM or NLI model).
+     * Refactor `math_agent` tools if choosing to group them or use a natural language interface.
+ 4. **New Feature: Generic Audio Transcription (`transcription_agent`):**
+     * Install `whisper.cpp` and its dependencies.
+     * Implement the `transcription_agent` and its tools (`prepare_audio_source`, `transcribe_gemini`, `transcribe_whisper_cpp`).
+     * Implement the Python API function `get_transcript`.
+ 5. **New Feature: YouTube Ingestion (`youtube_agent`):**
+     * Install `yt-dlp` and `pydub` (and `ffmpeg`).
+     * Implement the `youtube_agent` and its tools (`download_youtube_audio`, `chunk_audio_file`, `transcribe_audio_chunk_gemini`, `summarize_transcript`).
+ 6. **New Agent Implementation (Validation, Figure, Long Context):**
+     * Implement `validation_agent` and its tools.
+     * Implement `figure_interpretation_agent` and its tools (requires sourcing/installing chart/table parsing models/libraries).
+     * Implement `long_context_agent` and its tools (requires vector DB setup like `faiss` or `chromadb`).
+ 7. **Integration and Workflow Adjustments:**
+     * Update `planner_agent`'s system prompt and handoff logic to incorporate the new agents.
+     * Update other agents' handoff targets as needed.
+     * Update `app.py` if the overall agent initialization or workflow invocation changes.
+
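+ A minimal sketch of step 1's centralized configuration, assuming `python-dotenv`; the variable names match the proposals above:
+
+ ```python
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()  # read .env once at startup (e.g., at the top of app.py)
+
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+ PLANNER_AGENT_LLM_MODEL = os.getenv("PLANNER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+ if not GEMINI_API_KEY:
+     raise RuntimeError("GEMINI_API_KEY is not set; check your .env file.")
+ ```
+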
+ ### 6.2. New Dependencies (`requirements.txt`)
+
+ Based on the refactoring and new features, the following dependencies may need to be added or updated in `requirements.txt` (or managed via the environment setup):
+
+ * `python-dotenv`: For loading configuration from `.env` files.
+ * `google-generativeai`: For interacting with Gemini models (likely already present via `llama-index-llms-google-genai`).
+ * `yt-dlp`: For downloading YouTube videos.
+ * `pydub`: For audio manipulation (chunking; see the sketch below). Requires the `ffmpeg` or `libav` system dependency.
+ * `llama-index-vector-stores-faiss` with `faiss-cpu` or `faiss-gpu` (choose one): For the `long_context_agent` vector store.
+ * `chromadb` / `llama-index-vector-stores-chroma`: Alternative vector store for `long_context_agent`.
+ * `llama-index-multi-modal-llms-google`: Ensures multimodal support for Gemini is correctly installed.
+ * *Possibly*: Libraries for NLI models (e.g., `transformers`, `torch`) if used in `validation_agent`.
+ * *Possibly*: Libraries for chart/table parsing (e.g., specific models from Hugging Face, `opencv-python`, `pdf2image`) if implementing the `figure_interpretation_agent` tools.
+ * *Possibly*: Python bindings for `whisper.cpp` if not invoking it via `subprocess`.
+
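+ The `pydub` requirement exists mainly for the ≤60-second chunking step of `youtube_agent`. A minimal sketch, assuming `ffmpeg` is on the PATH and the audio has already been downloaded (function and path names are illustrative):
+
+ ```python
+ # Illustrative chunking helper for youtube_agent; pydub delegates decoding
+ # and encoding to ffmpeg, so ffmpeg must be installed on the system.
+ from pathlib import Path
+
+ from pydub import AudioSegment
+
+
+ def chunk_audio_file(audio_path: str, out_dir: str, chunk_seconds: int = 60) -> list[str]:
+     """Split an audio file into chunks of at most `chunk_seconds` seconds."""
+     audio = AudioSegment.from_file(audio_path)   # length is in milliseconds
+     chunk_ms = chunk_seconds * 1000
+     out = Path(out_dir)
+     out.mkdir(parents=True, exist_ok=True)
+
+     paths: list[str] = []
+     for i, start in enumerate(range(0, len(audio), chunk_ms)):
+         chunk = audio[start:start + chunk_ms]    # slicing is also by ms
+         chunk_path = out / f"chunk_{i:04d}.mp3"
+         chunk.export(str(chunk_path), format="mp3")
+         paths.append(str(chunk_path))
+     return paths
+ ```
+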
+ **System Dependencies:**
+
+ * `ffmpeg` or `libav`: Required by `pydub`.
+ * `whisper.cpp`: Needs to be compiled or installed separately; follow its own build instructions (an invocation sketch follows this list).
+
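+ Since `whisper.cpp` is a standalone CLI, the fallback transcription path can shell out to it. A minimal sketch, assuming the binary and a ggml model have been built/downloaded per the whisper.cpp README (all paths are assumptions, and newer builds name the binary `whisper-cli` rather than `main`):
+
+ ```python
+ # Illustrative whisper.cpp fallback for transcribe_whisper_cpp; whisper.cpp
+ # expects 16 kHz 16-bit WAV input, so convert with ffmpeg beforehand if needed.
+ import subprocess
+ import tempfile
+ from pathlib import Path
+
+ WHISPER_BIN = "whisper.cpp/main"                       # path is an assumption
+ WHISPER_MODEL = "whisper.cpp/models/ggml-base.en.bin"  # path is an assumption
+
+
+ def transcribe_whisper_cpp(audio_path: str) -> str:
+     """Transcribe a 16 kHz WAV file via the whisper.cpp CLI and return the text."""
+     with tempfile.TemporaryDirectory() as tmp:
+         out_base = str(Path(tmp) / "transcript")
+         subprocess.run(
+             [WHISPER_BIN, "-m", WHISPER_MODEL, "-f", audio_path,
+              "-otxt", "-of", out_base],   # -otxt writes <out_base>.txt
+             check=True,
+             capture_output=True,
+         )
+         return Path(out_base + ".txt").read_text()
+ ```
+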
+ ### 6.3. Validation Tests
+
+ Minimal tests should be implemented to validate the key changes (a pytest sketch follows this list):
+
+ 1. **Configuration:** Test loading of API keys and model names from the configuration source.
+ 2. **Logging:** Verify that logs are emitted at the correct levels and in the expected format.
+ 3. **`code_agent` Security:** Test that `code_agent` uses `code_interpreter` and *not* the removed `SimpleCodeExecutor`. Attempt a malicious code execution via prompt to ensure it fails safely within the interpreter's sandbox.
+ 4. **`verifier_agent` Contradiction:** Test the improved contradiction detection with sample pairs of contradictory and non-contradictory statements.
+ 5. **`transcription_agent`:**
+     * Test with a short local audio file using both Gemini and Whisper.cpp, comparing output quality and speed.
+     * Test with an audio URL.
+     * Test the Python API function `get_transcript`.
+ 6. **`youtube_agent`:**
+     * Test with a short YouTube video URL.
+     * Verify audio download, chunking, transcription of chunks, and final summary generation.
+     * Check cleanup of temporary files.
+ 7. **New Agents (Basic):**
+     * For `validation_agent`, `figure_interpretation_agent`, and `long_context_agent`, implement basic tests confirming agent initialization and successful calls to their primary new tools with mock inputs/outputs.
+ 8. **End-to-End Smoke Test:** Run `app.py` and process one or two simple GAIA tasks that exercise the refactored components and, if a relevant task exists, a new feature, to ensure the overall workflow remains functional.
+
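+ A minimal pytest sketch for tests 1 and 4; the `gaia_agent.config` module and the `check_contradiction` helper are assumed names for the refactored code, not its final API:
+
+ ```python
+ # Hypothetical tests; adapt module and function names to the actual refactor.
+ import importlib
+
+ import pytest
+
+
+ def test_config_loads_api_keys(monkeypatch):
+     # Simulate a populated environment instead of relying on a real .env file.
+     monkeypatch.setenv("GEMINI_API_KEY", "test-key")
+     import gaia_agent.config as config
+     importlib.reload(config)  # re-read environment variables after patching
+     assert config.GEMINI_API_KEY == "test-key"
+
+
+ @pytest.mark.parametrize(
+     "a,b,expected",
+     [
+         ("The tower is 300 m tall.", "The tower is 150 m tall.", True),
+         ("The tower is 300 m tall.", "The tower is in Paris.", False),
+     ],
+ )
+ def test_contradiction_detection(a, b, expected):
+     from gaia_agent.verifier import check_contradiction
+     assert check_contradiction(a, b) is expected
+ ```
+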
+ *(Implementation plan complete. Ready for user confirmation.)*
prompts/advanced_validation_agent_prompt.txt ADDED
@@ -0,0 +1,31 @@
+ You are AdvancedValidationAgent, a specialized agent focused on rigorously evaluating the accuracy, consistency, and potential biases of information provided by other agents or external sources.
+
+ **Core Mission:** To act as a critical gatekeeper, ensuring the reliability and trustworthiness of data before it's used for final synthesis or decision-making.
+
+ **Key Capabilities & Tools:**
+
+ 1. **`cross_reference_check`**: Given a claim or piece of information and a list of potential source URLs or documents, verify the claim against these sources. Report supporting, contradicting, or inconclusive findings.
+ 2. **`logical_consistency_check`**: Analyze a set of statements or a block of text for internal contradictions, logical fallacies, or inconsistencies in reasoning.
+ 3. **`bias_detection`**: Examine text for potential biases (e.g., confirmation bias, framing bias, selection bias) based on language, tone, and source context. Identify the type of bias detected.
+ 4. **`fact_check_with_search`**: Use external search tools (delegated via handoff to `research_agent` if necessary, or using internal search if available) to verify specific factual claims against reliable web sources.
+
+ **Workflow:**
+
+ 1. **Receive Input:** Accept a specific claim, statement, document, or set of findings to validate, along with any relevant context or source information.
+ 2. **Select Tool:** Choose the most appropriate validation tool(s) based on the input type and validation goal (e.g., use `cross_reference_check` for source verification, `logical_consistency_check` for reasoning analysis, `bias_detection` for evaluating neutrality, `fact_check_with_search` for specific facts).
+ 3. **Execute Tool(s):** Apply the selected tool(s) methodically.
+ 4. **Synthesize Findings:** Consolidate the results from the validation checks into a structured report, clearly stating:
+     * The original claim/information.
+     * The validation methods used.
+     * Detailed findings (e.g., supporting evidence, contradictions found, logical flaws identified, biases detected).
+     * An overall confidence score or assessment (e.g., High Confidence, Medium Confidence with caveats, Low Confidence/Contradicted).
+ 5. **Hand-Off:** Pass the validation report back to the requesting agent (usually `planner_agent` or `reasoning_agent`) for further action.
+
+ **Constraints:**
+
+ * Focus solely on validation tasks.
+ * Do not generate new content beyond the validation report.
+ * Clearly state the limitations of the validation if sources are unavailable or ambiguous.
+ * Prioritize accuracy and objectivity in your assessment.
+ * Hand off to `research_agent` if external web searching is required for fact-checking beyond provided sources.
+
prompts/code_gen_prompt.txt ADDED
@@ -0,0 +1,14 @@
+ You are a helpful assistant that writes Python code.
+ You will be given a prompt and you must generate Python code based on that prompt.
+ You must only generate Python code and nothing else.
+ Do not include any explanations or any other text.
+ Do not use any markdown.
+ Notes:
+ - The generated code may be complex; it is recommended to review and test
+ it before execution.
+ - This function only generates code and does not execute it.
+
+ Prompt: {prompt}
+
+ Code:
+
prompts/figure_interpretation_agent_prompt.txt ADDED
@@ -0,0 +1,29 @@
+ You are FigureInterpretationAgent, a specialized agent designed to analyze and interpret visual data representations like charts, graphs, diagrams, and tables presented as images.
+
+ **Core Mission:** To extract meaningful insights, data points, trends, and relationships from visual data formats.
+
+ **Key Capabilities & Tools:**
+
+ 1. **`describe_figure`**: Provide a general description of the figure, including its type (e.g., bar chart, line graph, flowchart, table), main elements (axes, labels, legend), and overall topic.
+ 2. **`extract_data_points`**: Identify and extract specific data points or values from the figure. This might involve reading values from axes, bars, lines, or table cells. Specify the target data points if possible (e.g., "value for Q3 2024", "maximum value shown").
+ 3. **`identify_trends`**: Analyze trends shown in the figure (e.g., increasing/decreasing trends in line graphs, comparisons in bar charts). Describe the observed patterns.
+ 4. **`compare_elements`**: Compare different elements within the figure (e.g., compare the heights of two bars, the values of two lines at a specific point, data in different table rows/columns).
+ 5. **`summarize_figure_insights`**: Provide a high-level summary of the key insights or the main message conveyed by the figure.
+
+ **Workflow:**
+
+ 1. **Receive Input:** Accept an image file containing the figure to be analyzed, along with a specific request (e.g., "describe this chart", "extract the sales figures for 2023", "what is the main trend shown?").
+ 2. **Analyze Image:** Utilize multimodal capabilities to visually process the image.
+ 3. **Select Tool/Task:** Based on the user request, determine the appropriate analysis task (description, data extraction, trend identification, comparison, summarization).
+ 4. **Execute Analysis:** Perform the visual analysis to fulfill the request. This involves interpreting the visual elements and extracting the relevant information.
+ 5. **Format Output:** Present the findings clearly and concisely, directly addressing the user's request.
+ 6. **Hand-Off:** Pass the interpretation results back to the requesting agent (e.g., `planner_agent`, `research_agent`, `reasoning_agent`).
+
+ **Constraints:**
+
+ * Focus solely on interpreting the provided visual data.
+ * Do not perform calculations beyond reading values directly from the figure unless explicitly asked and feasible.
+ * Acknowledge limitations if the figure is unclear, low-resolution, or lacks necessary labels/context.
+ * Base interpretations strictly on the visual information present in the image.
+ * Requires multimodal input capabilities to process the image file.
+
prompts/image_analyzer_prompt.txt ADDED
@@ -0,0 +1,69 @@
+ You are ImageAnalyzerAgent, an expert in cold, factual visual analysis. Your sole mission is to describe and analyze each image with the utmost exhaustiveness, precision, and absence of conjecture. Follow these directives exactly:
+
+ 1. **Context & Role**
+ - You are an automated, impartial analysis system with no emotional or subjective bias.
+ - Your objective is to deliver a **purely factual** analysis of the image, avoiding artistic interpretation, author intent, aesthetic judgment, or speculation about non-visible elements.
+
+ 2. **Analysis Structure**
+ Adhere strictly to this order in your output:
+
+ 1. **General Identification**
+ - Output format: “Image received: [filename or path]”.
+ - Dimensions (if available): width × height in pixels.
+ - File format (JPEG, PNG, GIF, etc.).
+
+ 2. **Scene Description**
+ - Total number of detected objects.
+ - Spatial distribution: primary areas of interest (top/left/center, etc.).
+
+ 3. **Detailed Object List**
+ For **each** detected object, provide:
+ - **Class/type** (person, animal, vehicle, landscape, text, graphic, etc.).
+ - **Exact position**: bounding box coordinates (x_min, y_min, x_max, y_max).
+ - **Relative size**: percentage of image area or pixel dimensions.
+ - **Dominant color** (for uniform shapes) or top color palette.
+ - **Attributes**: posture, orientation, readable text, pattern, state (open/closed, on/off), geometric properties (shape, symmetry).
+
+ 4. **Color Palette & Composition**
+ - **Simplified histogram**: list the 5 most frequent colors in hexadecimal (#RRGGBB) with approximate percentages.
+ - **Contrast & brightness**: factual description (e.g., “low overall contrast,” “very dark region in bottom right”).
+ - **Visual balance**: symmetric or asymmetric distribution of masses, guiding lines, focal points.
+
+ 5. **Technical Metrics & Metadata**
+ - EXIF data (if available): capture date/time, camera model, aperture, shutter speed, ISO.
+ - Effective resolution (DPI/PPI), aspect ratio (4:3, 16:9, square).
+
+ 6. **Textual Elements**
+ - OCR of **all** visible text: exact transcription, approximate font type (serif/sans-serif), relative size.
+ - Text layout (alignment, orientation, spacing).
+
+ 7. **Geometric Analysis**
+ - Identify repeating patterns (textures, mosaics, geometric motifs).
+ - Measure dominant angles (vertical, horizontal, diagonal lines).
+
+ 8. **Uncertainty Indicators**
+ - For each object or attribute, briefly state confidence level (high/medium/low) based on image clarity (blur, obstruction, low resolution).
+ - Example: “Detected ‘bicycle’ with medium confidence (partially blurred).”
+
+ 9. **Factual Summary**
+ - Recap all listed elements without additional commentary.
+ - Numbered bullet list, each item prefixed by its category label (e.g., “1. Detected objects: …”, “2. Color palette: …”).
+
+ 3. **Absolute Constraints**
+ - No psychological, symbolic, or subjective interpretation.
+ - No value judgments or qualifiers.
+ - Never omit any visible object or attribute.
+ - Strictly follow the prescribed order and structure without alteration.
+
+ 4. **Output Format**
+ - Plain text only, numbered sections separated by two line breaks.
+
+ 5. **Agent Handoff**
+ Once the image analysis is fully complete, hand off to one of the following agents:
+ - **planner_agent** for roadmap creation or final synthesis.
+ - **research_agent** for any additional information gathering.
+ - **reasoning_agent** for pure chain-of-thought reasoning or deeper logical interpretation.
+
+ By adhering to these instructions, ensure your visual analysis is cold, factual, comprehensive, and
+ completely devoid of subjectivity before handing off.
+
prompts/long_context_management_agent_prompt.txt ADDED
@@ -0,0 +1,28 @@
+ You are LongContextManagementAgent, a specialized agent responsible for handling and processing extensive textual context, such as long documents, lengthy conversation histories, or large datasets.
+
+ **Core Mission:** To distill, organize, and query long-form text effectively, enabling other agents to work with manageable and relevant information.
+
+ **Key Capabilities & Tools:**
+
+ 1. **`summarize_long_context`**: Generate summaries of long text at different levels of detail (e.g., brief overview, multi-paragraph summary, chapter-level summaries).
+ 2. **`extract_key_information`**: Identify and extract specific types of key information from the long context based on a query (e.g., extract all mentions of Project X, find all decisions made in the meeting transcript).
+ 3. **`filter_by_relevance`**: Given a query or topic, filter the long context to retain only the most relevant sections or paragraphs.
+ 4. **`build_context_index` (Conceptual/Internal):** (Potentially an internal mechanism rather than a direct tool) Create an index (e.g., using LlamaIndex) over the long context to enable efficient querying and retrieval, which might be used by other tools.
+ 5. **`query_context_index`**: Answer specific questions based on the information contained within the long context, potentially leveraging an internal index for efficiency.
+
+ **Workflow:**
+
+ 1. **Receive Input:** Accept long text content (potentially as a file path or string) and a specific task (e.g., "summarize this document", "find all references to the budget discussion", "answer this question based on the transcript").
+ 2. **Pre-process/Index (If applicable):** Load the text. If the task involves querying or repeated access, consider building an internal index for efficiency.
+ 3. **Select Tool/Task:** Choose the appropriate tool based on the request (summarization, extraction, filtering, querying).
+ 4. **Execute Task:** Apply the selected tool to the long context.
+ 5. **Format Output:** Present the results (summary, extracted information, filtered text, query answer) clearly.
+ 6. **Hand-Off:** Pass the processed information back to the requesting agent.
+
+ **Constraints:**
+
+ * Focus on processing and managing the provided long context.
+ * Do not introduce external information unless explicitly part of a query that requires broader context (which might involve handoff).
+ * Handle potentially very large inputs efficiently (consider chunking, indexing).
+ * Clearly indicate if requested information cannot be found within the provided context.
+
prompts/planner_agent_prompt.txt ADDED
@@ -0,0 +1,33 @@
+ You are PlannerAgent, a dedicated research strategist and question‐engineer capable of handling text, audio, images, and video inputs.
+ Your mission is to transform any high‐level objective into a clear, prioritized roadmap of 4–8 actionable sub‐steps that guide step‐by‐step research or task execution.
+
+ **Role Assessment**
+ First, consider whether a specific role context (e.g., developer, analyst, translator) should be declared at the start to better frame the planning process.
+
+ **Format**
+ Present the final list as a numbered list only, with each item no longer than one sentence and free of extra commentary.
+
+ **Style**
+ Use a formal, professional tone; remain neutral and precise; avoid filler words.
+
+ **Hand-Off or Self-Answer**
+ Once planning is complete, address each sub-question in turn and then hand off as appropriate:
+ - For coding tasks, invoke **code_agent**.
+ - For web or literature research, invoke **research_agent**.
+ - For mathematical analysis, invoke **math_agent**.
+ - For assigning roles or contexts, invoke **role_agent**.
+ - For deep image analysis, invoke **image_analyzer_agent**.
+ - For deep text analysis, invoke **text_analyzer_agent**.
+ - For pure chain-of-thought reasoning or logical verification, invoke **reasoning_agent**.
+ - If none apply, you may attempt to answer the sub-question yourself.
+
+ **Agent Constraints**
+ Only the following agents are available: **code_agent**, **research_agent**, **math_agent**, **role_agent**, **image_analyzer_agent**, **text_analyzer_agent**, **verifier_agent**, **reasoning_agent**.
+ Do not invoke any other agents (e.g., **chess_agent**, **educate_agent**, **game_agent**, etc.).
+
+ **Finalize**
+ After all sub-questions have been addressed—by hand-off or self-answer—compile and present the ultimate, coherent solution yourself using the `synthesize_and_respond` tool.
+
+ **Completion & Synthesis**
+ If the final result fully completes the original objective, produce a consolidated synthesis of the roadmap and send it as your concluding output.
+
prompts/reasoning_agent_prompt.txt ADDED
@@ -0,0 +1,13 @@
+ You are ReasoningAgent, an advanced cognitive engine specialized in rigorous, step-by-step reasoning.
+
+ **Tool Usage**
+ Always begin by invoking the `reasoning_tool` to perform your internal chain-of-thought reasoning.
+ Provide the full context and user question as inputs to `reasoning_tool`.
+
+ **Post-Reasoning Hand-Off**
+ After the `reasoning_tool` returns its output—regardless of the content—you must immediately delegate
+ to **planner_agent** for roadmap refinement and final synthesis.
+
+ **Important**: You have no direct access to external data sources or the internet.
+ All reasoning is performed by `reasoning_tool` and then handed off to **planner_agent**.
+
prompts/text_analyzer_prompt.txt ADDED
@@ -0,0 +1,43 @@
+ You are TextAnalyzerAgent, an expert text‐analysis assistant. On each request—whether raw text or a PDF URL/path—you must:
+
+ 1. **Determine Input Type**
+ - If the input is a URL or a local file path ending in “.pdf”, call `extract_text_from_pdf` with `{"source": <input>}`.
+ - Otherwise, treat the input directly as text.
+
+ 2. **Extract Text (if PDF)**
+ Thought: Explain that you are retrieving text from the PDF or accepting raw text.
+ Action: extract_text_from_pdf or (skip for raw text)
+ Action Input: {"source": <input>}
+ Await Observation: the full concatenated text or an error message.
+ - If an error occurs, immediately return that error as your Answer.
+
+ 3. **Analyze Content**
+ Thought: Outline that you will produce a summary and list of facts.
+ Action: analyze_text
+ Action Input: {"text": <extracted_or_raw_text>}
+ Await Observation: a plain‐text response with “Summary:” and “Facts:” sections.
+
+ 4. **Format Response**
+ Thought: I can answer without using any more tools.
+ Answer:
+ Summary:
+ • <bullet point 1>
+ • <bullet point 2>
+ • <bullet point 3>
+
+ Facts:
+ • <fact 1>
+ • <fact 2>
+ • …
+
+ 5. **Guidelines**
+ - Never include extra sections or commentary.
+ - Use exactly one tool per Action.
+ - If extraction fails, stop and return the error.
+ - Ensure bullets use “• ” and sections are labeled “Summary:” and “Facts:”.
+
+ 6. **Hand‐Off**
+ After delivering your “Summary:” and “Facts:”, pass the extracted facts list to `verifier_agent` for confidence scoring and contradiction detection.
+
+ Follow this Thought→Action→Observation→… cycle rigorously to produce consistent, reliable analyses.
+
pyproject.toml ADDED
@@ -0,0 +1,31 @@
+ [project]
+ name = "gaia-agent"
+ version = "0.1.0"
+ description = "Add your description here"
+ requires-python = ">=3.11"
+ dependencies = [
+     "certifi>=2025.4.26",
+     "datasets>=3.5.1",
+     "dotenv>=0.9.9",
+     "gradio>=5.28.0",
+     "helium>=5.1.1",
+     "huggingface>=0.0.1",
+     "llama-index>=0.12.33",
+     "llama-index-embeddings-huggingface>=0.5.3",
+     "llama-index-llms-google-genai>=0.1.9",
+     "llama-index-retrievers-bm25>=0.5.2",
+     "llama-index-tools-arxiv>=0.3.0",
+     "llama-index-tools-code-interpreter>=0.3.0",
+     "llama-index-tools-duckduckgo>=0.3.0",
+     "llama-index-tools-google>=0.3.0",
+     "llama-index-tools-tavily-research>=0.3.0",
+     "llama-index-tools-wikipedia>=0.3.0",
+     "llama-index-tools-wolfram-alpha>=0.3.0",
+     "llama-index-tools-yahoo-finance>=0.3.0",
+     "openai-whisper>=20240930",
+     "pandas>=2.2.3",
+     "requests>=2.32.3",
+     "scipy>=1.15.2",
+     "sympy>=1.14.0",
+     "youtube-transcript-api>=1.0.3",
+ ]
todo.md ADDED
@@ -0,0 +1,44 @@
+ # GAIA Framework Improvement Plan - ToDo List
+
+ 1. [X] Create overall output document structure (`gaia_improvement_plan.md`).
+ 2. [ ] Generate ASCII diagram of the *revised* architecture (incorporating proposed changes).
+ 3. [ ] Perform Code Quality Review:
+     * [ ] Review `app.py`
+     * [ ] Review `role_agent.py`
+     * [ ] Review `image_analyzer_agent.py`
+     * [ ] Review `verifier_agent.py`
+     * [ ] Review `research_agent.py`
+     * [ ] Review `text_analyzer_agent.py`
+     * [ ] Review `reasoning_agent.py`
+     * [ ] Review `planner_agent.py`
+     * [ ] Review `code_agent.py`
+     * [ ] Review `math_agent.py` (note truncation)
+     * [ ] Consolidate findings for Code Quality section in the report.
+ 4. [ ] Develop Refactor Proposals:
+     * [ ] Propose refactors for `app.py` (if any) + generate diff.
+     * [ ] Propose refactors for `role_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `image_analyzer_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `verifier_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `research_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `text_analyzer_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `reasoning_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `planner_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `code_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `math_agent.py` (if any) + generate diff.
+     * [ ] Consolidate proposals for Refactoring section in the report.
+ 5. [ ] Design New Features:
+     * [ ] Design YouTube Ingestion feature (module/agent, steps, tools, API).
+     * [ ] Design Generic Audio Transcription feature (module/agent, steps, tools, API, Gemini/Whisper logic).
+     * [ ] Document designs in New Features section of the report.
+ 6. [ ] Design Extra Agents:
+     * [ ] Design Agent 1 (Purpose, Tools, Loop Sketch).
+     * [ ] Design Agent 2 (Purpose, Tools, Loop Sketch).
+     * [ ] Design Agent 3 (Purpose, Tools, Loop Sketch).
+     * [ ] Document designs in Extra Agents section of the report.
+ 7. [ ] Create Migration Plan:
+     * [ ] Define order of applying changes/features.
+     * [ ] List new dependencies for `requirements.txt`.
+     * [ ] Outline minimal unit/integration tests for validation.
+     * [ ] Document plan in Migration Plan section of the report.
+ 8. [ ] Assemble final report (`gaia_improvement_plan.md`).
+ 9. [ ] Ask user for confirmation/feedback on the plan before proceeding (as per user's "First action" instruction).
user_requirements.md ADDED
@@ -0,0 +1,63 @@
+ # User Requirements for GAIA Framework Improvement
+
+ Based on the provided `pasted_content.txt`, the user's requirements for improving the GAIA multi-agent framework are as follows:
+
+ ## Overall Goal
+
+ Improve the existing multi-agent framework to maximize performance on the GAIA benchmark.
+
+ ## Specific Objectives
+
+ 1. **Code Quality Review:**
+     * Analyze all source files (`.py`, config, tests, docs if present).
+     * Identify weaknesses such as:
+         * Design smells
+         * Performance issues
+         * Missing type hints
+         * Brittle parsing logic
+         * Insufficient test coverage
+         * Other reliability impairments.
+
+ 2. **Refactor Proposals:**
+     * For each identified weakness, suggest concise improvements.
+     * Provide practical improvements as diff-style patches where feasible.
+
+ 3. **New Feature Implementation:**
+     * **YouTube Ingestion:**
+         * Input: YouTube video URL.
+         * Process: Download audio -> Chunk audio (≤ 60 seconds) -> Transcribe chunks using Gemini 1.5 Pro.
+         * Output: Full transcript and a one-paragraph summary.
+     * **Generic Audio Transcription:**
+         * Input: Local audio file path or remote audio URL.
+         * Process: Transcribe using Gemini 1.5 Pro (preferred for latency tolerance) or Whisper-cpp (fallback).
+         * Output: Expose functionality via a simple Python API.
+
+ 4. **Extra Agent Design:**
+     * Design at least three new specialized agents.
+     * These agents should demonstrably contribute to boosting GAIA benchmark performance.
+     * For each new agent, provide:
+         * Purpose
+         * Key tool calls
+         * Sketch of the agent loop/logic.
+
+ 5. **Migration Plan:**
+     * Define the recommended order for applying the proposed refactor patches and implementing new features.
+     * List all new dependencies required for `requirements.txt`.
+     * Outline the minimal unit and/or integration tests needed to validate each new feature or significant change.
+
+ ## Output Format Requirements
+
+ * Start the final report with a short ASCII diagram representing the *revised* system architecture (incorporating proposed changes).
+ * For each modified source file or new module:
+     * Provide a one-sentence rationale for the change.
+     * Include any required unified diff patches enclosed in triple backticks with the filename in the header.
+ * Group related changes together.
+ * Use plain paragraphs rather than long bullet lists where appropriate.
+ * Maintain concise prose.
+ * Ask brief clarifying questions if uncertainties arise during the process.
+
+ ## Process Constraint
+
+ * Wait for user confirmation before starting the review.
+ * The initial phase involves analysis and presenting the improvement plan (including architecture diagram, code review findings, refactor proposals, new feature designs, extra agent designs, and migration plan).
+ * Stop after presenting this initial plan and await further instructions or confirmation.
uv.lock ADDED
(diff omitted: file too large to render)