Delanoe Pirard committed on
Commit a23082c · 1 Parent(s): 78bc862

First commit

Files changed (48)
  1. .env +50 -0
  2. .idea/.gitignore +8 -0
  3. .idea/GAIA_Agent.iml +14 -0
  4. .idea/inspectionProfiles/profiles_settings.xml +6 -0
  5. .idea/misc.xml +7 -0
  6. .idea/modules.xml +8 -0
  7. .idea/vcs.xml +6 -0
  8. agents/__init__.py +0 -0
  9. agents/__pycache__/__init__.cpython-311.pyc +0 -0
  10. agents/__pycache__/advanced_validation_agent.cpython-311.pyc +0 -0
  11. agents/__pycache__/code_agent.cpython-311.pyc +0 -0
  12. agents/__pycache__/figure_interpretation_agent.cpython-311.pyc +0 -0
  13. agents/__pycache__/image_analyzer_agent.cpython-311.pyc +0 -0
  14. agents/__pycache__/long_context_management_agent.cpython-311.pyc +0 -0
  15. agents/__pycache__/math_agent.cpython-311.pyc +0 -0
  16. agents/__pycache__/planner_agent.cpython-311.pyc +0 -0
  17. agents/__pycache__/reasoning_agent.cpython-311.pyc +0 -0
  18. agents/__pycache__/research_agent.cpython-311.pyc +0 -0
  19. agents/__pycache__/role_agent.cpython-311.pyc +0 -0
  20. agents/__pycache__/text_analyzer_agent.cpython-311.pyc +0 -0
  21. agents/__pycache__/verifier_agent.cpython-311.pyc +0 -0
  22. agents/advanced_validation_agent.py +404 -0
  23. agents/code_agent.py +193 -0
  24. agents/figure_interpretation_agent.py +303 -0
  25. agents/image_analyzer_agent.py +96 -0
  26. agents/long_context_management_agent.py +452 -0
  27. agents/math_agent.py +696 -0
  28. agents/planner_agent.py +253 -0
  29. agents/reasoning_agent.py +167 -0
  30. agents/research_agent.py +622 -0
  31. agents/role_agent.py +215 -0
  32. agents/text_analyzer_agent.py +388 -0
  33. agents/verifier_agent.py +300 -0
  34. app.py +421 -0
  35. current_architecture.md +91 -0
  36. gaia_improvement_plan.md +943 -0
  37. prompts/advanced_validation_agent_prompt.txt +31 -0
  38. prompts/code_gen_prompt.txt +14 -0
  39. prompts/figure_interpretation_agent_prompt.txt +29 -0
  40. prompts/image_analyzer_prompt.txt +69 -0
  41. prompts/long_context_management_agent_prompt.txt +28 -0
  42. prompts/planner_agent_prompt.txt +33 -0
  43. prompts/reasoning_agent_prompt.txt +13 -0
  44. prompts/text_analyzer_prompt.txt +43 -0
  45. pyproject.toml +31 -0
  46. todo.md +44 -0
  47. user_requirements.md +63 -0
  48. uv.lock +0 -0
.env ADDED
@@ -0,0 +1,50 @@
+ # Environment variables for GAIA Multi-Agent Framework
+
+ # API Keys
+ GEMINI_API_KEY="AIzaSyDOQRtAJd-Kj-H6VT_0t38cZTz4Halgi3U" # For Google AI Studio
+ GOOGLE_API_KEY="AIzaSyACcl4uzlyqz4glW-_uCj0xGPSSH0uloAY" # For Google Custom Search JSON API
+ GOOGLE_CSE_ID="004c6b8673f0c4dd5" # For Google Custom Search Engine ID
+ TAVILY_API_KEY="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" # For Tavily Search API
+ ALPAFLOW_OPENAI_API_KEY="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" # For o4-mini model (or other OpenAI compatible endpoint)
+ WOLFRAM_ALPHA_APP_ID="YOUR_WOLFRAM_ALPHA_APP_ID" # For WolframAlpha API
+
+ # GAIA Benchmark API
+ GAIA_API_URL="https://agents-course-unit4-scoring.hf.space"
+
+ # Model Names (using defaults from original code, can be overridden)
+ ROLE_EMBED_MODEL="Snowflake/snowflake-arctic-embed-l-v2.0"
+ ROLE_RERANKER_MODEL="Alibaba-NLP/gte-multilingual-reranker-base"
+ ROLE_PROMPT_DATASET="fka/awesome-chatgpt-prompts"
+ ROLE_LLM_MODEL="models/gemini-1.5-pro"
+
+ IMAGE_ANALYZER_LLM_MODEL="models/gemini-1.5-pro"
+
+ VERIFIER_LLM_MODEL="models/gemini-2.0-flash"
+ VERIFIER_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+ VERIFIER_CONFIDENCE_THRESHOLD="0.7"
+
+ RESEARCH_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+ # RESEARCH_AGENT_CHROME_NO_SANDBOX="true" # Example config for research agent browser
+ # RESEARCH_AGENT_CHROME_DISABLE_DEV_SHM="true"
+
+ TEXT_ANALYZER_LLM_MODEL="models/gemini-1.5-pro"
+ TEXT_ANALYZER_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+
+ REASONING_TOOL_LLM_MODEL="o4-mini"
+ REASONING_TOOL_API_KEY_ENV="ALPAFLOW_OPENAI_API_KEY" # Env var name containing the key for reasoning tool LLM
+ REASONING_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+
+ PLANNER_TOOL_LLM_MODEL="models/gemini-1.5-pro"
+ PLANNER_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+
+ CODE_GEN_LLM_MODEL="o4-mini"
+ CODE_GEN_API_KEY_ENV="ALPAFLOW_OPENAI_API_KEY" # Env var name containing the key for code gen LLM
+ CODE_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+
+ MATH_AGENT_LLM_MODEL="models/gemini-1.5-pro"
+
+ # New Feature Config (Placeholders)
+ YOUTUBE_CHUNK_DURATION_SECONDS="60"
+ TRANSCRIPTION_WHISPER_CPP_PATH="/path/to/whisper.cpp/main" # Example path
+ TRANSCRIPTION_WHISPER_MODEL_PATH="/path/to/whisper/model.bin" # Example path
+
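
Note on the *_API_KEY_ENV entries above: each holds the name of another environment variable that contains the actual key, a two-level lookup. A minimal sketch of that pattern, mirroring how generate_python_code in agents/code_agent.py (below) resolves its key; the default names are the values from this file:

    import os

    # CODE_GEN_API_KEY_ENV names the env var that stores the real key.
    key_env_name = os.getenv("CODE_GEN_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")
    api_key = os.getenv(key_env_name)
    if not api_key:
        raise ValueError(f"{key_env_name} must be set for code generation")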
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
+ # Editor-based HTTP Client requests
+ /httpRequests/
+ # Datasource local storage ignored files
+ /dataSources/
+ /dataSources.local.xml
.idea/GAIA_Agent.iml ADDED
@@ -0,0 +1,14 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <module type="PYTHON_MODULE" version="4">
+   <component name="NewModuleRootManager">
+     <content url="file://$MODULE_DIR$">
+       <excludeFolder url="file://$MODULE_DIR$/.venv" />
+     </content>
+     <orderEntry type="jdk" jdkName="uv (GAIA_Agent)" jdkType="Python SDK" />
+     <orderEntry type="sourceFolder" forTests="false" />
+   </component>
+   <component name="PyDocumentationSettings">
+     <option name="format" value="PLAIN" />
+     <option name="myDocStringFormat" value="Plain" />
+   </component>
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
+ <component name="InspectionProjectProfileManager">
+   <settings>
+     <option name="USE_PROJECT_PROFILE" value="false" />
+     <version value="1.0" />
+   </settings>
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="Black">
+     <option name="sdkName" value="uv (Zonos)" />
+   </component>
+   <component name="ProjectRootManager" version="2" project-jdk-name="uv (GAIA_Agent)" project-jdk-type="Python SDK" />
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectModuleManager">
+     <modules>
+       <module fileurl="file://$PROJECT_DIR$/.idea/GAIA_Agent.iml" filepath="$PROJECT_DIR$/.idea/GAIA_Agent.iml" />
+     </modules>
+   </component>
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="VcsDirectoryMappings">
+     <mapping directory="$PROJECT_DIR$" vcs="Git" />
+   </component>
+ </project>
agents/__init__.py ADDED
File without changes
agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (163 Bytes).
agents/__pycache__/advanced_validation_agent.cpython-311.pyc ADDED
Binary file (22.8 kB).
agents/__pycache__/code_agent.cpython-311.pyc ADDED
Binary file (10.6 kB).
agents/__pycache__/figure_interpretation_agent.cpython-311.pyc ADDED
Binary file (16.5 kB).
agents/__pycache__/image_analyzer_agent.cpython-311.pyc ADDED
Binary file (4.95 kB).
agents/__pycache__/long_context_management_agent.cpython-311.pyc ADDED
Binary file (23.9 kB).
agents/__pycache__/math_agent.cpython-311.pyc ADDED
Binary file (47.5 kB).
agents/__pycache__/planner_agent.cpython-311.pyc ADDED
Binary file (12.5 kB).
agents/__pycache__/reasoning_agent.cpython-311.pyc ADDED
Binary file (8.62 kB).
agents/__pycache__/research_agent.cpython-311.pyc ADDED
Binary file (40.8 kB).
agents/__pycache__/role_agent.cpython-311.pyc ADDED
Binary file (12.7 kB).
agents/__pycache__/text_analyzer_agent.cpython-311.pyc ADDED
Binary file (23.7 kB).
agents/__pycache__/verifier_agent.cpython-311.pyc ADDED
Binary file (17 kB).
agents/advanced_validation_agent.py ADDED
@@ -0,0 +1,404 @@
+ import os
+ import logging
+ import json
+ from typing import List, Dict, Optional, Union
+ from dotenv import load_dotenv
+
+ from llama_index.core.agent.workflow import ReActAgent
+ from llama_index.core.tools import FunctionTool
+ from llama_index.llms.google_genai import GoogleGenAI
+ # Assuming research_agent might be needed for handoff, but not directly imported
+
+ # Load environment variables
+ load_dotenv()
+
+ # Setup logging
+ logger = logging.getLogger(__name__)
+
+ # Helper function to load prompt from file
+ def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+     """Loads a prompt from a text file."""
+     try:
+         script_dir = os.path.dirname(__file__)
+         prompt_path = os.path.join(script_dir, filename)
+         with open(prompt_path, "r") as f:
+             prompt = f.read()
+         logger.info(f"Successfully loaded prompt from {prompt_path}")
+         return prompt
+     except FileNotFoundError:
+         logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
+         return default_prompt
+     except Exception as e:
+         logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+         return default_prompt
+
+ # --- Tool Functions ---
+
+ # Note: cross_reference_check might require fetching content.
+ # This version assumes content is provided or delegates fetching via handoff.
+ def cross_reference_check(claim: str, sources_content: List[Dict[str, str]]) -> Dict[str, Union[str, List[str]]]:
+     """Verifies a claim against provided source content.
+     Args:
+         claim (str): The statement or piece of information to verify.
+         sources_content (List[Dict[str, str]]): A list of dictionaries, each with "url" (optional) and "content" keys.
+     Returns:
+         Dict: A dictionary summarizing findings (supporting, contradicting, inconclusive) per source.
+     """
+     logger.info(f"Cross-referencing claim: {claim[:100]}... against {len(sources_content)} sources.")
+     if not sources_content:
+         return {"error": "No source content provided for cross-referencing."}
+
+     # LLM configuration
+     llm_model = os.getenv("VALIDATION_LLM_MODEL", "models/gemini-1.5-pro") # Use a capable model
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for cross-referencing LLM.")
+         return {"error": "GEMINI_API_KEY not set."}
+
+     results = []
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model)
+         logger.info(f"Using cross-referencing LLM: {llm_model}")
+
+         for i, source in enumerate(sources_content):
+             source_url = source.get("url", f"Source {i+1}")
+             content = source.get("content", "")
+             if not content:
+                 logger.warning(f"Source {source_url} has no content.")
+                 results.append({"source": source_url, "finding": "inconclusive", "reason": "No content provided"})
+                 continue
+
+             # Truncate long content
+             max_content_len = 15000
+             if len(content) > max_content_len:
+                 logger.warning(f"Truncating content from {source_url} to {max_content_len} chars.")
+                 content = content[:max_content_len]
+
+             prompt = (
+                 f"Review the following source content and determine if it supports, "
+                 f"contradicts, or is inconclusive regarding the claim.\n\n"
+                 f"CLAIM: {claim}\n\n"
+                 f"SOURCE CONTENT from {source_url}:\n{content}\n\n"
+                 f"ANALYSIS: Does the source content directly support the claim, directly contradict it, "
+                 f"or provide no relevant information (inconclusive)? "
+                 f"Provide a brief reason for your conclusion. Respond in JSON format: "
+                 f'{{"finding": "support/contradict/inconclusive", "reason": "Your brief explanation"}}'
+             )
+
+             response = llm.complete(prompt)
+             try:
+                 # Attempt to parse JSON, handle potential markdown fences
+                 json_str = response.text.strip()
+                 if json_str.startswith("```json"):
+                     json_str = json_str[7:]
+                 if json_str.endswith("```"):
+                     json_str = json_str[:-3]
+                 finding_data = json.loads(json_str.strip())
+                 results.append({
+                     "source": source_url,
+                     "finding": finding_data.get("finding", "error"),
+                     "reason": finding_data.get("reason", "LLM response parsing failed")
+                 })
+             except json.JSONDecodeError:
+                 logger.error(f"Failed to parse JSON response for source {source_url}: {response.text}")
+                 results.append({"source": source_url, "finding": "error", "reason": "LLM response not valid JSON"})
+             except Exception as parse_err:
+                 logger.error(f"Error processing LLM response for source {source_url}: {parse_err}")
+                 results.append({"source": source_url, "finding": "error", "reason": f"Processing error: {parse_err}"})
+
+         logger.info("Cross-referencing check completed.")
+         return {"claim": claim, "results": results}
+
+     except Exception as e:
+         logger.error(f"LLM call failed during cross-referencing: {e}", exc_info=True)
+         return {"error": f"Error during cross-referencing: {e}"}
+
+ def logical_consistency_check(text: str) -> Dict[str, Union[bool, str, List[str]]]:
+     """Analyzes text for internal logical contradictions or fallacies using an LLM."""
+     logger.info(f"Checking logical consistency for text (length: {len(text)} chars).")
+
+     # LLM configuration
+     llm_model = os.getenv("VALIDATION_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for consistency check LLM.")
+         return {"error": "GEMINI_API_KEY not set."}
+
+     # Truncate long text
+     max_input_chars = 30000
+     if len(text) > max_input_chars:
+         logger.warning(f"Input text truncated to {max_input_chars} chars for consistency check.")
+         text = text[:max_input_chars]
+
+     prompt = (
+         f"Analyze the following text for logical consistency. Identify any internal contradictions, "
+         f"logical fallacies, or significant inconsistencies in reasoning. "
+         f"If the text is logically consistent, state that clearly. If inconsistencies are found, "
+         f"list them with brief explanations.\n\n"
+         f"TEXT:\n{text}\n\n"
+         f"ANALYSIS: Respond in JSON format: "
+         f'{{"consistent": true/false, "findings": ["Description of inconsistency 1", "Description of inconsistency 2", ...]}}'
+         f"(If consistent is true, findings should be an empty list)."
+     )
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model, response_mime_type="application/json")
+         logger.info(f"Using consistency check LLM: {llm_model}")
+         response = llm.complete(prompt)
+
+         # Attempt to parse JSON
+         json_str = response.text.strip()
+         if json_str.startswith("```json"):
+             json_str = json_str[7:]
+         if json_str.endswith("```"):
+             json_str = json_str[:-3]
+         result_data = json.loads(json_str.strip())
+
+         # Basic validation
+         if "consistent" not in result_data or "findings" not in result_data:
+             raise ValueError("LLM response missing required keys: consistent, findings")
+         if not isinstance(result_data["findings"], list):
+             raise ValueError("LLM response findings key is not a list")
+
+         logger.info(f"Logical consistency check completed. Consistent: {result_data.get('consistent')}")
+         return result_data
+
+     except json.JSONDecodeError as json_err:
+         logger.error(f"Failed to parse JSON response from LLM: {json_err}. Response text: {response.text}")
+         return {"error": f"Failed to parse LLM JSON response: {json_err}"}
+     except ValueError as val_err:
+         logger.error(f"Invalid JSON structure from LLM: {val_err}. Response text: {response.text}")
+         return {"error": f"Invalid JSON structure from LLM: {val_err}"}
+     except Exception as e:
+         logger.error(f"LLM call failed during consistency check: {e}", exc_info=True)
+         return {"error": f"Error during consistency check: {e}"}
+
+ def bias_detection(text: str, source_context: Optional[str] = None) -> Dict[str, Union[bool, List[Dict[str, str]]]]:
+     """Examines text for potential biases using an LLM, considering source context if provided."""
+     logger.info(f"Detecting bias in text (length: {len(text)} chars). Context provided: {source_context is not None}")
+
+     # LLM configuration
+     llm_model = os.getenv("VALIDATION_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for bias detection LLM.")
+         return {"error": "GEMINI_API_KEY not set."}
+
+     # Truncate long text/context
+     max_input_chars = 25000
+     if len(text) > max_input_chars:
+         logger.warning(f"Input text truncated to {max_input_chars} chars for bias detection.")
+         text = text[:max_input_chars]
+     if source_context and len(source_context) > 5000:
+         logger.warning(f"Source context truncated to 5000 chars for bias detection.")
+         source_context = source_context[:5000]
+
+     context_prompt = f"\nSOURCE CONTEXT (optional background about the source):\n{source_context}" if source_context else ""
+
+     prompt = (
+         f"Analyze the following text for potential cognitive and presentation biases (e.g., confirmation bias, framing, selection bias, loaded language, appeal to emotion). "
+         f"Consider the language, tone, and selection of information. Also consider the source context if provided. "
+         f"If no significant biases are detected, state that clearly. If biases are found, list them, identify the type of bias, and provide a brief explanation with evidence from the text.\n\n"
+         f"TEXT:\n{text}"
+         f"{context_prompt}\n\n"
+         f"ANALYSIS: Respond in JSON format: "
+         f'{{"bias_detected": true/false, "findings": [{{"bias_type": "Type of Bias", "explanation": "Explanation with evidence"}}, ...]}}'
+         f"(If bias_detected is false, findings should be an empty list)."
+     )
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model, response_mime_type="application/json")
+         logger.info(f"Using bias detection LLM: {llm_model}")
+         response = llm.complete(prompt)
+
+         # Attempt to parse JSON
+         json_str = response.text.strip()
+         if json_str.startswith("```json"):
+             json_str = json_str[7:]
+         if json_str.endswith("```"):
+             json_str = json_str[:-3]
+         result_data = json.loads(json_str.strip())
+
+         # Basic validation
+         if "bias_detected" not in result_data or "findings" not in result_data:
+             raise ValueError("LLM response missing required keys: bias_detected, findings")
+         if not isinstance(result_data["findings"], list):
+             raise ValueError("LLM response findings key is not a list")
+
+         logger.info(f"Bias detection check completed. Bias detected: {result_data.get('bias_detected')}")
+         return result_data
+
+     except json.JSONDecodeError as json_err:
+         logger.error(f"Failed to parse JSON response from LLM: {json_err}. Response text: {response.text}")
+         return {"error": f"Failed to parse LLM JSON response: {json_err}"}
+     except ValueError as val_err:
+         logger.error(f"Invalid JSON structure from LLM: {val_err}. Response text: {response.text}")
+         return {"error": f"Invalid JSON structure from LLM: {val_err}"}
+     except Exception as e:
+         logger.error(f"LLM call failed during bias detection: {e}", exc_info=True)
+         return {"error": f"Error during bias detection: {e}"}
+
+ # Note: fact_check_with_search primarily prepares the request for research_agent.
+ def fact_check_with_search(claim: str) -> Dict[str, str]:
+     """Prepares a request to fact-check a specific claim using external search.
+     This tool does not perform the search itself but structures the request
+     for handoff to the research_agent.
+     Args:
+         claim (str): The specific factual claim to be checked.
+     Returns:
+         Dict: A dictionary indicating the need for handoff and the query.
+     """
+     logger.info(f"Preparing fact-check request for claim: {claim[:150]}...")
+     # This tool signals the need for handoff to the research agent.
+     # The agent's prompt should guide it to use this tool's output
+     # to formulate the handoff message/query.
+     return {
+         "action": "handoff",
+         "target_agent": "research_agent",
+         "query": f"Fact-check the following claim: {claim}. Provide supporting or contradicting evidence from reliable sources.",
+         "tool_name": "fact_check_with_search" # For context
+     }
+
+ # --- Tool Definitions ---
+ cross_reference_tool = FunctionTool.from_defaults(
+     fn=cross_reference_check,
+     name="cross_reference_check",
+     description=(
+         "Verifies a claim against a list of provided source contents (text). "
+         "Input: claim (str), sources_content (List[Dict[str, str]] with 'content' key). "
+         "Output: Dict summarizing findings per source or error."
+     ),
+ )
+
+ logical_consistency_tool = FunctionTool.from_defaults(
+     fn=logical_consistency_check,
+     name="logical_consistency_check",
+     description=(
+         "Analyzes text for internal logical contradictions or fallacies. "
+         "Input: text (str). Output: Dict with 'consistent' (bool) and 'findings' (List[str]) or error."
+     ),
+ )
+
+ bias_detection_tool = FunctionTool.from_defaults(
+     fn=bias_detection,
+     name="bias_detection",
+     description=(
+         "Examines text for potential biases (cognitive, presentation). "
+         "Input: text (str), Optional: source_context (str). "
+         "Output: Dict with 'bias_detected' (bool) and 'findings' (List[Dict]) or error."
+     ),
+ )
+
+ fact_check_tool = FunctionTool.from_defaults(
+     fn=fact_check_with_search,
+     name="fact_check_with_search",
+     description=(
+         "Prepares a request to fact-check a specific claim using external search via the research_agent. "
+         "Input: claim (str). Output: Dict indicating handoff parameters for research_agent."
+     ),
+ )
+
+ # --- Agent Initialization ---
+ def initialize_advanced_validation_agent() -> ReActAgent:
+     """Initializes the Advanced Validation Agent."""
+     logger.info("Initializing AdvancedValidationAgent...")
+
+     # Configuration for the agent's main LLM
+     agent_llm_model = os.getenv("VALIDATION_AGENT_LLM_MODEL", "models/gemini-1.5-pro") # Use Pro for main agent logic
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for AdvancedValidationAgent.")
+         raise ValueError("GEMINI_API_KEY must be set for AdvancedValidationAgent")
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+         logger.info(f"Using agent LLM: {agent_llm_model}")
+
+         # Load system prompt
+         default_system_prompt = ("You are AdvancedValidationAgent... [Default prompt content - replace with actual]" # Placeholder
+                                  )
+         system_prompt = load_prompt_from_file("../prompts/advanced_validation_agent_prompt.txt", default_system_prompt)
+         if system_prompt == default_system_prompt:
+             logger.warning("Using default/fallback system prompt for AdvancedValidationAgent.")
+
+         # Define available tools
+         tools = [
+             cross_reference_tool,
+             logical_consistency_tool,
+             bias_detection_tool,
+             fact_check_tool # Tool to initiate handoff for external search
+         ]
+
+         # Define valid handoff targets
+         valid_handoffs = [
+             "research_agent", # For fact-checking requiring external search
+             "planner_agent", # To return results
+             "reasoning_agent" # To return results
+         ]
+
+         agent = ReActAgent(
+             name="advanced_validation_agent",
+             description=(
+                 "Critically evaluates information for accuracy, consistency, and bias using specialized tools. "
+                 "Can cross-reference claims, check logic, detect bias, and initiate external fact-checks via research_agent."
+             ),
+             tools=tools,
+             llm=llm,
+             system_prompt=system_prompt,
+             can_handoff_to=valid_handoffs,
+             verbose=True # Enable verbose logging
+         )
+         logger.info("AdvancedValidationAgent initialized successfully.")
+         return agent
+
+     except Exception as e:
+         logger.error(f"Error during AdvancedValidationAgent initialization: {e}", exc_info=True)
+         raise
+
+ # Example usage (for testing if run directly)
+ if __name__ == "__main__":
+     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+     logger.info("Running advanced_validation_agent.py directly for testing...")
+
+     # Check required keys
+     required_keys = ["GEMINI_API_KEY"]
+     missing_keys = [key for key in required_keys if not os.getenv(key)]
+     if missing_keys:
+         print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+     else:
+         try:
+             # Test cross-reference tool
+             print("\nTesting cross_reference_check...")
+             test_claim = "The Eiffel Tower is located in Berlin."
+             test_sources = [
+                 {"url": "wiki/paris", "content": "Paris is the capital of France, known for the Eiffel Tower."},
+                 {"url": "wiki/berlin", "content": "Berlin is the capital of Germany, featuring the Brandenburg Gate."}
+             ]
+             cross_ref_result = cross_reference_check(test_claim, test_sources)
+             print(f"Cross-reference Result:\n{json.dumps(cross_ref_result, indent=2)}")
+
+             # Test logical consistency tool
+             print("\nTesting logical_consistency_check...")
+             inconsistent_text = "All birds can fly. Penguins are birds. Therefore, penguins can fly."
+             consistency_result = logical_consistency_check(inconsistent_text)
+             print(f"Consistency Result:\n{json.dumps(consistency_result, indent=2)}")
+
+             # Test bias detection tool
+             print("\nTesting bias_detection...")
+             biased_text = "The revolutionary new policy is clearly the only sensible path forward, despite what uninformed critics might claim."
+             bias_result = bias_detection(biased_text)
+             print(f"Bias Detection Result:\n{json.dumps(bias_result, indent=2)}")
+
+             # Test fact_check tool (prepares handoff)
+             print("\nTesting fact_check_with_search...")
+             fact_check_prep = fact_check_with_search("Is the Earth flat?")
+             print(f"Fact Check Prep Result:\n{json.dumps(fact_check_prep, indent=2)}")
+
+             # Initialize the agent (optional)
+             # test_agent = initialize_advanced_validation_agent()
+             # print("\nAdvanced Validation Agent initialized successfully for testing.")
+
+         except Exception as e:
+             print(f"Error during testing: {e}")
+
agents/code_agent.py ADDED
@@ -0,0 +1,193 @@
+ import os
+ import logging
+ from dotenv import load_dotenv
+
+ from llama_index.core.agent.workflow import CodeActAgent, ReActAgent
+ from llama_index.core.tools import FunctionTool
+ from llama_index.llms.google_genai import GoogleGenAI
+ from llama_index.llms.openai import OpenAI
+ from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
+
+ # Load environment variables
+ load_dotenv()
+
+ # Setup logging
+ logger = logging.getLogger(__name__)
+
+ # Helper function to load prompt from file
+ def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+     """Loads a prompt from a text file."""
+     try:
+         # Assuming the prompt file is in the same directory as the agent script
+         script_dir = os.path.dirname(__file__)
+         prompt_path = os.path.join(script_dir, filename)
+         with open(prompt_path, "r") as f:
+             prompt = f.read()
+         logger.info(f"Successfully loaded prompt from {prompt_path}")
+         return prompt
+     except FileNotFoundError:
+         logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
+         return default_prompt
+     except Exception as e:
+         logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+         return default_prompt
+
+ def generate_python_code(prompt: str) -> str:
+     """
+     Generate valid Python code from a natural language description using a configured LLM.
+     Args:
+         prompt (str): A clear description of the desired Python code functionality.
+     Returns:
+         str: A string containing the generated Python code.
+     Raises:
+         ValueError: If required API key is not set.
+         Exception: If the LLM call fails.
+     """
+     logger.info(f"Generating Python code for prompt: {prompt[:100]}...")
+
+     # Configuration for code generation LLM
+     gen_llm_model = os.getenv("CODE_GEN_LLM_MODEL", "o4-mini")
+     gen_api_key_env = os.getenv("CODE_GEN_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")
+     gen_api_key = os.getenv(gen_api_key_env)
+
+     if not gen_api_key:
+         logger.error(f"{gen_api_key_env} not found in environment variables for code generation LLM.")
+         raise ValueError(f"{gen_api_key_env} must be set for code generation")
+
+     # Load the prompt template
+     default_gen_prompt_template = ("You are a helpful assistant that writes Python code. "
+                                    "You will be given a prompt and you must generate Python code based on that prompt. "
+                                    "You must only generate Python code and nothing else. "
+                                    "Do not include any explanations or any other text. "
+                                    "Do not use any markdown. \n"
+                                    "Prompt: {prompt} \n"
+                                    "Code:\n")
+     gen_prompt_template = load_prompt_from_file("../prompts/code_gen_prompt.txt", default_gen_prompt_template)
+     input_prompt = gen_prompt_template.format(prompt=prompt)
+
+     try:
+         llm = OpenAI(
+             model=gen_llm_model,
+             api_key=gen_api_key
+         )
+         logger.info(f"Using code generation LLM: {gen_llm_model}")
+         generated_code = llm.complete(input_prompt)
+         logger.info("Code generation successful.")
+         return generated_code.text
+     except Exception as e:
+         logger.error(f"LLM call failed during code generation: {e}", exc_info=True)
+         raise # Re-raise the exception to be handled by the agent/workflow
+
+ # --- Tool Definitions ---
+
+ python_code_generator_tool = FunctionTool.from_defaults(
+     fn=generate_python_code,
+     name="python_code_generator",
+     description=(
+         "Generates executable Python code based on a natural language prompt. "
+         "Input: prompt string. Output: Python code string."
+     ),
+ )
+
+ # Use LlamaIndex's built-in Code Interpreter Tool Spec for safe execution
+ # This assumes the necessary environment (e.g., docker) for the spec is available
+ try:
+     code_interpreter_spec = CodeInterpreterToolSpec()
+     # Get the tool(s) from the spec. It might return multiple tools.
+     code_interpreter_tools = code_interpreter_spec.to_tool_list()
+     if not code_interpreter_tools:
+         raise RuntimeError("CodeInterpreterToolSpec did not return any tools.")
+     # Assuming the primary tool is the first one, or find by name if necessary
+     code_interpreter_tool = next((t for t in code_interpreter_tools if t.metadata.name == "code_interpreter"), None)
+     if code_interpreter_tool is None:
+         raise RuntimeError("Could not find 'code_interpreter' tool in CodeInterpreterToolSpec results.")
+     logger.info("CodeInterpreterToolSpec initialized successfully.")
+ except Exception as e:
+     logger.error(f"Failed to initialize CodeInterpreterToolSpec: {e}", exc_info=True)
+     # Fallback: Define a dummy tool or raise error to prevent agent start?
+     # For now, let initialization fail if the safe interpreter isn't available.
+     raise RuntimeError("CodeInterpreterToolSpec failed to initialize. Cannot create code_agent.") from e
+
+ # --- REMOVED SimpleCodeExecutor ---
+ # The SimpleCodeExecutor class that used subprocess has been entirely removed
+ # due to severe security risks. Execution MUST go through the CodeInterpreterToolSpec.
+
+ # --- Agent Initialization ---
+
+ def initialize_code_agent() -> ReActAgent:
+     """Initializes the code agent (a ReActAgent), configured for safe code execution."""
+     logger.info("Initializing CodeAgent...")
+
+     # Configuration for the agent's main LLM
+     agent_llm_model = os.getenv("CODE_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found in environment variables for CodeAgent.")
+         raise ValueError("GEMINI_API_KEY must be set for CodeAgent")
+
+     try:
+         llm = GoogleGenAI(
+             api_key=gemini_api_key,
+             model=agent_llm_model,
+         )
+         logger.info(f"Using agent LLM: {agent_llm_model}")
+
+         # Load system prompt (consider loading from file)
+         default_system_prompt = """\
+ You are CodeAgent, a specialist in generating and executing Python code. Your mission:
+
+ 1. **Thought**: Think step-by-step before acting and state your reasoning.
+ 2. **Code Generation**: To produce code, call `python_code_generator` with a concise, unambiguous prompt. Review the generated code for correctness and safety.
+ 3. **Execution & Testing**: To execute or test code, call `code_interpreter`. Provide the complete code snippet. Analyze its output (stdout, stderr, result) to verify functionality and debug errors.
+ 4. **Iteration**: If execution fails or the result is incorrect, analyze the error, think about the fix, generate corrected code using `python_code_generator`, and execute again using `code_interpreter`.
+ 5. **Tool Use**: Always adhere strictly to each tool’s input/output format.
+ 6. **Final Output**: Once the code works correctly and achieves the goal, output *only* the final functional code or the final execution result, as appropriate for the task.
+ 7. **Hand-Off**: If further logical reasoning or verification is needed, delegate to **reasoning_agent**. Otherwise, pass your final output to **planner_agent** for synthesis.
+ """
+         # system_prompt = load_prompt_from_file("code_agent_system_prompt.txt", default_system_prompt)
+         system_prompt = default_system_prompt # Using inline for now
+
+         agent = ReActAgent(
+             name="code_agent",
+             description=(
+                 "Generates Python code using `python_code_generator` and executes it safely using `code_interpreter`. "
+                 "Iteratively debugs and refines code based on execution results."
+             ),
+             # REMOVED: code_execute_fn - Execution is handled by the code_interpreter tool via the agent loop.
+             tools=[
+                 python_code_generator_tool,
+                 code_interpreter_tool, # Use the safe tool from the spec
+             ],
+             llm=llm,
+             system_prompt=system_prompt,
+             can_handoff_to=["planner_agent", "reasoning_agent"],
+         )
+         logger.info("CodeAgent initialized successfully.")
+
+         return agent
+
+     except Exception as e:
+         logger.error(f"Error during CodeAgent initialization: {e}", exc_info=True)
+         raise
+
+ # Example usage (for testing if run directly)
+ if __name__ == "__main__":
+     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+     logger.info("Running code_agent.py directly for testing...")
+
+     # Ensure API keys are set for testing
+     required_keys = ["GEMINI_API_KEY", os.getenv("CODE_GEN_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")]
+     missing_keys = [key for key in required_keys if not os.getenv(key)]
+     if missing_keys:
+         print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+     else:
+         try:
+             test_agent = initialize_code_agent()
+             print("Code Agent initialized successfully for testing.")
+             # Example test (requires user interaction or pre-defined task)
+             # result = test_agent.chat("Write and execute python code to print 'hello world'")
+             # print(f"Test query result: {result}")
+         except Exception as e:
+             print(f"Error during testing: {e}")
+
agents/figure_interpretation_agent.py ADDED
@@ -0,0 +1,303 @@
+ import os
+ import logging
+ from typing import List, Dict, Optional, Union
+ from dotenv import load_dotenv
+
+ from llama_index.core.agent.workflow import ReActAgent
+ from llama_index.core.schema import ImageDocument
+ from llama_index.core.tools import FunctionTool
+ from llama_index.llms.google_genai import GoogleGenAI
+
+ # Load environment variables
+ load_dotenv()
+
+ # Setup logging
+ logger = logging.getLogger(__name__)
+
+ # Helper function to load prompt from file
+ def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+     """Loads a prompt from a text file."""
+     try:
+         script_dir = os.path.dirname(__file__)
+         prompt_path = os.path.join(script_dir, filename)
+         with open(prompt_path, "r") as f:
+             prompt = f.read()
+         logger.info(f"Successfully loaded prompt from {prompt_path}")
+         return prompt
+     except FileNotFoundError:
+         logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
+         return default_prompt
+     except Exception as e:
+         logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+         return default_prompt
+
+ # --- Core Figure Interpretation Logic (using Multi-Modal LLM) ---
+
+ def interpret_figure_with_llm(image_path: str, request: str) -> str:
+     """Interprets a figure in an image based on a specific request using a multi-modal LLM.
+     Args:
+         image_path (str): Path to the image file containing the figure.
+         request (str): The specific question or interpretation task (e.g., "Describe this chart",
+                        "Extract sales for Q3", "Identify the main trend").
+     Returns:
+         str: The interpretation result or an error message.
+     """
+     logger.info(f"Interpreting figure in image: {image_path} with request: {request}")
+
+     # Check if image exists
+     if not os.path.exists(image_path):
+         logger.error(f"Image file not found: {image_path}")
+         return f"Error: Image file not found at {image_path}"
+
+     # LLM configuration (Must be a multi-modal model)
+     # Ensure the selected model supports image input (e.g., gemini-1.5-pro)
+     llm_model_name = os.getenv("FIGURE_INTERPRETATION_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for figure interpretation LLM.")
+         return "Error: GEMINI_API_KEY not set."
+
+     try:
+         # Initialize the multi-modal LLM
+         llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model_name)
+         logger.info(f"Using figure interpretation LLM: {llm_model_name}")
+
+         # Prepare the prompt for the multi-modal LLM
+         # The prompt needs to guide the LLM to act as the figure interpreter
+         # based on the specific request.
+         prompt = (
+             f"You are an expert figure interpreter. Analyze the provided image containing a chart, graph, diagram, or table. "
+             f"Focus *only* on the visual information present in the image. "
+             f"Fulfill the following request accurately and concisely:\n\n"
+             f"REQUEST: {request}\n\n"
+             f"Based *only* on the image, provide the answer:"
+         )
+
+         # Load the image data (LlamaIndex integration might handle this differently depending on version)
+         # Assuming a method to load image data compatible with the LLM call
+         # This might involve using ImageBlock or similar structures in newer LlamaIndex versions.
+         # For simplicity here, we assume the LLM call can handle a path or loaded image object.
+
+         # Example using complete (adjust based on actual LlamaIndex multi-modal API)
+         # Note: The exact API for multi-modal completion might vary.
+         # This is a conceptual example.
+         from llama_index.core import SimpleDirectoryReader # Example import
+
+         # Load the image document
+         reader = SimpleDirectoryReader(input_files=[image_path])
+         image_documents = reader.load_data()
+
+         if not image_documents or not isinstance(image_documents[0], ImageDocument):
+             logger.error(f"Failed to load image as ImageDocument: {image_path}")
+             return f"Error: Could not load image file {image_path} for analysis."
+
+         # Make the multi-modal completion call
+         response = llm.complete(
+             prompt=prompt,
+             image_documents=image_documents # Pass the loaded image document(s)
+         )
+
+         interpretation = response.text.strip()
+         logger.info("Figure interpretation successful.")
+         return interpretation
+
+     except FileNotFoundError:
+         # This might be redundant due to the initial check, but good practice
+         logger.error(f"Image file not found during LLM call: {image_path}")
+         return f"Error: Image file not found at {image_path}"
+     except ImportError as ie:
+         logger.error(f"Missing library for multi-modal processing: {ie}")
+         return f"Error: Missing required library for image processing ({ie})."
+     except Exception as e:
+         # Catch potential API errors or other issues
+         logger.error(f"LLM call failed during figure interpretation: {e}", exc_info=True)
+         # Check if the error suggests the model doesn't support images
+         if "does not support image input" in str(e).lower():
+             logger.error(f"The configured model {llm_model_name} does not support image input.")
+             return f"Error: The configured LLM ({llm_model_name}) does not support image input. Please configure a multi-modal model."
+         return f"Error during figure interpretation: {e}"
+
+ # --- Tool Definitions (Wrapping the core logic) ---
+ # These tools essentially pass the request to the core LLM function.
+
+ def describe_figure_tool_fn(image_path: str) -> str:
+     "Provides a general description of the figure in the image (type, elements, topic)."
+     return interpret_figure_with_llm(image_path, "Describe this figure, including its type, main elements (axes, labels, legend), and overall topic.")
+
+ def extract_data_points_tool_fn(image_path: str, data_request: str) -> str:
+     "Extracts specific data points or values from the figure in the image."
+     return interpret_figure_with_llm(image_path, f"Extract the following data points/values from the figure: {data_request}. If exact values are not clear, provide the closest estimate based on the visual.")
+
+ def identify_trends_tool_fn(image_path: str) -> str:
+     "Identifies and describes trends or patterns shown in the figure in the image."
+     return interpret_figure_with_llm(image_path, "Analyze and describe the main trends or patterns shown in this figure.")
+
+ def compare_elements_tool_fn(image_path: str, comparison_request: str) -> str:
+     "Compares different elements within the figure in the image."
+     return interpret_figure_with_llm(image_path, f"Compare the following elements within the figure: {comparison_request}. Be specific about the comparison based on the visual data.")
+
+ def summarize_figure_insights_tool_fn(image_path: str) -> str:
+     "Summarizes the key insights or main message conveyed by the figure in the image."
+     return interpret_figure_with_llm(image_path, "Summarize the key insights or the main message conveyed by this figure.")
+
+ # --- Tool Definitions for Agent ---
+ describe_figure_tool = FunctionTool.from_defaults(
+     fn=describe_figure_tool_fn,
+     name="describe_figure",
+     description="Provides a general description of the figure in the image (type, elements, topic). Input: image_path (str)."
+ )
+
+ extract_data_points_tool = FunctionTool.from_defaults(
+     fn=extract_data_points_tool_fn,
+     name="extract_data_points",
+     description="Extracts specific data points/values from the figure. Input: image_path (str), data_request (str)."
+ )
+
+ identify_trends_tool = FunctionTool.from_defaults(
+     fn=identify_trends_tool_fn,
+     name="identify_trends",
+     description="Identifies and describes trends/patterns in the figure. Input: image_path (str)."
+ )
+
+ compare_elements_tool = FunctionTool.from_defaults(
+     fn=compare_elements_tool_fn,
+     name="compare_elements",
+     description="Compares different elements within the figure. Input: image_path (str), comparison_request (str)."
+ )
+
+ summarize_figure_insights_tool = FunctionTool.from_defaults(
+     fn=summarize_figure_insights_tool_fn,
+     name="summarize_figure_insights",
+     description="Summarizes the key insights/main message of the figure. Input: image_path (str)."
+ )
+
+ # --- Agent Initialization ---
+ def initialize_figure_interpretation_agent() -> ReActAgent:
+     """Initializes the Figure Interpretation Agent."""
+     logger.info("Initializing FigureInterpretationAgent...")
+
+     # Configuration for the agent's main LLM (can be the same multi-modal one)
+     agent_llm_model = os.getenv("FIGURE_INTERPRETATION_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for FigureInterpretationAgent.")
+         raise ValueError("GEMINI_API_KEY must be set for FigureInterpretationAgent")
+
+     try:
+         # Agent's LLM doesn't necessarily need to be multi-modal itself,
+         # if the tools handle the multi-modal calls.
+         # However, using a multi-modal one might allow more direct interaction patterns later.
+         llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+         logger.info(f"Using agent LLM: {agent_llm_model}")
+
+         # Load system prompt
+         default_system_prompt = ("You are FigureInterpretationAgent... [Default prompt content - replace with actual]" # Placeholder
+                                  )
+         system_prompt = load_prompt_from_file("../prompts/figure_interpretation_agent_prompt.txt", default_system_prompt)
+         if system_prompt == default_system_prompt:
+             logger.warning("Using default/fallback system prompt for FigureInterpretationAgent.")
+
+         # Define available tools
+         tools = [
+             describe_figure_tool,
+             extract_data_points_tool,
+             identify_trends_tool,
+             compare_elements_tool,
+             summarize_figure_insights_tool
+         ]
+
+         # Define valid handoff targets
+         valid_handoffs = [
+             "planner_agent", # To return results
+             "research_agent", # If context from figure needs further research
+             "reasoning_agent" # If interpretation needs logical analysis
+         ]
+
+         agent = ReActAgent(
+             name="figure_interpretation_agent",
+             description=(
+                 "Analyzes and interprets visual data representations (charts, graphs, tables) from image files. "
+                 "Can describe figures, extract data, identify trends, compare elements, and summarize insights."
+             ),
+             tools=tools,
+             llm=llm,
+             system_prompt=system_prompt,
+             can_handoff_to=valid_handoffs,
+             # Note: This agent inherently requires multi-modal input capabilities,
+             # which are handled within its tools via a multi-modal LLM.
+         )
+         logger.info("FigureInterpretationAgent initialized successfully.")
+         return agent
+
+     except Exception as e:
+         logger.error(f"Error during FigureInterpretationAgent initialization: {e}", exc_info=True)
+         raise
+
+ # Example usage (for testing if run directly)
+ if __name__ == "__main__":
+     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+     logger.info("Running figure_interpretation_agent.py directly for testing...")
+
+     # Check required keys
+     required_keys = ["GEMINI_API_KEY"]
+     missing_keys = [key for key in required_keys if not os.getenv(key)]
+     if missing_keys:
+         print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+     else:
+         # Check if a multi-modal model is likely configured (heuristic)
+         model_name = os.getenv("FIGURE_INTERPRETATION_LLM_MODEL", "models/gemini-1.5-pro")
+         if "pro" not in model_name.lower() and "vision" not in model_name.lower():
+             print(f"Warning: Configured LLM {model_name} might not support image input. Tests may fail.")
+
+         # Create a dummy image file for testing (requires Pillow)
+         dummy_image_path = "dummy_figure.png"
+         try:
+             from PIL import Image, ImageDraw, ImageFont
+             img = Image.new('RGB', (400, 200), color = (255, 255, 255))
+             d = ImageDraw.Draw(img)
+             # Try to load a default font, handle if not found
+             try:
+                 font = ImageFont.truetype("arial.ttf", 15) # Common font, might not exist
+             except IOError:
+                 font = ImageFont.load_default()
+                 print("Arial font not found, using default PIL font.")
+             d.text((10,10), "Simple Bar Chart", fill=(0,0,0), font=font)
+             d.rectangle([50, 50, 100, 150], fill=(255,0,0)) # Bar 1
+             d.text((60, 160), "A", fill=(0,0,0), font=font)
+             d.rectangle([150, 80, 200, 150], fill=(0,0,255)) # Bar 2
+             d.text((160, 160), "B", fill=(0,0,0), font=font)
+             img.save(dummy_image_path)
+             print(f"Created dummy image file: {dummy_image_path}")
+
+             # Test the tools directly
+             print("\nTesting describe_figure...")
+             desc = describe_figure_tool_fn(dummy_image_path)
+             print(f"Description: {desc}")
+
+             print("\nTesting extract_data_points (qualitative)...")
+             extract_req = "Height of bar A vs Bar B" # Qualitative request
+             extract_res = extract_data_points_tool_fn(dummy_image_path, extract_req)
+             print(f"Extraction Result: {extract_res}")
+
+             print("\nTesting compare_elements...")
+             compare_req = "Compare bar A and bar B"
+             compare_res = compare_elements_tool_fn(dummy_image_path, compare_req)
+             print(f"Comparison Result: {compare_res}")
+
+             # Clean up dummy image
+             os.remove(dummy_image_path)
+
+         except ImportError:
+             print("Pillow library not installed. Skipping direct tool tests that require image creation.")
+             # Optionally, still try initializing the agent
+             try:
+                 test_agent = initialize_figure_interpretation_agent()
+                 print("\nFigure Interpretation Agent initialized successfully (tool tests skipped).")
+             except Exception as e:
+                 print(f"Error initializing agent: {e}")
+         except Exception as e:
+             print(f"Error during testing: {e}")
+             if os.path.exists(dummy_image_path):
+                 os.remove(dummy_image_path) # Ensure cleanup on error
+
agents/image_analyzer_agent.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from dotenv import load_dotenv
4
+
5
+ from llama_index.core.agent.workflow import FunctionAgent
6
+ from llama_index.llms.google_genai import GoogleGenAI
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+
11
+ # Setup logging
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Helper function to load prompt from file
15
+ def load_prompt_from_file(filename="../prompts/image_analyzer_prompt.txt") -> str:
16
+ """Loads the system prompt from a text file."""
17
+ try:
18
+ # Assuming the prompt file is in the same directory as the agent script
19
+ script_dir = os.path.dirname(__file__)
20
+ prompt_path = os.path.join(script_dir, filename)
21
+ with open(prompt_path, "r") as f:
22
+ prompt = f.read()
23
+ logger.info(f"Successfully loaded system prompt from {prompt_path}")
24
+ return prompt
25
+ except FileNotFoundError:
26
+ logger.error(f"Prompt file {filename} not found at {prompt_path}. Using fallback prompt.")
27
+ # Fallback minimal prompt
28
+ return "You are an image analyzer. Describe the image factually."
29
+ except Exception as e:
30
+ logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
31
+ return "You are an image analyzer. Describe the image factually."
32
+
33
+ def initialize_image_analyzer_agent() -> FunctionAgent:
34
+ """
35
+ Create an agent that orchestrates image analysis.
36
+ Uses Gemini Pro multimodal capabilities directly without explicit tools.
37
+ Configuration and prompt are loaded from environment/file.
38
+ """
39
+ logger.info("Initializing ImageAnalyzerAgent...")
40
+
41
+ # Configuration from environment variables
42
+ llm_model_name = os.getenv("IMAGE_ANALYZER_LLM_MODEL", "models/gemini-1.5-pro")
43
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
44
+
45
+ if not gemini_api_key:
46
+ logger.error("GEMINI_API_KEY not found in environment variables.")
47
+ raise ValueError("GEMINI_API_KEY must be set")
48
+
49
+ try:
50
+ llm = GoogleGenAI(
51
+ api_key=gemini_api_key,
52
+ model=llm_model_name,
53
+ )
54
+ logger.info(f"Using LLM model: {llm_model_name}")
55
+
56
+ # Load system prompt from file
57
+ system_prompt = load_prompt_from_file()
58
+
59
+ # Note: This agent is a FunctionAgent but doesn't explicitly define tools.
60
+ # It relies on the LLM (Gemini 1.5 Pro) to understand the system prompt
61
+ # and perform the analysis when an image is passed in the ChatMessage blocks.
62
+ agent = FunctionAgent(
63
+ name="image_analyzer_agent",
64
+ description=(
65
+ "ImageAnalyzerAgent inspects image files using its multimodal capabilities, "
66
+ "interpreting the visual content according to a detailed factual analysis prompt."
67
+ ),
68
+ llm=llm,
69
+ system_prompt=system_prompt,
70
+ # No explicit tools needed if relying on direct multimodal LLM call
71
+ # tools=[],
72
+ can_handoff_to=["planner_agent", "research_agent", "reasoning_agent"],
73
+ )
74
+ logger.info("ImageAnalyzerAgent initialized successfully.")
75
+ return agent
76
+ except Exception as e:
77
+ logger.error(f"Error during ImageAnalyzerAgent initialization: {e}", exc_info=True)
78
+ raise
79
+
80
+ # Example usage (for testing if run directly)
81
+ if __name__ == "__main__":
82
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
83
+ logger.info("Running image_analyzer_agent.py directly for testing...")
84
+
85
+ # Ensure API key is set for testing
86
+ if not os.getenv("GEMINI_API_KEY"):
87
+ print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
88
+ else:
89
+ try:
90
+ test_agent = initialize_image_analyzer_agent()
91
+ print("Image Analyzer Agent initialized successfully for testing.")
92
+ # To test further, you would need to construct a ChatMessage with an ImageBlock
93
+ # and run agent.chat(message)
94
+ except Exception as e:
95
+ print(f"Error during testing: {e}")
96
+
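For reference, a quick multimodal smoke test could look like the sketch below. It is not part of the committed file: the `ChatMessage`/`ImageBlock`/`TextBlock` imports and the `agent.run(user_msg=...)` call assume a recent `llama-index-core`, and `test_image.png` is a hypothetical local file.

```python
# Hypothetical smoke test for image_analyzer_agent (adjust to the installed llama-index version).
import asyncio
from llama_index.core.llms import ChatMessage, ImageBlock, TextBlock

async def smoke_test():
    agent = initialize_image_analyzer_agent()
    # Attach the instruction and the image as multimodal blocks on one user message.
    msg = ChatMessage(
        role="user",
        blocks=[
            TextBlock(text="Describe this image factually."),
            ImageBlock(path="test_image.png"),  # hypothetical test file
        ],
    )
    response = await agent.run(user_msg=msg)
    print(response)

asyncio.run(smoke_test())
```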
agents/long_context_management_agent.py ADDED
@@ -0,0 +1,452 @@
1
+ import os
2
+ import logging
3
+ import json
4
+ from typing import List, Dict, Optional, Union, Literal
5
+ from dotenv import load_dotenv
6
+
7
+ from llama_index.core.agent.workflow import ReActAgent
8
+ from llama_index.core.tools import FunctionTool, QueryEngineTool
9
+ from llama_index.llms.google_genai import GoogleGenAI
10
+ from llama_index.core import Document, VectorStoreIndex, Settings
11
+ from llama_index.core.node_parser import SentenceSplitter
12
+ from llama_index.core.query_engine import RetrieverQueryEngine
13
+ from llama_index.core.retrievers import VectorIndexRetriever
14
+
15
+ # Load environment variables
16
+ load_dotenv()
17
+
18
+ # Setup logging
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Configure LlamaIndex Settings (optional, but good practice)
22
+ # Ensure embedding model is set if not using default OpenAI
23
+ # Settings.embed_model = ... # Example: HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
24
+ # Settings.llm = ... # Can set a default LLM here if needed
25
+
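Note that `VectorStoreIndex.from_documents` below falls back to LlamaIndex's default OpenAI embedding model unless `Settings.embed_model` is set, while this module only requires `GEMINI_API_KEY`. A minimal sketch of the local-embedding option the comment above hints at (assuming the `llama-index-embeddings-huggingface` package is installed):

```python
# Sketch: configure a local embedding model so index construction needs no OpenAI key.
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
```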
26
+ # Helper function to load prompt from file
27
+ def load_prompt_from_file(filename: str, default_prompt: str) -> str:
28
+ """Loads a prompt from a text file."""
29
+ try:
30
+ script_dir = os.path.dirname(__file__)
31
+ prompt_path = os.path.join(script_dir, filename)
32
+ with open(prompt_path, "r") as f:
33
+ prompt = f.read()
34
+ logger.info(f"Successfully loaded prompt from {prompt_path}")
35
+ return prompt
36
+ except FileNotFoundError:
37
+ logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
38
+ return default_prompt
39
+ except Exception as e:
40
+ logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
41
+ return default_prompt
42
+
43
+ # --- Internal Context Index Management ---
44
+ # Store index and text globally for simplicity in this example
45
+ # In a real application, consider a more robust state management approach
46
+ _context_index: Optional[VectorStoreIndex] = None
47
+ _context_text: Optional[str] = None
48
+ _context_source: Optional[str] = None # e.g., filename or description
49
+
50
+ def _build_or_get_index(text: Optional[str] = None, source: Optional[str] = "loaded_context") -> Optional[VectorStoreIndex]:
51
+ """Builds or retrieves the VectorStoreIndex for the loaded context."""
52
+ global _context_index, _context_text, _context_source
53
+
54
+ if text is not None and (text != _context_text or _context_index is None):
55
+ logger.info(f"Building new context index from text (length: {len(text)} chars). Source: {source}")
56
+ _context_text = text
57
+ _context_source = source
58
+ try:
59
+ # Use SentenceSplitter for chunking
60
+ splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=200)
61
+ Settings.node_parser = splitter # Set globally or pass to index construction
62
+
63
+ documents = [Document(text=_context_text)]
64
+ _context_index = VectorStoreIndex.from_documents(documents, show_progress=True)
65
+ logger.info("Context index built successfully.")
66
+ except Exception as e:
67
+ logger.error(f"Failed to build context index: {e}", exc_info=True)
68
+ _context_index = None
69
+ _context_text = None
70
+ _context_source = None
71
+ return None
72
+
73
+ elif _context_index is None:
74
+ logger.warning("No context loaded or index built yet.")
75
+ return None
76
+
77
+ return _context_index
78
+
79
+ def load_text_context(text: str, source: str = "provided_text") -> str:
80
+ """Loads text into the agent's context and builds an index. Replaces existing context."""
81
+ logger.info(f"Loading new text context (length: {len(text)} chars). Source: {source}")
82
+ index = _build_or_get_index(text=text, source=source)
83
+ if index:
84
+ return f"Successfully loaded and indexed text context from {source} (Length: {len(text)} chars)."
85
+ else:
86
+ return "Error: Failed to load or index the provided text context."
87
+
88
+ # --- Tool Functions ---
89
+
90
+ def summarize_long_context(detail_level: Literal["brief", "standard", "detailed"] = "standard",
91
+ max_length: Optional[int] = None,
92
+ min_length: Optional[int] = None) -> str:
93
+ """Summarizes the currently loaded long text context.
94
+ Args:
95
+ detail_level (str): Level of detail: "brief" (1-2 sentences), "standard" (1-2 paragraphs), "detailed" (multiple paragraphs).
96
+ max_length (Optional[int]): Approximate maximum words (overrides detail_level if set).
97
+ min_length (Optional[int]): Approximate minimum words.
98
+ Returns:
99
+ str: The summary or an error message.
100
+ """
101
+ global _context_text, _context_source
102
+ if _context_text is None:
103
+ return "Error: No long context has been loaded yet. Use 'load_text_context' first."
104
+
105
+ logger.info(f"Summarizing loaded context (Source: {_context_source}, Length: {len(_context_text)} chars). Detail: {detail_level}")
106
+
107
+ # Determine length guidance based on detail_level if max/min not set
108
+ if max_length is None:
109
+ if detail_level == "brief":
110
+ max_length = 50
111
+ min_length = min_length or 10
112
+ elif detail_level == "detailed":
113
+ max_length = 500
114
+ min_length = min_length or 150
115
+ else: # standard
116
+ max_length = 200
117
+ min_length = min_length or 50
118
+ min_length = min_length or int(max_length * 0.3) # Default min length
119
+
120
+ # LLM configuration
121
+ llm_model = os.getenv("CONTEXT_LLM_MODEL", "models/gemini-1.5-pro") # Use Pro for potentially long context
122
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
123
+ if not gemini_api_key:
124
+ logger.error("GEMINI_API_KEY not found for summarization LLM.")
125
+ return "Error: GEMINI_API_KEY not set."
126
+
127
+ # Truncate input text only if extremely long, as Pro handles large contexts
128
+ # Let the LLM handle context window limits if possible
129
+ # max_input_chars = 100000 # Example high limit
130
+ # text_to_summarize = _context_text[:max_input_chars] if len(_context_text) > max_input_chars else _context_text
131
+ text_to_summarize = _context_text # Rely on LLM context window
132
+
133
+ prompt = (
134
+ f"Summarize the following text concisely, focusing on the main points and key information. "
135
+ f"Aim for a length between {min_length} and {max_length} words. "
136
+ f"The requested level of detail is '{detail_level}'.\n\n"
137
+ f"TEXT:\n{text_to_summarize}\n\nSUMMARY:"
138
+ )
139
+
140
+ try:
141
+ llm = GoogleGenAI(api_key=gemini_api_key, model=llm_model)
142
+ logger.info(f"Using summarization LLM: {llm_model}")
143
+ response = llm.complete(prompt)
144
+ summary = response.text.strip()
145
+ logger.info(f"Summarization successful (output length: {len(summary.split())} words).")
146
+ return summary
147
+ except Exception as e:
148
+ logger.error(f"LLM call failed during summarization: {e}", exc_info=True)
149
+ return f"Error during summarization: {e}"
150
+
151
+ def extract_key_information(query: str, max_results: int = 10) -> Union[List[str], str]:
152
+ """Extracts specific information or answers a question based on the loaded long context using the index.
153
+ Args:
154
+ query (str): The question or description of information to extract (e.g., "List all decisions made", "What was mentioned about Project X?").
155
+ max_results (int): Maximum number of distinct pieces of information or text snippets to return.
156
+ Returns:
157
+ List[str]: A list of extracted text snippets or answers, or str: Error message.
158
+ """
159
+ logger.info(f"Extracting information for query: {query} from loaded context. Max results: {max_results}")
160
+ index = _build_or_get_index() # Get existing index
161
+ if index is None:
162
+ return "Error: No context loaded or index available. Use 'load_text_context' first."
163
+
164
+ try:
165
+ # Use a query engine for extraction
166
+ # Configure retriever for potentially broader search
167
+ retriever = VectorIndexRetriever(index=index, similarity_top_k=max_results * 2) # Retrieve more initially
168
+
169
+ # Configure response synthesis (optional, can customize prompt)
170
+ # response_synthesizer = ...
171
+
172
+ query_engine = RetrieverQueryEngine.from_args(retriever=retriever,
173
+ # response_synthesizer=response_synthesizer,
174
+ # llm=Settings.llm # Use default or specify
175
+ )
176
+
177
+ # Formulate a prompt that encourages extraction rather than synthesis if needed
178
+ extraction_prompt = f"Based *only* on the provided context, extract the key information or answer the following query. List distinct findings or provide relevant text snippets. Query: {query}"
179
+
180
+ response = query_engine.query(extraction_prompt)
181
+
182
+ # Process response - might need refinement based on LLM output format
183
+ # Assuming response.response contains the extracted info, potentially needing splitting
184
+ # This part is heuristic and depends on how the LLM responds to the extraction prompt.
185
+ extracted_items = [item.strip() for item in response.response.split("\n") if item.strip()]
186
+
187
+ # Limit results if necessary
188
+ final_results = extracted_items[:max_results]
189
+
190
+ logger.info(f"Extraction successful. Found {len(final_results)} items.")
191
+ return final_results if final_results else ["No specific information found matching the query in the context."]
192
+
193
+ except Exception as e:
194
+ logger.error(f"Error during information extraction: {e}", exc_info=True)
195
+ return f"Error during extraction: {e}"
196
+
197
+ def filter_by_relevance(topic: str, threshold: float = 0.75) -> str:
198
+ """Filters the loaded long context, retaining sections relevant to the topic using the index.
199
+ Args:
200
+ topic (str): The topic or query to filter relevance by.
201
+ threshold (float): Similarity threshold (0.0 to 1.0) for relevance. Higher means more strict.
202
+ Returns:
203
+ str: The filtered text containing only relevant sections, or an error message.
204
+ """
205
+ logger.info(f"Filtering loaded context for relevance to topic: {topic}. Threshold: {threshold}")
206
+ index = _build_or_get_index() # Get existing index
207
+ if index is None:
208
+ return "Error: No context loaded or index available. Use 'load_text_context' first."
209
+
210
+ try:
211
+ retriever = VectorIndexRetriever(index=index, similarity_top_k=20) # Retrieve a decent number of candidates
212
+ retrieved_nodes = retriever.retrieve(topic)
213
+
214
+ relevant_texts = []
215
+ for node_with_score in retrieved_nodes:
216
+ if node_with_score.score >= threshold:
217
+ relevant_texts.append(node_with_score.node.get_content())
218
+ else:
219
+ # Since results are ordered by score, we can stop early
220
+ break
221
+
222
+ if not relevant_texts:
223
+ logger.info("No sections found meeting the relevance threshold.")
224
+ return "No content found matching the specified relevance threshold for the topic."
225
+
226
+ # Combine relevant sections (consider adding separators)
227
+ filtered_text = "\n\n---\n\n".join(relevant_texts)
228
+ logger.info(f"Filtering successful. Combined relevant text length: {len(filtered_text)} chars.")
229
+ return filtered_text
230
+
231
+ except Exception as e:
232
+ logger.error(f"Error during relevance filtering: {e}", exc_info=True)
233
+ return f"Error during filtering: {e}"
234
+
235
+ def query_context_index(query: str) -> str:
236
+ """Answers a specific question based on the information contained within the loaded long context using the index.
237
+ Args:
238
+ query (str): The question to answer.
239
+ Returns:
240
+ str: The answer derived from the context, or an error/"not found" message.
241
+ """
242
+ logger.info(f"Querying loaded context index with: {query}")
243
+ index = _build_or_get_index() # Get existing index
244
+ if index is None:
245
+ return "Error: No context loaded or index available. Use 'load_text_context' first."
246
+
247
+ try:
248
+ query_engine = index.as_query_engine(similarity_top_k=5) # Default query engine
249
+ response = query_engine.query(query)
250
+ answer = response.response.strip()
251
+ logger.info("Context query successful.")
252
+ # Check if the LLM indicated it couldn't answer
253
+ if "don't know" in answer.lower() or "no information" in answer.lower() or "context does not mention" in answer.lower():
254
+ logger.warning(f"Query response suggests information not found: {answer}")
255
+ return f"The loaded context does not seem to contain the answer to: {query}"
256
+ return answer
257
+ except Exception as e:
258
+ logger.error(f"Error during context query: {e}", exc_info=True)
259
+ return f"Error querying context: {e}"
260
+
261
+ # --- Tool Definitions ---
262
+ load_context_tool = FunctionTool.from_defaults(
263
+ fn=load_text_context,
264
+ name="load_text_context",
265
+ description=(
266
+ "Loads/replaces the long text context for the agent and builds an internal index. "
267
+ "Input: text (str), Optional: source (str). Output: Status message (str)."
268
+ ),
269
+ )
270
+
271
+ summarize_context_tool = FunctionTool.from_defaults(
272
+ fn=summarize_long_context,
273
+ name="summarize_long_context",
274
+ description=(
275
+ "Summarizes the currently loaded long text context. "
276
+ "Input: Optional: detail_level ('brief', 'standard', 'detailed'), max_length (int), min_length (int). Output: Summary (str) or error."
277
+ ),
278
+ )
279
+
280
+ extract_info_tool = FunctionTool.from_defaults(
281
+ fn=extract_key_information,
282
+ name="extract_key_information",
283
+ description=(
284
+ "Extracts specific information or answers questions from the loaded context using its index. "
285
+ "Input: query (str), Optional: max_results (int). Output: List[str] of findings or error string."
286
+ ),
287
+ )
288
+
289
+ filter_context_tool = FunctionTool.from_defaults(
290
+ fn=filter_by_relevance,
291
+ name="filter_by_relevance",
292
+ description=(
293
+ "Filters the loaded context to retain only sections relevant to a topic, using the index. "
294
+ "Input: topic (str), Optional: threshold (float 0-1). Output: Filtered text (str) or error."
295
+ ),
296
+ )
297
+
298
+ query_context_tool = FunctionTool.from_defaults(
299
+ fn=query_context_index,
300
+ name="query_context_index",
301
+ description=(
302
+ "Answers a specific question based *only* on the loaded long context using its index. "
303
+ "Input: query (str). Output: Answer (str) or error/'not found' message."
304
+ ),
305
+ )
306
+
307
+ # --- Agent Initialization ---
308
+ def initialize_long_context_management_agent() -> ReActAgent:
309
+ """Initializes the Long Context Management Agent."""
310
+ logger.info("Initializing LongContextManagementAgent...")
311
+
312
+ # Configuration for the agent's main LLM
313
+ agent_llm_model = os.getenv("CONTEXT_AGENT_LLM_MODEL", "models/gemini-1.5-pro") # Needs to handle planning
314
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
315
+
316
+ if not gemini_api_key:
317
+ logger.error("GEMINI_API_KEY not found for LongContextManagementAgent.")
318
+ raise ValueError("GEMINI_API_KEY must be set for LongContextManagementAgent")
319
+
320
+ try:
321
+ llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
322
+ logger.info(f"Using agent LLM: {agent_llm_model}")
323
+ Settings.llm = llm # Set default LLM for LlamaIndex components used by tools
324
+
325
+ # Load system prompt
326
+ default_system_prompt = (
+     "You are LongContextManagementAgent. Load long texts, then summarize, extract, "
+     "filter, or answer questions about them using your tools."
+ )  # Minimal fallback used only if the prompt file is missing
328
+ system_prompt = load_prompt_from_file("../prompts/long_context_management_agent_prompt.txt", default_system_prompt)
329
+ if system_prompt == default_system_prompt:
330
+ logger.warning("Using default/fallback system prompt for LongContextManagementAgent.")
331
+
332
+ # Define available tools
333
+ tools = [
334
+ load_context_tool,
335
+ summarize_context_tool,
336
+ extract_info_tool,
337
+ filter_context_tool,
338
+ query_context_tool
339
+ ]
340
+
341
+ # Define valid handoff targets
342
+ valid_handoffs = [
343
+ "planner_agent", # To return results
344
+ "text_analyzer_agent", # If further analysis of extracted/filtered text is needed
345
+ "reasoning_agent"
346
+ ]
347
+
348
+ agent = ReActAgent(
349
+ name="long_context_management_agent",
350
+ description=(
351
+ "Manages and processes long textual context. Can load text (`load_text_context`), summarize (`summarize_long_context`), "
352
+ "extract key info (`extract_key_information`), filter by relevance (`filter_by_relevance`), and answer questions based on the context (`query_context_index`)."
353
+ ),
354
+ tools=tools,
355
+ llm=llm,
356
+ system_prompt=system_prompt,
357
+ can_handoff_to=valid_handoffs,
358
+ )
359
+ logger.info("LongContextManagementAgent initialized successfully.")
360
+ return agent
361
+
362
+ except Exception as e:
363
+ logger.error(f"Error during LongContextManagementAgent initialization: {e}", exc_info=True)
364
+ raise
365
+
366
+ # Example usage (for testing if run directly)
367
+ if __name__ == "__main__":
368
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
369
+ # Set LlamaIndex log level higher to reduce noise during testing
370
+ logging.getLogger("llama_index.core.indices.vector_store").setLevel(logging.WARNING)
371
+ logging.getLogger("llama_index.core.query_engine").setLevel(logging.WARNING)
372
+ logging.getLogger("llama_index.core.token_counter").setLevel(logging.ERROR) # Suppress token counting logs
373
+
374
+ logger.info("Running long_context_management_agent.py directly for testing...")
375
+
376
+ # Check required keys
377
+ required_keys = ["GEMINI_API_KEY"]
378
+ missing_keys = [key for key in required_keys if not os.getenv(key)]
379
+ if missing_keys:
380
+ print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
381
+ else:
382
+ try:
383
+ # Example long text
384
+ long_text = """
385
+ Meeting Minutes - Project Phoenix - April 28, 2025
386
+ Attendees: Alice, Bob, Charlie, David
387
+ Agenda: Review Q1 results, Plan Q2 roadmap, Budget allocation
388
+
389
+ Q1 Results Discussion:
390
+ Alice presented the sales figures. Sales increased by 15% compared to Q4 2024, exceeding the target of 10%.
391
+ Bob highlighted the success of the marketing campaign launched in February. Customer acquisition cost decreased by 5%.
392
+ Charlie noted a slight dip in user engagement metrics in March, possibly due to a recent UI change.
393
+ Action Item: David to investigate the engagement dip.
394
+
395
+ Q2 Roadmap Planning:
396
+ The team discussed potential features for Q2. Feature A (enhanced reporting) was prioritized.
397
+ Feature B (mobile app improvements) was deferred to Q3.
398
+ Alice emphasized the need for stability improvements. Bob suggested focusing on performance optimization.
399
+ Decision: Q2 focus will be on Feature A and performance/stability improvements.
400
+
401
+ Budget Allocation:
402
+ Charlie presented the proposed budget.
403
+ An additional $50,000 was requested for cloud infrastructure scaling due to increased usage.
404
+ David questioned the necessity of the full amount.
405
+ After discussion, the team approved an additional $40,000 for infrastructure.
406
+ Decision: Allocate $40,000 extra for Q2 infrastructure.
407
+
408
+ Next Steps:
409
+ David to report on engagement metrics by May 5th.
410
+ Alice to finalize Q2 feature specifications by May 10th.
411
+ Meeting adjourned.
412
+ """ * 5 # Make it longer
413
+
414
+ # Test loading context
415
+ print("\nTesting load_text_context...")
416
+ load_status = load_text_context(long_text, source="Meeting Minutes Test")
417
+ print(load_status)
418
+
419
+ if "Error" not in load_status:
420
+ # Test summarization
421
+ print("\nTesting summarize_long_context (brief)...")
422
+ summary_brief = summarize_long_context(detail_level="brief")
423
+ print(f"Brief Summary: {summary_brief}")
424
+
425
+ # Test extraction
426
+ print("\nTesting extract_key_information (decisions)...")
427
+ decisions = extract_key_information(query="List all decisions made in the meeting")
428
+ print(f"Decisions Extracted: {decisions}")
429
+
430
+ # Test filtering
431
+ print("\nTesting filter_by_relevance (budget)...")
432
+ budget_text = filter_by_relevance(topic="budget allocation", threshold=0.7)
433
+ print(f"Filtered Budget Text (first 300 chars):\n{budget_text[:300]}...")
434
+
435
+ # Test querying
436
+ print("\nTesting query_context_index (Q1 sales)...")
437
+ sales_query = "What was the sales increase in Q1?"
438
+ sales_answer = query_context_index(sales_query)
439
+ print(f"Answer to '{sales_query}': {sales_answer}")
440
+
441
+ print("\nTesting query_context_index (non-existent info)...")
442
+ non_existent_query = "Who is the CEO?"
443
+ non_existent_answer = query_context_index(non_existent_query)
444
+ print(f"Answer to '{non_existent_query}': {non_existent_answer}")
445
+
446
+ # Initialize the agent (optional)
447
+ # test_agent = initialize_long_context_management_agent()
448
+ # print("\nLong Context Management Agent initialized successfully for testing.")
449
+
450
+ except Exception as e:
451
+ print(f"Error during testing: {e}")
452
+
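The module keeps its index in process-wide globals, which the comments above flag as a simplification; concurrent sessions would overwrite each other's context. A hypothetical sketch of the "more robust state management" the comment hints at, wrapping the same logic in a per-session object (names are illustrative, not part of this commit):

```python
# Hypothetical refactor: per-session context state instead of module globals.
from typing import Optional
from llama_index.core import Document, VectorStoreIndex

class ContextStore:
    """Holds one session's text, source label, and vector index."""

    def __init__(self) -> None:
        self.index: Optional[VectorStoreIndex] = None
        self.text: Optional[str] = None
        self.source: Optional[str] = None

    def load(self, text: str, source: str = "provided_text") -> None:
        self.text, self.source = text, source
        self.index = VectorStoreIndex.from_documents([Document(text=text)])

    def query(self, question: str) -> str:
        if self.index is None:
            raise RuntimeError("No context loaded yet.")
        return str(self.index.as_query_engine().query(question))
```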
agents/math_agent.py ADDED
@@ -0,0 +1,696 @@
1
+ import functools
+ import os
2
+ import logging
3
+ from typing import List, Optional, Union, Dict
4
+ from dotenv import load_dotenv
5
+
6
+ import sympy as sp
7
+ import numpy as np
8
+ import scipy.linalg as la
9
+ import scipy.special as special
10
+ from scipy.integrate import odeint, quad
11
+ from scipy.stats import binom, norm, poisson
12
+ import numpy.fft as fft
13
+
14
+ from llama_index.core.agent.workflow import ReActAgent
15
+ from llama_index.core.tools import FunctionTool
16
+ from llama_index.llms.google_genai import GoogleGenAI
17
+ from llama_index.tools.wolfram_alpha import WolframAlphaToolSpec
18
+
19
+ # Load environment variables
20
+ load_dotenv()
21
+
22
+ # Setup logging
23
+ logger = logging.getLogger(__name__)
24
+
25
+ # --- Math Tool Functions (with enhanced logging and error handling) ---
26
+
27
+ # Helper to convert numpy scalars/arrays (including values nested in dicts)
+ # into plain Python types so tool outputs stay serializable.
+ def _to_serializable(value):
+     if isinstance(value, np.ndarray):
+         return value.tolist()
+     if isinstance(value, np.integer):
+         return int(value)
+     if isinstance(value, np.floating):
+         return float(value)
+     if isinstance(value, np.complexfloating):
+         return complex(value)
+     if isinstance(value, np.bool_):
+         return bool(value)
+     if isinstance(value, dict):
+         return {k: _to_serializable(v) for k, v in value.items()}
+     return value
+
+ # Helper decorator for error handling, logging, and output conversion
+ def math_tool_handler(func):
+     @functools.wraps(func)  # preserve __name__/__doc__ so FunctionTool.from_defaults sees them
+     def wrapper(*args, **kwargs):
+         func_name = func.__name__
+         logger.info(f"Executing math tool: {func_name} with args: {args}, kwargs: {kwargs}")
+         try:
+             result = func(*args, **kwargs)
+             logger.info(f"Tool {func_name} executed successfully. Result: {str(result)[:200]}...")
+             # Ensure the result is serializable. The numpy abstract base classes
+             # above cover all concrete int/float/complex dtypes and remain valid
+             # on NumPy 2.x, where aliases like np.float_ and np.complex_ were removed.
+             return _to_serializable(result)
+         except (sp.SympifyError, TypeError, ValueError, np.linalg.LinAlgError, ZeroDivisionError) as e:
+             logger.warning(f"Math error in {func_name}: {e}")
+             return f"Error in {func_name}: {e}"
+         except Exception as e:
+             logger.error(f"Unexpected error in {func_name}: {e}", exc_info=True)
+             return f"Unexpected error in {func_name}: {e}"
+     return wrapper
56
+
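`FunctionTool.from_defaults` derives each tool's name, description, and argument schema from the wrapped function's `__name__`, docstring, and signature, which is why the decorator must apply `functools.wraps`; without it every tool would register as `wrapper`. A small illustrative check (hypothetical, not in the file):

```python
# Illustrative check: metadata survives the math_tool_handler decorator.
@math_tool_handler
def add_numbers(a: float, b: float) -> float:
    """Add two numbers."""
    return a + b

tool = FunctionTool.from_defaults(fn=add_numbers)
assert tool.metadata.name == "add_numbers"
```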
57
+ # --- Symbolic math functions ---
58
+ @math_tool_handler
59
+ def solve_symbolic_equation(equation: str, variable: str = "x") -> str:
60
+ """Solve a symbolic equation (e.g., 'x**2 - 4') for the given variable."""
61
+ symbol = sp.symbols(variable)
62
+ # Ensure equation is treated as expression == 0 if no equality sign
63
+ if "=" not in equation:
64
+ expr = sp.sympify(equation)
65
+ else:
66
+ lhs, rhs = equation.split("=", 1)
67
+ expr = sp.Eq(sp.sympify(lhs.strip()), sp.sympify(rhs.strip()))
68
+ solutions = sp.solve(expr, symbol)
69
+ return f"Solutions: {solutions}"
70
+
71
+ @math_tool_handler
72
+ def compute_derivative(expression: str, variable: str = "x") -> str:
73
+ """Compute the symbolic derivative of an expression (e.g., 'sin(x)*x**2')."""
74
+ symbol = sp.symbols(variable)
75
+ expr = sp.sympify(expression)
76
+ deriv = sp.diff(expr, symbol)
77
+ return f"Derivative: {deriv}"
78
+
79
+ @math_tool_handler
80
+ def compute_integral(expression: str, variable: str = "x") -> str:
81
+ """Compute the symbolic indefinite integral of an expression (e.g., '1/x')."""
82
+ symbol = sp.symbols(variable)
83
+ expr = sp.sympify(expression)
84
+ integ = sp.integrate(expr, symbol)
85
+ return f"Integral: {integ} + C"
86
+
87
+ @math_tool_handler
88
+ def compute_limit(
89
+ expression: str, variable: str = "x", point: str = "oo"
90
+ ) -> str:
91
+ """Compute the limit of an expression (e.g., 'sin(x)/x') as variable approaches point (e.g., '0', 'oo')."""
92
+ symbol = sp.symbols(variable)
93
+ expr = sp.sympify(expression)
94
+ # Handle 'oo', '-oo', 'zoo' for infinity, or numerical points
95
+ if point.lower() == "oo":
96
+ pt = sp.oo
97
+ elif point.lower() == "-oo":
98
+ pt = -sp.oo
99
+ elif point.lower() == "zoo":
100
+ pt = sp.zoo # Complex infinity
101
+ else:
102
+ pt = sp.sympify(point)
103
+ lim = sp.limit(expr, symbol, pt)
104
+ return f"Limit at {point}: {lim}"
105
+
106
+ @math_tool_handler
107
+ def simplify_expression(expression: str) -> str:
108
+ """Simplify a symbolic expression (e.g., 'sin(x)**2 + cos(x)**2')."""
109
+ expr = sp.sympify(expression)
110
+ simp = sp.simplify(expr)
111
+ return f"Simplified expression: {simp}"
112
+
113
+ @math_tool_handler
114
+ def expand_expression(expression: str) -> str:
115
+ """Expand a symbolic expression (e.g., '(x+y)**2')."""
116
+ expr = sp.sympify(expression)
117
+ exp = sp.expand(expr)
118
+ return f"Expanded expression: {exp}"
119
+
120
+ @math_tool_handler
121
+ def factor_expression(expression: str) -> str:
122
+ """Factor a symbolic expression (e.g., 'x**2 - y**2')."""
123
+ expr = sp.sympify(expression)
124
+ fact = sp.factor(expr)
125
+ return f"Factored expression: {fact}"
126
+
127
+
128
+ # --- Matrix math functions ---
129
+ @math_tool_handler
130
+ def matrix_addition(a: List[List[float]], b: List[List[float]]) -> List[List[float]]:
131
+ """Add two matrices element-wise. Input: [[1, 2], [3, 4]], [[5, 6], [7, 8]]."""
132
+ A = np.array(a)
133
+ B = np.array(b)
134
+ if A.shape != B.shape:
135
+ raise ValueError("Matrices must have the same shape for addition.")
136
+ return (A + B)
137
+
138
+ @math_tool_handler
139
+ def matrix_subtraction(a: List[List[float]], b: List[List[float]]) -> List[List[float]]:
140
+ """Subtract matrix B from matrix A element-wise. Input: [[5, 6], [7, 8]], [[1, 2], [3, 4]]."""
141
+ A = np.array(a)
142
+ B = np.array(b)
143
+ if A.shape != B.shape:
144
+ raise ValueError("Matrices must have the same shape for subtraction.")
145
+ return (A - B)
146
+
147
+ @math_tool_handler
148
+ def matrix_multiplication(a: List[List[float]], b: List[List[float]]) -> List[List[float]]:
149
+ """Multiply two matrices. Input: [[1, 2], [3, 4]], [[5, 6], [7, 8]]."""
150
+ A = np.array(a)
151
+ B = np.array(b)
152
+ if A.shape[1] != B.shape[0]:
153
+ raise ValueError("Inner dimensions must match for matrix multiplication.")
154
+ return np.matmul(A, B)
155
+
156
+ @math_tool_handler
157
+ def matrix_inverse(matrix: List[List[float]]) -> List[List[float]]:
158
+ """Compute the inverse of a square matrix. Input: [[1, 2], [3, 4]]."""
159
+ M = np.array(matrix)
160
+ if M.shape[0] != M.shape[1]:
161
+ raise ValueError("Matrix must be square to compute inverse.")
162
+ return np.linalg.inv(M)
163
+
164
+ @math_tool_handler
165
+ def matrix_determinant(matrix: List[List[float]]) -> float:
166
+ """Compute the determinant of a square matrix. Input: [[1, 2], [3, 4]]."""
167
+ M = np.array(matrix)
168
+ if M.shape[0] != M.shape[1]:
169
+ raise ValueError("Matrix must be square to compute determinant.")
170
+ return np.linalg.det(M)
171
+
172
+ @math_tool_handler
173
+ def matrix_transpose(matrix: List[List[float]]) -> List[List[float]]:
174
+ """Transpose a matrix. Input: [[1, 2, 3], [4, 5, 6]]."""
175
+ M = np.array(matrix)
176
+ return M.T
177
+
178
+ @math_tool_handler
179
+ def matrix_rank(matrix: List[List[float]]) -> int:
180
+ """Compute the rank of a matrix. Input: [[1, 2], [2, 4]]."""
181
+ M = np.array(matrix)
182
+ return np.linalg.matrix_rank(M)
183
+
184
+ @math_tool_handler
185
+ def matrix_trace(matrix: List[List[float]]) -> float:
186
+ """Compute the trace of a square matrix. Input: [[1, 2], [3, 4]]."""
187
+ M = np.array(matrix)
188
+ if M.shape[0] != M.shape[1]:
189
+ raise ValueError("Matrix must be square to compute trace.")
190
+ return np.trace(M)
191
+
192
+ @math_tool_handler
193
+ def matrix_norm(matrix: List[List[float]], ord_str: str = "fro") -> float:
194
+ """Compute the norm of a matrix. ord_str can be 'fro' (Frobenius), 'nuc' (nuclear), inf, -inf, 1, -1, 2, -2. Input: [[1, 2], [3, 4]]."""
195
+ M = np.array(matrix)
196
+ ord_map = {"fro": "fro", "nuc": "nuc", "inf": np.inf, "-inf": -np.inf, "1": 1, "-1": -1, "2": 2, "-2": -2}
197
+ ord_val = ord_map.get(ord_str)
198
+ if ord_val is None:
199
+ raise ValueError(f"Invalid ord_str: {ord_str}. Must be one of {list(ord_map.keys())}")
200
+ return np.linalg.norm(M, ord=ord_val)
201
+
202
+ @math_tool_handler
203
+ def eigenvalues(matrix: List[List[float]]) -> List[complex]:
204
+ """Compute eigenvalues of a square matrix. Input: [[1, -1], [1, 1]]."""
205
+ M = np.array(matrix)
206
+ if M.shape[0] != M.shape[1]:
207
+ raise ValueError("Matrix must be square to compute eigenvalues.")
208
+ vals = np.linalg.eigvals(M)
209
+ return vals
210
+
211
+ @math_tool_handler
212
+ def eigenvectors(matrix: List[List[float]]) -> List[List[complex]]:
213
+ """Compute eigenvectors of a square matrix. Returns list of eigenvectors. Input: [[1, -1], [1, 1]]."""
214
+ M = np.array(matrix)
215
+ if M.shape[0] != M.shape[1]:
216
+ raise ValueError("Matrix must be square to compute eigenvectors.")
217
+ vals, vecs = np.linalg.eig(M)
218
+ # Return eigenvectors as rows or columns? Let's return as list of column vectors
219
+ return vecs.T # Transpose to get eigenvectors as list items
220
+
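Since `np.linalg.eig` returns eigenvectors as columns, the transpose above makes each list element a single eigenvector, paired index-by-index with the output of `eigenvalues`. A small worked example:

```python
# Worked example: a diagonal matrix has the unit axes as eigenvectors.
vecs = eigenvectors([[2.0, 0.0], [0.0, 3.0]])
# -> [[1.0, 0.0], [0.0, 1.0]]; row 0 pairs with eigenvalue 2, row 1 with 3.
```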
221
+ @math_tool_handler
222
+ def svd_decompose(matrix: List[List[float]]) -> Dict[str, List]:
223
+ """Compute the singular value decomposition (U, S, Vh) of a matrix. Input: [[1, 2], [3, 4], [5, 6]]."""
224
+ M = np.array(matrix)
225
+ U, S, Vh = np.linalg.svd(M)
226
+ return {"U": U, "S": S, "Vh": Vh}
227
+
228
+ @math_tool_handler
229
+ def lu_decompose(matrix: List[List[float]]) -> Dict[str, List]:
230
+ """Compute the LU decomposition (P, L, U) of a matrix. Input: [[1, 2], [3, 4]]."""
231
+ M = np.array(matrix)
232
+ P, L, U = la.lu(M)
233
+ return {"P": P, "L": L, "U": U}
234
+
235
+ @math_tool_handler
236
+ def qr_decompose(matrix: List[List[float]]) -> Dict[str, List]:
237
+ """Compute the QR decomposition (Q, R) of a matrix. Input: [[1, 2], [3, 4]]."""
238
+ M = np.array(matrix)
239
+ Q, R = np.linalg.qr(M)
240
+ return {"Q": Q, "R": R}
241
+
242
+ # --- Statistics functions ---
243
+ @math_tool_handler
244
+ def mean(values: List[float]) -> float:
245
+ """Compute the mean of a list of numbers. Input: [1, 2, 3, 4, 5]."""
246
+ if not values:
247
+ raise ValueError("Input list cannot be empty for mean calculation.")
248
+ return np.mean(np.array(values))
249
+
250
+ @math_tool_handler
251
+ def median(values: List[float]) -> float:
252
+ """Compute the median of a list of numbers. Input: [1, 3, 2, 4, 5]."""
253
+ if not values:
254
+ raise ValueError("Input list cannot be empty for median calculation.")
255
+ return np.median(np.array(values))
256
+
257
+ @math_tool_handler
258
+ def std_dev(values: List[float], ddof: int = 1) -> float:
259
+ """Compute the sample standard deviation (ddof=1) or population (ddof=0) of a list. Input: [1, 2, 3, 4, 5]."""
260
+ if not values or len(values) < ddof:
261
+ raise ValueError(f"Input list must have at least {ddof} elements for std dev with ddof={ddof}.")
262
+ return np.std(np.array(values), ddof=ddof)
263
+
264
+ @math_tool_handler
265
+ def variance(values: List[float], ddof: int = 1) -> float:
266
+ """Compute the sample variance (ddof=1) or population (ddof=0) of a list. Input: [1, 2, 3, 4, 5]."""
267
+ if not values or len(values) < ddof:
268
+ raise ValueError(f"Input list must have at least {ddof} elements for variance with ddof={ddof}.")
269
+ return np.var(np.array(values), ddof=ddof)
270
+
271
+ @math_tool_handler
272
+ def percentile(values: List[float], percent: float) -> float:
273
+ """Compute the q-th percentile (0<=q<=100) of a list. Input: [1, 2, 3, 4, 5], 75."""
274
+ if not values:
275
+ raise ValueError("Input list cannot be empty for percentile calculation.")
276
+ if not (0 <= percent <= 100):
277
+ raise ValueError("Percent must be between 0 and 100.")
278
+ return np.percentile(np.array(values), percent)
279
+
280
+ @math_tool_handler
281
+ def covariance(x: List[float], y: List[float], ddof: int = 1) -> float:
282
+ """Compute sample covariance (ddof=1) or population (ddof=0) between two lists. Input: [1, 2, 3], [4, 5, 6]."""
283
+ X = np.array(x)
284
+ Y = np.array(y)
285
+ if X.size != Y.size:
286
+ raise ValueError("Input lists must have the same length for covariance.")
287
+ if X.size == 0 or X.size < ddof:
288
+ raise ValueError(f"Input lists must have at least {ddof} elements for covariance with ddof={ddof}.")
289
+ # np.cov returns the covariance matrix, we want the off-diagonal element
290
+ return np.cov(X, Y, ddof=ddof)[0, 1]
291
+
292
+ @math_tool_handler
293
+ def correlation(x: List[float], y: List[float]) -> float:
294
+ """Compute Pearson correlation coefficient between two lists. Input: [1, 2, 3], [1, 2, 3.1]."""
295
+ X = np.array(x)
296
+ Y = np.array(y)
297
+ if X.size != Y.size:
298
+ raise ValueError("Input lists must have the same length for correlation.")
299
+ if X.size < 2:
300
+ raise ValueError("Need at least 2 data points for correlation.")
301
+ # np.corrcoef returns the correlation matrix
302
+ corr_matrix = np.corrcoef(X, Y)
303
+ # Handle case where std dev is zero (results in nan)
304
+ if np.isnan(corr_matrix[0, 1]):
305
+ logger.warning("Correlation resulted in NaN, likely due to zero standard deviation in one or both inputs.")
306
+ # Return 0 or raise error? Let's return 0 for now.
307
+ return 0.0
308
+ return corr_matrix[0, 1]
309
+
310
+ @math_tool_handler
311
+ def linear_regression(x: List[float], y: List[float]) -> Dict[str, float]:
312
+ """Perform simple linear regression (y = mx + c). Returns slope (m) and intercept (c). Input: [1, 2, 3], [2, 4.1, 5.9]."""
313
+ X = np.array(x)
314
+ Y = np.array(y)
315
+ if X.size != Y.size:
316
+ raise ValueError("Input lists must have the same length for linear regression.")
317
+ if X.size < 2:
318
+ raise ValueError("Need at least 2 data points for linear regression.")
319
+ slope, intercept = np.polyfit(X, Y, 1)
320
+ return {"slope": slope, "intercept": intercept}
321
+
322
+ # --- Numerical functions ---
323
+ @math_tool_handler
324
+ def find_polynomial_roots(coefficients: List[float]) -> List[complex]:
325
+ """Find roots of a polynomial given coefficients [a_n, a_n-1, ..., a_0]. Input: [1, -3, 2] for x^2-3x+2."""
326
+ if not coefficients:
327
+ raise ValueError("Coefficient list cannot be empty.")
328
+ return np.roots(coefficients)
329
+
330
+ @math_tool_handler
331
+ def interpolate_value(x_vals: List[float], y_vals: List[float], x: float) -> float:
332
+ """Linear interpolate a value at x given data points (x_vals, y_vals). Input: [0, 1, 2], [0, 1, 4], 1.5."""
333
+ if len(x_vals) != len(y_vals):
334
+ raise ValueError("x_vals and y_vals must have the same length.")
335
+ if len(x_vals) < 2:
336
+ raise ValueError("Need at least 2 data points for interpolation.")
337
+ # Ensure x_vals are sorted for np.interp
338
+ sorted_indices = np.argsort(x_vals)
339
+ x_sorted = np.array(x_vals)[sorted_indices]
340
+ y_sorted = np.array(y_vals)[sorted_indices]
341
+ return np.interp(x, x_sorted, y_sorted)
342
+
343
+ @math_tool_handler
344
+ def numerical_integration(
345
+ func_str: str, a: float, b: float, variable: str = "x"
346
+ ) -> float:
347
+ """Numerically integrate func_str (e.g., 'x**2 * sin(x)') from a to b. Input: 'x**2', 0, 1."""
348
+ symbol = sp.symbols(variable)
349
+ # Security Note: Using sympify/lambdify can be risky if func_str is untrusted.
350
+ # Consider using a safer evaluation method if input is external.
351
+ try:
352
+ func = sp.sympify(func_str)
353
+ f_lambdified = sp.lambdify(symbol, func, modules=["numpy"])
354
+ except (sp.SympifyError, SyntaxError) as sym_err:
355
+ raise ValueError(f"Invalid function string: {func_str}. Error: {sym_err}")
356
+
357
+ result, abserr = quad(f_lambdified, a, b)
358
+ logger.info(f"Numerical integration estimated absolute error: {abserr}")
359
+ return result
360
+
361
+ @math_tool_handler
362
+ def solve_ode(
363
+ func_str: str, y0: float, t_eval: List[float], args: tuple = ()
364
+ ) -> List[float]:
365
+ """Solve a first-order ODE dy/dt = f(t, y) using scipy.integrate.solve_ivp.
366
+ func_str should define f(t, y), e.g., '-y + sin(t)'.
367
+ y0 is the initial condition y(t_eval[0]).
368
+ t_eval is the list of time points to evaluate the solution at.
369
+ args are optional additional arguments passed to f(t, y, *args).
370
+ Input: func_str='-y', y0=1, t_eval=[0, 1, 2, 3, 4]."""
371
+ from scipy.integrate import solve_ivp
372
+ import math # Make math functions available
373
+
374
+ # Security Note: Using eval is dangerous with untrusted input.
375
+ # A safer approach would parse the expression or use a restricted environment.
376
+ def ode_func(t, y, *args):
377
+ try:
378
+ # Provide t, y, args, and safe math functions in the eval context
379
+ local_vars = {"t": t, "y": y, "math": math, "np": np}
380
+ # Add args if provided
381
+ if args:
382
+ # Assuming args correspond to p1, p2, ... in the func_str
383
+ for i, arg_val in enumerate(args):
384
+ local_vars[f"p{i+1}"] = arg_val
385
+ return eval(func_str, {"__builtins__": {}}, local_vars)
386
+ except Exception as e:
387
+ # Log the error and raise it to be caught by the handler
388
+ logger.error(f"Error evaluating ODE function {func_str} at t={t}, y={y}: {e}")
389
+ raise ValueError(f"Error in ODE function definition: {e}")
390
+
391
+ if not t_eval:
392
+ raise ValueError("t_eval list cannot be empty.")
393
+ t_span = (min(t_eval), max(t_eval))
394
+
395
+ sol = solve_ivp(ode_func, t_span, [y0], t_eval=t_eval, args=args)
396
+
397
+ if not sol.success:
398
+ raise RuntimeError(f"ODE solver failed: {sol.message}")
399
+
400
+ return sol.y[0] # Return the solution for y
401
+
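Given the `eval` risk flagged in the comments, one hedged alternative is to parse `func_str` once with SymPy and `lambdify` it, mirroring what `numerical_integration` already does. The sketch below assumes `func_str` uses only `t` and `y`, and omits the extra `args` for brevity:

```python
# Hypothetical eval-free variant: parse the ODE right-hand side with SymPy.
import sympy as sp
from scipy.integrate import solve_ivp

def solve_ode_sympy(func_str: str, y0: float, t_eval: list) -> list:
    t_sym, y_sym = sp.symbols("t y")
    f = sp.lambdify((t_sym, y_sym), sp.sympify(func_str), modules=["numpy"])
    sol = solve_ivp(lambda t, y: [f(t, y[0])], (min(t_eval), max(t_eval)), [y0], t_eval=t_eval)
    if not sol.success:
        raise RuntimeError(sol.message)
    return sol.y[0].tolist()

# e.g. solve_ode_sympy("-y", 1.0, [0, 1, 2]) ~ [1.0, 0.368, 0.135]
```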
402
+ # --- Vector functions ---
403
+ @math_tool_handler
404
+ def dot_product(a: List[float], b: List[float]) -> float:
405
+ """Compute dot product of two vectors. Input: [1, 2, 3], [4, 5, 6]."""
406
+ A = np.array(a)
407
+ B = np.array(b)
408
+ if A.shape != B.shape:
409
+ raise ValueError("Vectors must have the same dimension for dot product.")
410
+ return np.dot(A, B)
411
+
412
+ @math_tool_handler
413
+ def cross_product(a: List[float], b: List[float]) -> List[float]:
414
+ """Compute cross product of two 3D vectors. Input: [1, 0, 0], [0, 1, 0]."""
415
+ A = np.array(a)
416
+ B = np.array(b)
417
+ if A.size != 3 or B.size != 3:
418
+ raise ValueError("Cross product is only defined for 3D vectors.")
419
+ return np.cross(A, B)
420
+
421
+ @math_tool_handler
422
+ def vector_magnitude(a: List[float]) -> float:
423
+ """Compute magnitude (Euclidean norm) of a vector. Input: [3, 4]."""
424
+ if not a:
425
+ raise ValueError("Input vector cannot be empty.")
426
+ return np.linalg.norm(np.array(a))
427
+
428
+ @math_tool_handler
429
+ def vector_normalize(a: List[float]) -> List[float]:
430
+ """Normalize a vector to unit length. Input: [3, 4]."""
431
+ A = np.array(a)
432
+ norm = np.linalg.norm(A)
433
+ if norm == 0:
434
+ raise ValueError("Cannot normalize a zero vector.")
435
+ return (A / norm)
436
+
437
+ @math_tool_handler
438
+ def vector_angle(a: List[float], b: List[float], degrees: bool = False) -> float:
439
+ """Compute the angle (in radians or degrees) between two vectors. Input: [1, 0], [0, 1]."""
440
+ dot = dot_product(a, b) # Use our handled dot_product
441
+ norm_a = vector_magnitude(a)
442
+ norm_b = vector_magnitude(b)
443
+ if norm_a == 0 or norm_b == 0:
444
+ raise ValueError("Cannot compute angle with zero vector(s).")
445
+ # Clip argument to arccos to avoid domain errors due to floating point inaccuracies
446
+ cos_theta = np.clip(dot / (norm_a * norm_b), -1.0, 1.0)
447
+ angle_rad = np.arccos(cos_theta)
448
+ return np.degrees(angle_rad) if degrees else angle_rad
449
+
450
+ # --- Probability functions ---
451
+ @math_tool_handler
452
+ def binomial_pmf(k: int, n: int, p: float) -> float:
453
+ """Compute binomial probability mass function P(X=k | n, p). Input: k=2, n=5, p=0.5."""
454
+ if not (0 <= p <= 1):
455
+ raise ValueError("Probability p must be between 0 and 1.")
456
+ if not (0 <= k <= n):
457
+ raise ValueError("k must be between 0 and n (inclusive).")
458
+ return binom.pmf(k, n, p)
459
+
460
+ @math_tool_handler
461
+ def normal_pdf(x: float, mu: float = 0, sigma: float = 1) -> float:
462
+ """Compute normal distribution probability density function N(x | mu, sigma). Input: x=0, mu=0, sigma=1."""
463
+ if sigma <= 0:
464
+ raise ValueError("Standard deviation sigma must be positive.")
465
+ return norm.pdf(x, mu, sigma)
466
+
467
+ @math_tool_handler
468
+ def normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
469
+ """Compute normal distribution cumulative distribution function P(X<=x | mu, sigma). Input: x=0, mu=0, sigma=1."""
470
+ if sigma <= 0:
471
+ raise ValueError("Standard deviation sigma must be positive.")
472
+ return norm.cdf(x, mu, sigma)
473
+
474
+ @math_tool_handler
475
+ def poisson_pmf(k: int, lam: float) -> float:
476
+ """Compute Poisson probability mass function P(X=k | lambda). Input: k=2, lam=3."""
477
+ if lam < 0:
478
+ raise ValueError("Rate parameter lambda must be non-negative.")
479
+ if k < 0 or not isinstance(k, int):
480
+ raise ValueError("k must be a non-negative integer.")
481
+ return poisson.pmf(k, lam)
482
+
483
+ # --- Special functions ---
484
+ @math_tool_handler
485
+ def gamma_function(x: float) -> float:
486
+ """Compute the gamma function Gamma(x). Input: 5."""
487
+ return special.gamma(x)
488
+
489
+ @math_tool_handler
490
+ def beta_function(x: float, y: float) -> float:
491
+ """Compute the beta function B(x, y). Input: 2, 3."""
492
+ return special.beta(x, y)
493
+
494
+ @math_tool_handler
495
+ def erf_function(x: float) -> float:
496
+ """Compute the error function erf(x). Input: 1."""
497
+ return special.erf(x)
498
+
499
+ # --- Fourier Transform functions ---
500
+ @math_tool_handler
501
+ def fft_transform(y: List[float]) -> List[complex]:
502
+ """Compute the Fast Fourier Transform (FFT) of a real sequence y. Input: [0, 1, 0, -1]."""
503
+ if not y:
504
+ raise ValueError("Input list cannot be empty for FFT.")
505
+ return fft.fft(np.array(y))
506
+
507
+ @math_tool_handler
508
+ def ifft_transform(y_complex: List[complex]) -> List[complex]:
509
+ """Compute the inverse Fast Fourier Transform (IFFT) of a complex sequence. Input: result from fft_transform."""
510
+ if not y_complex:
511
+ raise ValueError("Input list cannot be empty for IFFT.")
512
+ return fft.ifft(np.array(y_complex))
513
+
514
+ # --- Tool List Creation ---
515
+
516
+ def get_python_math_tools() -> List[FunctionTool]:
517
+ """Returns a list of FunctionTools for the Python math functions."""
518
+ py_tools = [
519
+ # Symbolic
520
+ FunctionTool.from_defaults(fn=solve_symbolic_equation),
521
+ FunctionTool.from_defaults(fn=compute_derivative),
522
+ FunctionTool.from_defaults(fn=compute_integral),
523
+ FunctionTool.from_defaults(fn=compute_limit),
524
+ FunctionTool.from_defaults(fn=simplify_expression),
525
+ FunctionTool.from_defaults(fn=expand_expression),
526
+ FunctionTool.from_defaults(fn=factor_expression),
527
+ # Matrix
528
+ FunctionTool.from_defaults(fn=matrix_addition),
529
+ FunctionTool.from_defaults(fn=matrix_subtraction),
530
+ FunctionTool.from_defaults(fn=matrix_multiplication),
531
+ FunctionTool.from_defaults(fn=matrix_inverse),
532
+ FunctionTool.from_defaults(fn=matrix_determinant),
533
+ FunctionTool.from_defaults(fn=matrix_transpose),
534
+ FunctionTool.from_defaults(fn=matrix_rank),
535
+ FunctionTool.from_defaults(fn=matrix_trace),
536
+ FunctionTool.from_defaults(fn=matrix_norm),
537
+ FunctionTool.from_defaults(fn=eigenvalues),
538
+ FunctionTool.from_defaults(fn=eigenvectors),
539
+ FunctionTool.from_defaults(fn=svd_decompose),
540
+ FunctionTool.from_defaults(fn=lu_decompose),
541
+ FunctionTool.from_defaults(fn=qr_decompose),
542
+ # Statistics
543
+ FunctionTool.from_defaults(fn=mean),
544
+ FunctionTool.from_defaults(fn=median),
545
+ FunctionTool.from_defaults(fn=std_dev),
546
+ FunctionTool.from_defaults(fn=variance),
547
+ FunctionTool.from_defaults(fn=percentile),
548
+ FunctionTool.from_defaults(fn=covariance),
549
+ FunctionTool.from_defaults(fn=correlation),
550
+ FunctionTool.from_defaults(fn=linear_regression),
551
+ # Numerical
552
+ FunctionTool.from_defaults(fn=find_polynomial_roots),
553
+ FunctionTool.from_defaults(fn=interpolate_value),
554
+ FunctionTool.from_defaults(fn=numerical_integration),
555
+ FunctionTool.from_defaults(fn=solve_ode),
556
+ # Vector
557
+ FunctionTool.from_defaults(fn=dot_product),
558
+ FunctionTool.from_defaults(fn=cross_product),
559
+ FunctionTool.from_defaults(fn=vector_magnitude),
560
+ FunctionTool.from_defaults(fn=vector_normalize),
561
+ FunctionTool.from_defaults(fn=vector_angle),
562
+ # Probability
563
+ FunctionTool.from_defaults(fn=binomial_pmf),
564
+ FunctionTool.from_defaults(fn=normal_pdf),
565
+ FunctionTool.from_defaults(fn=normal_cdf),
566
+ FunctionTool.from_defaults(fn=poisson_pmf),
567
+ # Special Functions
568
+ FunctionTool.from_defaults(fn=gamma_function),
569
+ FunctionTool.from_defaults(fn=beta_function),
570
+ FunctionTool.from_defaults(fn=erf_function),
571
+ # Fourier
572
+ FunctionTool.from_defaults(fn=fft_transform),
573
+ FunctionTool.from_defaults(fn=ifft_transform),
574
+ ]
575
+ # Update descriptions for clarity if needed (optional)
576
+ for tool in py_tools:
577
+ tool.metadata.description = f"(Python) {tool.metadata.description}"
578
+ logger.info(f"Created {len(py_tools)} Python math tools.")
579
+ return py_tools
580
+
581
+ # --- Wolfram Alpha Tool ---
582
+ _wolfram_alpha_tools = None
583
+
584
+ def get_wolfram_alpha_tools() -> List[FunctionTool]:
585
+ """Initializes and returns Wolfram Alpha tools (singleton)."""
586
+ global _wolfram_alpha_tools
587
+ if _wolfram_alpha_tools is None:
588
+ logger.info("Initializing WolframAlphaToolSpec...")
589
+ wolfram_alpha_app_id = os.getenv("WOLFRAM_ALPHA_APP_ID")
590
+ if not wolfram_alpha_app_id:
591
+ logger.warning("WOLFRAM_ALPHA_APP_ID not set. Wolfram Alpha tools will be unavailable.")
592
+ _wolfram_alpha_tools = []
593
+ else:
594
+ try:
595
+ spec = WolframAlphaToolSpec(app_id=wolfram_alpha_app_id)
596
+ _wolfram_alpha_tools = spec.to_tool_list()
597
+ # Add prefix to description for clarity
598
+ for tool in _wolfram_alpha_tools:
599
+ tool.metadata.description = f"(WolframAlpha) {tool.metadata.description}"
600
+ logger.info(f"WolframAlpha tools initialized: {len(_wolfram_alpha_tools)} tools.")
601
+ except Exception as e:
602
+ logger.error(f"Failed to initialize WolframAlpha tools: {e}", exc_info=True)
603
+ _wolfram_alpha_tools = []
604
+ return _wolfram_alpha_tools
605
+
606
+ # --- Agent Initialization ---
607
+
608
+ def initialize_math_agent() -> ReActAgent:
609
+ """Initializes the Math Agent with Python and Wolfram Alpha tools."""
610
+ logger.info("Initializing MathAgent...")
611
+
612
+ # Configuration
613
+ agent_llm_model = os.getenv("MATH_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
614
+ gemini_api_key = os.getenv("GEMINI_API_KEY")
615
+
616
+ if not gemini_api_key:
617
+ logger.error("GEMINI_API_KEY not found in environment variables for MathAgent.")
618
+ raise ValueError("GEMINI_API_KEY must be set for MathAgent")
619
+
620
+ try:
621
+ llm = GoogleGenAI(
622
+ api_key=gemini_api_key,
623
+ model=agent_llm_model,
624
+ )
625
+ logger.info(f"Using agent LLM: {agent_llm_model}")
626
+
627
+ # Combine Python tools and Wolfram Alpha tools
628
+ all_tools = get_python_math_tools() + get_wolfram_alpha_tools()
629
+ if not all_tools:
630
+ logger.warning("No math tools available (Python or WolframAlpha). MathAgent may be ineffective.")
631
+
632
+ # System prompt (consider loading from file)
633
+ system_prompt = """\
634
+ You are MathAgent, a powerful mathematical problem solver. Your goal is to accurately answer mathematical questions using the available tools.
635
+
636
+ Available Tools:
637
+ - Python Tools: A comprehensive suite for symbolic math (SymPy), numerical computation (NumPy/SciPy), statistics, linear algebra, calculus, ODEs, and transforms. Prefixed with '(Python)'. Use these for precise calculations when the method is clear.
638
+ - WolframAlpha Tool: Accesses Wolfram Alpha for complex queries, natural language math questions, data, and real-world facts. Prefixed with '(WolframAlpha)'. Use this for broader questions, knowledge-based math, or when Python tools are insufficient.
639
+
640
+ Workflow:
641
+ 1. **Thought**: Analyze the question. Determine the mathematical concepts involved. Decide the best tool or sequence of tools to use. Prefer Python tools for specific, well-defined calculations. Use WolframAlpha for complex, ambiguous, or knowledge-based queries.
642
+ 2. **Action**: Call the chosen tool with the correct arguments. Ensure inputs match the tool's requirements (e.g., list of lists for matrices, strings for symbolic expressions).
643
+ 3. **Observation**: Examine the tool's output. Check for errors or unexpected results.
644
+ 4. **Iteration**: If the result is incorrect or incomplete, rethink the approach. Try a different tool, adjust parameters, or break the problem down further. If a Python tool fails, consider rephrasing for WolframAlpha.
645
+ 5. **Final Answer**: Once the correct answer is obtained, state it clearly and concisely. Provide the numerical result, symbolic expression, or explanation as requested.
646
+ 6. **Hand-Off**: Pass the final mathematical result or analysis to **planner_agent** for integration into the overall response.
647
+
648
+ Constraints:
649
+ - Always use a tool for calculations; do not perform calculations yourself.
650
+ - Clearly state which tool you are using and why.
651
+ - Handle potential errors gracefully and report them if they prevent finding a solution.
652
+ - Pay close attention to input formats required by each tool (e.g., lists for vectors/matrices, strings for symbolic expressions).
653
+ """
654
+
655
+ agent = ReActAgent(
656
+ name="math_agent",
657
+ description=(
658
+ "MathAgent solves mathematical problems using a suite of Python tools (SymPy, NumPy, SciPy) and WolframAlpha. "
659
+ "It handles symbolic math, numerical computation, statistics, linear algebra, calculus, and more."
660
+ ),
661
+ tools=all_tools,
662
+ llm=llm,
663
+ system_prompt=system_prompt,
664
+ can_handoff_to=["planner_agent"],
665
+ )
666
+ logger.info("MathAgent initialized successfully.")
667
+ return agent
668
+
669
+ except Exception as e:
670
+ logger.error(f"Error during MathAgent initialization: {e}", exc_info=True)
671
+ raise
672
+
673
+ # Example usage (for testing if run directly)
674
+ if __name__ == "__main__":
675
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
676
+ logger.info("Running math_agent.py directly for testing...")
677
+
678
+ # Ensure API keys are set for testing
679
+ required_keys = ["GEMINI_API_KEY"] # WOLFRAM_ALPHA_APP_ID is optional
680
+ missing_keys = [key for key in required_keys if not os.getenv(key)]
681
+ if missing_keys:
682
+ print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
683
+ else:
684
+ if not os.getenv("WOLFRAM_ALPHA_APP_ID"):
685
+ print("Warning: WOLFRAM_ALPHA_APP_ID not set. WolframAlpha tools will be unavailable for testing.")
686
+ try:
687
+ test_agent = initialize_math_agent()
688
+ print("Math Agent initialized successfully for testing.")
689
+ # Example test
690
+ # result = test_agent.chat("What is the integral of x**2 from 0 to 1?")
691
+ # print(f"Test query result: {result}")
692
+ # result2 = test_agent.chat("what is the population of france?") # Test WolframAlpha
693
+ # print(f"Test query 2 result: {result2}")
694
+ except Exception as e:
695
+ print(f"Error during testing: {e}")
696
+
agents/planner_agent.py ADDED
@@ -0,0 +1,253 @@
+import os
+import logging
+from typing import List, Dict
+from dotenv import load_dotenv
+
+from llama_index.core.agent.workflow import ReActAgent
+from llama_index.core.tools import FunctionTool
+from llama_index.llms.google_genai import GoogleGenAI
+
+# Load environment variables
+load_dotenv()
+
+# Setup logging
+logger = logging.getLogger(__name__)
+
+# Helper function to load prompt from file
+def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+    """Loads a prompt from a text file, falling back to a default on failure."""
+    try:
+        # Resolve the prompt path relative to this script's directory
+        script_dir = os.path.dirname(__file__)
+        prompt_path = os.path.join(script_dir, filename)
+        with open(prompt_path, "r") as f:
+            prompt = f.read()
+        logger.info(f"Successfully loaded prompt from {prompt_path}")
+        return prompt
+    except FileNotFoundError:
+        logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
+        return default_prompt
+    except Exception as e:
+        logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+        return default_prompt
+
+# --- Tool Functions ---
+
+def plan(objective: str) -> List[str]:
+    """
+    Generate a list of sub-steps (4-8) from the given objective using an LLM.
+    Args:
+        objective (str): The research or task objective.
+    Returns:
+        List[str]: A list of sub-steps as strings, or an error message list.
+    """
+    logger.info(f"Generating plan for objective: {objective[:100]}...")
+
+    # Configuration for the planning LLM (can be overridden via environment variable)
+    planner_llm_model = os.getenv("PLANNER_TOOL_LLM_MODEL", "models/gemini-1.5-pro")
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not found for planning tool LLM.")
+        return ["Error: GEMINI_API_KEY not set for planning."]
+
+    # Prompt for the LLM to generate sub-steps
+    input_prompt = (
+        "You are a research assistant. "
+        "Given an objective, break it down into a list of 4-8 concise, actionable sub-steps. "
+        "Ensure the steps are logically ordered.\n"
+        f"Objective: {objective}\n"
+        "Sub-steps (one per line, numbered):"
+    )
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=planner_llm_model)
+        logger.info(f"Using planning LLM: {planner_llm_model}")
+        response = llm.complete(input_prompt)
+
+        # Post-process: split lines into sub-steps, removing any numbering
+        lines = response.text.strip().split("\n")
+        sub_steps = []
+        for line in lines:
+            line = line.strip()
+            if not line:
+                continue
+            # Remove potential leading numbering (e.g., "1. ", "- ")
+            if line[0].isdigit() and "." in line[:3]:
+                text = line.split(".", 1)[1].strip()
+            elif line.startswith("- "):
+                text = line[2:].strip()
+            else:
+                text = line
+
+            if text:
+                sub_steps.append(text)
+
+        if not sub_steps:
+            logger.warning("LLM generated no sub-steps for the objective.")
+            return ["Error: Failed to generate sub-steps."]
+
+        logger.info(f"Generated {len(sub_steps)} sub-steps.")
+        return sub_steps
+
+    except Exception as e:
+        logger.error(f"LLM call failed during planning: {e}", exc_info=True)
+        return [f"Error during planning: {e}"]
+
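+# Illustrative example (hypothetical output, not from a real run):
+#   plan("Assess the safety record of sodium-ion batteries")
+# could return something like:
+#   ["Define the evaluation criteria", "Search for recent safety studies",
+#    "Compare findings against lithium-ion data", "Summarize risks and open questions"]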
+def synthesize_and_respond(results: List[Dict[str, str]]) -> str:
+    """
+    Aggregate results from sub-steps into a coherent final report using an LLM.
+    Args:
+        results (List[Dict[str, str]]): List of dictionaries, each with "sub_step" and "answer" keys.
+    Returns:
+        str: A unified, well-structured response, or an error message.
+    """
+    logger.info(f"Synthesizing results from {len(results)} sub-steps...")
+    if not results:
+        logger.warning("Synthesize called with empty results list.")
+        return "No results provided to synthesize."
+
+    # Format the results for the synthesis prompt
+    summary_blocks = ""
+    for i, result in enumerate(results):
+        sub_step = result.get("sub_step", f"Step {i+1}")
+        answer = result.get("answer", "No answer provided.")
+        summary_blocks += f"Sub-step {i+1}: {sub_step}\nAnswer {i+1}: {answer}\n\n"
+
+    # Configuration for the synthesis LLM (can be overridden via environment variable)
+    synthesizer_llm_model = os.getenv("SYNTHESIZER_LLM_MODEL", "models/gemini-1.5-pro")
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not found for synthesis tool LLM.")
+        return "Error: GEMINI_API_KEY not set for synthesis."
+
+    # Prompt for the LLM
+    input_prompt = f"""You are an expert synthesizer. Given the following sub-steps and their answers derived from an initial objective, produce a single, coherent, comprehensive final report that addresses the original objective:
+
+--- SUB-STEP RESULTS ---
+{summary_blocks.strip()}
+--- END SUB-STEP RESULTS ---
+
+Generate the Final Report:
+"""
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=synthesizer_llm_model)
+        logger.info(f"Using synthesis LLM: {synthesizer_llm_model}")
+        response = llm.complete(input_prompt)
+        logger.info("Synthesis successful.")
+        return response.text
+    except Exception as e:
+        logger.error(f"LLM call failed during synthesis: {e}", exc_info=True)
+        return f"Error during synthesis: {e}"
+
+# --- Tool Definitions ---
+synthesize_tool = FunctionTool.from_defaults(
+    fn=synthesize_and_respond,
+    name="synthesize_and_respond",
+    description=(
+        "Aggregates results from multiple sub-steps into a final coherent report. "
+        "Input: results (List[Dict[str, str]]) where each dict has \"sub_step\" and \"answer\". "
+        "Output: A unified report (str) or error message."
+    ),
+)
+
+generate_substeps_tool = FunctionTool.from_defaults(
+    fn=plan,
+    name="generate_substeps",
+    description=(
+        "Decomposes a high-level objective into a concise roadmap of 4–8 actionable sub-steps using an LLM. "
+        "Input: objective (str). Output: List of sub-step strings (List[str]) or error list."
+    ),
+)
+
+# --- Agent Initialization ---
+def initialize_planner_agent() -> ReActAgent:
+    """Initializes the Planner Agent."""
+    logger.info("Initializing PlannerAgent...")
+
+    # Configuration for the agent's main LLM
+    agent_llm_model = os.getenv("PLANNER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not found for PlannerAgent.")
+        raise ValueError("GEMINI_API_KEY must be set for PlannerAgent")
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+        logger.info(f"Using agent LLM: {agent_llm_model}")
+
+        # Load system prompt (falls back to a placeholder if the file is missing)
+        default_system_prompt = "You are PlannerAgent... [Default prompt content - replace with actual]"
+        system_prompt = load_prompt_from_file("../prompts/planner_agent_prompt.txt", default_system_prompt)
+        if system_prompt == default_system_prompt:
+            logger.warning("Using default/fallback system prompt for PlannerAgent.")
+
+        # Define available tools
+        tools = [generate_substeps_tool, synthesize_tool]
+
+        # Define valid handoff targets
+        valid_handoffs = [
+            "code_agent",
+            "research_agent",
+            "math_agent",
+            "role_agent",
+            "image_analyzer_agent",
+            "text_analyzer_agent",
+            "verifier_agent",
+            "reasoning_agent",
+        ]
+
+        agent = ReActAgent(
+            name="planner_agent",
+            description=(
+                "Strategically plans tasks by breaking down objectives into sub-steps using `generate_substeps`. "
+                "Orchestrates execution by handing off sub-steps to specialized agents. "
+                "Synthesizes final results using `synthesize_and_respond`."
+            ),
+            tools=tools,
+            llm=llm,
+            system_prompt=system_prompt,
+            can_handoff_to=valid_handoffs,
+        )
+        logger.info("PlannerAgent initialized successfully.")
+        return agent
+
+    except Exception as e:
+        logger.error(f"Error during PlannerAgent initialization: {e}", exc_info=True)
+        raise
+
+# Example usage (for testing if run directly)
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger.info("Running planner_agent.py directly for testing...")
+
+    # Ensure API key is set
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
+    else:
+        try:
+            # Test plan generation
+            print("\nTesting plan generation...")
+            test_objective = "Analyze the market trends for electric vehicles in Europe for 2024."
+            substeps = plan(test_objective)
+            print(f"Generated Sub-steps:\n{substeps}")
+
+            # Test synthesis
+            print("\nTesting synthesis...")
+            test_results = [
+                {"sub_step": "Identify key EV manufacturers in Europe.", "answer": "Tesla, VW, Stellantis, Renault."},
+                {"sub_step": "Find recent sales data.", "answer": "EV sales grew 25% year-over-year in Q1 2024."},
+                {"sub_step": "Analyze government incentives.", "answer": "Germany reduced subsidies, France maintained them."},
+            ]
+            report = synthesize_and_respond(test_results)
+            print(f"Synthesized Report:\n{report}")
+
+            # Initialize the agent (optional)
+            # test_agent = initialize_planner_agent()
+            # print("\nPlanner Agent initialized successfully for testing.")
+
+        except Exception as e:
+            print(f"Error during testing: {e}")
+
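+# Illustrative wiring sketch (not part of this module): how the planner and its
+# handoff targets could be combined into a single multi-agent workflow. The
+# AgentWorkflow arguments shown are assumptions about the llama_index API.
+#
+# from llama_index.core.agent.workflow import AgentWorkflow
+#
+# workflow = AgentWorkflow(
+#     agents=[initialize_planner_agent(), ...],  # plus the specialist agents
+#     root_agent="planner_agent",                # planner orchestrates the others
+# )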
agents/reasoning_agent.py ADDED
@@ -0,0 +1,167 @@
+import os
+import logging
+from dotenv import load_dotenv
+
+from llama_index.core.agent.workflow import ReActAgent
+from llama_index.core.tools import FunctionTool
+from llama_index.llms.google_genai import GoogleGenAI
+from llama_index.llms.openai import OpenAI
+
+# Load environment variables
+load_dotenv()
+
+# Setup logging
+logger = logging.getLogger(__name__)
+
+# Helper function to load prompt from file
+def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+    """Loads a prompt from a text file, falling back to a default on failure."""
+    try:
+        # Resolve the prompt path relative to this script's directory
+        script_dir = os.path.dirname(__file__)
+        prompt_path = os.path.join(script_dir, filename)
+        with open(prompt_path, "r") as f:
+            prompt = f.read()
+        logger.info(f"Successfully loaded prompt from {prompt_path}")
+        return prompt
+    except FileNotFoundError:
+        logger.warning(f"Prompt file {filename} not found at {prompt_path}. Using default.")
+        return default_prompt
+    except Exception as e:
+        logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+        return default_prompt
+
+# --- Tool Function ---
+
+def reasoning_tool_fn(context: str) -> str:
+    """
+    Perform chain-of-thought reasoning over the provided context using a dedicated LLM.
+    Args:
+        context (str): The conversation/workflow history and current problem statement.
+    Returns:
+        str: A structured reasoning trace and conclusion, or an error message.
+    """
+    logger.info(f"Executing reasoning tool with context length: {len(context)}")
+
+    # Configuration for the reasoning LLM (OpenAI in the original)
+    reasoning_llm_model = os.getenv("REASONING_LLM_MODEL", "gpt-4o-mini")  # gpt-4o-mini by default
+    openai_api_key = os.getenv("ALPAFLOW_OPENAI_API_KEY")  # Specific key from original code
+
+    if not openai_api_key:
+        logger.error("ALPAFLOW_OPENAI_API_KEY not found for reasoning tool LLM.")
+        return "Error: ALPAFLOW_OPENAI_API_KEY must be set to use the reasoning tool."
+
+    # Define the prompt for the reasoning LLM
+    reasoning_prompt = f"""You are an expert reasoning engine. Analyze the following workflow context and problem statement:
+
+--- CONTEXT START ---
+{context}
+--- CONTEXT END ---
+
+Perform the following steps:
+1. **Comprehension**: Identify the core question/problem and key constraints from the context.
+2. **Decomposition**: Break the problem into logical sub-steps.
+3. **Chain-of-Thought**: Reason through each sub-step, stating assumptions and deriving implications.
+4. **Verification**: Check conclusions against constraints.
+5. **Synthesis**: Integrate results into a cohesive answer/recommendation.
+6. **Clarity**: Use precise language.
+
+Respond with your numbered reasoning steps followed by a concise final conclusion or recommendation.
+"""
+
+    try:
+        # Note: the original used OpenAI with a specific key and model; retaining that.
+        llm = OpenAI(
+            model=reasoning_llm_model,
+            api_key=openai_api_key,
+            # reasoning_effort="high"  # Enable if supported by the model/integration
+        )
+        logger.info(f"Using reasoning LLM: {reasoning_llm_model}")
+        response = llm.complete(reasoning_prompt)
+        logger.info("Reasoning tool execution successful.")
+        return response.text
+    except Exception as e:
+        logger.error(f"Error during reasoning tool LLM call: {e}", exc_info=True)
+        return f"Error during reasoning: {e}"
+
+# --- Tool Definition ---
+reasoning_tool = FunctionTool.from_defaults(
+    fn=reasoning_tool_fn,
+    name="reasoning_tool",
+    description=(
+        "Applies detailed chain-of-thought reasoning to the provided workflow context using a dedicated LLM. "
+        "Input: context (str). Output: Reasoning steps and conclusion (str) or error message."
+    ),
+)
+
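+# Illustrative I/O shape (hypothetical, not from a real run):
+#   reasoning_tool_fn("User asked: X. ResearchAgent found: Y.")
+# returns a numbered trace roughly like:
+#   "1. Comprehension: ...\n2. Decomposition: ...\n...\nConclusion: ..."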
+# --- Agent Initialization ---
+def initialize_reasoning_agent() -> ReActAgent:
+    """Initializes the Reasoning Agent."""
+    logger.info("Initializing ReasoningAgent...")
+
+    # Configuration for the agent's main LLM (Google GenAI)
+    agent_llm_model = os.getenv("REASONING_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+    gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+    if not gemini_api_key:
+        logger.error("GEMINI_API_KEY not found for ReasoningAgent.")
+        raise ValueError("GEMINI_API_KEY must be set for ReasoningAgent")
+
+    try:
+        llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+        logger.info(f"Using agent LLM: {agent_llm_model}")
+
+        # Load system prompt (falls back to a placeholder if the file is missing)
+        default_system_prompt = "You are ReasoningAgent... [Default prompt content - replace with actual]"
+        system_prompt = load_prompt_from_file("../prompts/reasoning_agent_prompt.txt", default_system_prompt)
+        if system_prompt == default_system_prompt:
+            logger.warning("Using default/fallback system prompt for ReasoningAgent.")
+
+        agent = ReActAgent(
+            name="reasoning_agent",
+            description=(
+                "A pure reasoning agent that uses the `reasoning_tool` for detailed chain-of-thought analysis "
+                "on the provided context, then hands off the result to the `planner_agent`."
+            ),
+            tools=[reasoning_tool],  # Only has access to the reasoning tool
+            llm=llm,
+            system_prompt=system_prompt,
+            can_handoff_to=["planner_agent"],
+        )
+        logger.info("ReasoningAgent initialized successfully.")
+        return agent
+
+    except Exception as e:
+        logger.error(f"Error during ReasoningAgent initialization: {e}", exc_info=True)
+        raise
+
+# Example usage (for testing if run directly)
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger.info("Running reasoning_agent.py directly for testing...")
+
+    # Check required keys
+    required_keys = ["GEMINI_API_KEY", "ALPAFLOW_OPENAI_API_KEY"]
+    missing_keys = [key for key in required_keys if not os.getenv(key)]
+    if missing_keys:
+        print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+    else:
+        try:
+            # Test the reasoning tool directly
+            print("\nTesting reasoning_tool_fn...")
+            test_context = "User asked: What is the capital of France? ResearchAgent found: Paris. VerifierAgent confirmed: High confidence."
+            reasoning_output = reasoning_tool_fn(test_context)
+            print(f"Reasoning Tool Output:\n{reasoning_output}")
+
+            # Initialize the agent (optional)
+            # test_agent = initialize_reasoning_agent()
+            # print("\nReasoning Agent initialized successfully for testing.")
+            # Example chat (would require a context-passing mechanism)
+            # result = test_agent.chat("Synthesize the findings about the capital of France.")
+            # print(f"Agent chat result: {result}")
+
+        except Exception as e:
+            print(f"Error during testing: {e}")
+
agents/research_agent.py ADDED
@@ -0,0 +1,622 @@
+import os
+import time
+import logging
+import re  # Regex for video ID extraction
+from typing import List, Optional, Dict
+from dotenv import load_dotenv
+
+from llama_index.core.agent.workflow import ReActAgent
+from llama_index.core.tools import FunctionTool
+from llama_index.llms.google_genai import GoogleGenAI
+from llama_index.tools.google import GoogleSearchToolSpec
+from llama_index.tools.tavily_research import TavilyToolSpec
+from llama_index.tools.wikipedia import WikipediaToolSpec
+from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
+from llama_index.tools.yahoo_finance import YahooFinanceToolSpec
+from llama_index.tools.arxiv import ArxivToolSpec
+
+# Attempt to import browser tools; handle import errors gracefully
+try:
+    from selenium import webdriver
+    from selenium.webdriver.common.by import By
+    from selenium.webdriver.common.keys import Keys
+    from selenium.common.exceptions import WebDriverException, NoSuchElementException, TimeoutException
+    from helium import start_chrome, go_to, find_all, Text, kill_browser, get_driver, click, write, press
+    SELENIUM_AVAILABLE = True
+except ImportError:
+    logging.warning("Selenium or Helium not installed. Browser interaction tools will be unavailable.")
+    SELENIUM_AVAILABLE = False
+
+# Attempt to import YouTube transcript API
+try:
+    from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
+    YOUTUBE_TRANSCRIPT_API_AVAILABLE = True
+except ImportError:
+    logging.warning("youtube-transcript-api not installed. YouTube transcript tool will be unavailable.")
+    YOUTUBE_TRANSCRIPT_API_AVAILABLE = False
+
+# Load environment variables
+load_dotenv()
+
+# Setup logging
+logger = logging.getLogger(__name__)
+
+# --- Helper function to extract YouTube Video ID ---
+def extract_video_id(url: str) -> Optional[str]:
+    """Extracts the YouTube video ID from various URL formats."""
+    # Handles watch (?v=), /v/, embed, youtu.be, and shorts URLs
+    match = re.search(r'(?:v=|/v/|embed/|youtu\.be/|/shorts/)([A-Za-z0-9_-]+)', url)
+    if match:
+        return match.group(1)
+    return None
+
+# --- YouTube Transcript Tool ---
+def get_youtube_transcript(video_url_or_id: str, languages=None) -> str:
+    """Fetches the transcript for a YouTube video using its URL or video ID.
+    Specify preferred languages as a list (e.g., ["en", "es"]).
+    Returns the transcript text or an error message.
+    """
+    if languages is None:
+        languages = ["en"]
+    if not YOUTUBE_TRANSCRIPT_API_AVAILABLE:
+        return "Error: youtube-transcript-api library is required but not installed."
+
+    logger.info(f"Attempting to fetch YouTube transcript for: {video_url_or_id}")
+    video_id = extract_video_id(video_url_or_id)
+    if not video_id:
+        # Assume it might be an ID already if extraction fails
+        if re.match(r"^[a-zA-Z0-9_\-]+$", video_url_or_id):
+            video_id = video_url_or_id
+            logger.info("Input treated as video ID.")
+        else:
+            logger.error(f"Could not extract valid YouTube video ID from: {video_url_or_id}")
+            return f"Error: Invalid YouTube URL or Video ID format: {video_url_or_id}"
+
+    try:
+        # Fetch the list of available transcripts
+        api = YouTubeTranscriptApi()
+        transcript_list = api.list(video_id)
+
+        # Try to find a transcript in the specified languages
+        transcript = transcript_list.find_transcript(languages)
+
+        # Fetch the actual transcript data and combine the text parts into one string
+        transcript_data = transcript.fetch()
+        full_transcript = " ".join(snippet.text for snippet in transcript_data)
+        logger.info(f"Successfully fetched transcript for video ID {video_id} in language {transcript.language}.")
+        return full_transcript
+
+    except TranscriptsDisabled:
+        logger.warning(f"Transcripts are disabled for video ID: {video_id}")
+        return f"Error: Transcripts are disabled for this video (ID: {video_id})."
+    except NoTranscriptFound as e:
+        logger.warning(f"No transcript found for video ID {video_id} in languages {languages}: {e}")
+        # Try fetching any available transcript if the specific languages failed
+        try:
+            logger.info(f"Attempting to fetch any available transcript for {video_id}")
+            any_transcript = next(iter(transcript_list))  # TranscriptList is iterable
+            any_transcript_data = any_transcript.fetch()
+            full_transcript = " ".join(snippet.text for snippet in any_transcript_data)
+            logger.info(f"Successfully fetched fallback transcript for video ID {video_id} in language {any_transcript.language}.")
+            return full_transcript
+        except Exception as fallback_e:
+            logger.error(f"Could not find any transcript for video ID {video_id}. Original error: {e}. Fallback error: {fallback_e}")
+            return f"Error: No transcript found for video ID {video_id} in languages {languages} or any fallback language."
+    except Exception as e:
+        logger.error(f"Unexpected error fetching transcript for video ID {video_id}: {e}", exc_info=True)
+        return f"Error fetching transcript: {e}"
+
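+# Illustrative inputs accepted by extract_video_id / get_youtube_transcript
+# (the video ID below is a made-up placeholder):
+#   extract_video_id("https://www.youtube.com/watch?v=abc123XYZ_-")  -> "abc123XYZ_-"
+#   extract_video_id("https://youtu.be/abc123XYZ_-")                 -> "abc123XYZ_-"
+#   extract_video_id("https://www.youtube.com/shorts/abc123XYZ_-")   -> "abc123XYZ_-"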
+# --- Browser Interaction Tools (Conditional on Selenium/Helium availability) ---
+
+# Global browser instance (managed by initializer)
+_browser_instance = None
+_browser_driver = None
+
+# Helper decorator for browser tool error handling and logging
+def browser_tool_handler(func):
+    def wrapper(*args, **kwargs):
+        if not SELENIUM_AVAILABLE:
+            return "Error: Browser tools require Selenium and Helium to be installed."
+        if _browser_instance is None or _browser_driver is None:
+            # Attempt to initialize if not already done (e.g., if called directly).
+            # Not ideal: initialization should normally happen via get_research_initializer().
+            logger.warning("Browser accessed before explicit initialization. Attempting to initialize now.")
+            try:
+                get_research_initializer()  # This will initialize the browser
+                if _browser_instance is None or _browser_driver is None:
+                    return "Error: Browser initialization failed."
+            except Exception as init_err:
+                return f"Error: Browser initialization failed: {init_err}"
+
+        func_name = func.__name__
+        logger.info(f"Executing browser tool: {func_name} with args: {args}, kwargs: {kwargs}")
+        try:
+            result = func(*args, **kwargs)
+            logger.info(f"Tool {func_name} executed successfully.")
+            # Ensure result is a string for consistency
+            return str(result) if result is not None else f"{func_name} completed."
+        except (NoSuchElementException, WebDriverException, TimeoutException) as e:
+            # Report only the first line of the error to keep tool output compact
+            logger.warning(f"Browser error in {func_name}: {e.__class__.__name__} - {str(e).splitlines()[0]}")
+            return f"Error in {func_name}: {e.__class__.__name__} - {str(e).splitlines()[0]}"
+        except Exception as e:
+            logger.error(f"Unexpected error in {func_name}: {e}", exc_info=True)
+            return f"Unexpected error in {func_name}: {e}"
+    return wrapper
+
+@browser_tool_handler
+def visit(url: str, wait_seconds: float = 3.0) -> str:
+    """Navigate the browser to the specified URL and wait for the page to load."""
+    logger.info(f"Navigating to {url} and waiting {wait_seconds}s...")
+    go_to(url)
+    time.sleep(wait_seconds)  # Wait for dynamic content
+    current_url = _browser_driver.current_url
+    return f"Successfully navigated to: {current_url}"
+
+@browser_tool_handler
+def get_text_by_css(selector: str) -> List[str]:
+    """Extract text from all elements matching a CSS selector. Use selector="body" for all visible text."""
+    logger.info(f"Extracting text using CSS selector: {selector}")
+    if selector.lower() == "body":
+        # Helium Text() might be too broad; try the body tag first
+        try:
+            body_element = _browser_driver.find_element(By.TAG_NAME, "body")
+            all_text = body_element.text.split("\n")  # Split into lines
+            # Filter out empty lines
+            non_empty_text = [line.strip() for line in all_text if line.strip()]
+            logger.info(f"Extracted {len(non_empty_text)} lines of text from body.")
+            return non_empty_text
+        except NoSuchElementException:
+            logger.warning("Could not find body tag, falling back to Helium Text().")
+        # Fallback: process Helium Text elements
+        elements = find_all(Text())
+        texts = [elem.web_element.text for elem in elements if elem.web_element.is_displayed() and elem.web_element.text.strip()]
+        logger.info(f"Extracted {len(texts)} visible text elements using Helium Text().")
+        return texts
+    else:
+        # Use Selenium directly for more control
+        elements_selenium = _browser_driver.find_elements(By.CSS_SELECTOR, selector)
+        texts = [elem.text for elem in elements_selenium if elem.is_displayed() and elem.text.strip()]
+        logger.info(f"Extracted {len(texts)} visible text elements for selector {selector}.")
+        return texts
+
+@browser_tool_handler
+def get_page_html() -> str:
+    """Return the full HTML source of the current page."""
+    logger.info("Retrieving page HTML source...")
+    return _browser_driver.page_source
+
+@browser_tool_handler
+def click_element_by_css(selector: str, index: int = 0) -> str:
+    """Click on the Nth (0-based index) element matching the CSS selector."""
+    logger.info(f"Attempting to click element {index} matching selector: {selector}")
+    # Use Selenium directly for finding elements
+    elements_selenium = _browser_driver.find_elements(By.CSS_SELECTOR, selector)
+    if not elements_selenium:
+        raise NoSuchElementException(f"No elements found for selector: {selector}")
+    if index >= len(elements_selenium):
+        raise IndexError(f"Index {index} out of bounds. Only {len(elements_selenium)} elements found for selector: {selector}")
+
+    target_element = elements_selenium[index]
+    if not target_element.is_displayed() or not target_element.is_enabled():
+        logger.warning(f"Element {index} for selector {selector} is not visible or enabled. Attempting click anyway.")
+        # Try scrolling into view first
+        try:
+            _browser_driver.execute_script("arguments[0].scrollIntoView(true);", target_element)
+            time.sleep(0.5)
+        except Exception as scroll_err:
+            logger.warning(f"Could not scroll element into view: {scroll_err}")
+
+    # Use Helium click, which may handle overlays better, passing the Selenium element
+    click(target_element)
+    time.sleep(1.5)  # Wait after click
+    return f"Clicked element {index} matching selector {selector}. Current URL: {_browser_driver.current_url}"
+
+@browser_tool_handler
+def input_text_by_css(selector: str, text: str, index: int = 0, press_enter: bool = False) -> str:
+    """Input text into the Nth (0-based index) element matching the CSS selector. Optionally press Enter."""
+    logger.info(f"Attempting to input text into element {index} matching selector: {selector}")
+    # Use Selenium directly for finding elements
+    elements_selenium = _browser_driver.find_elements(By.CSS_SELECTOR, selector)
+    if not elements_selenium:
+        raise NoSuchElementException(f"No elements found for selector: {selector}")
+    if index >= len(elements_selenium):
+        raise IndexError(f"Index {index} out of bounds. Only {len(elements_selenium)} elements found for selector: {selector}")
+
+    target_element = elements_selenium[index]
+    if not target_element.is_displayed() or not target_element.is_enabled():
+        logger.warning(f"Input element {index} for selector {selector} is not visible or enabled. Attempting input anyway.")
+        # Try scrolling into view
+        try:
+            _browser_driver.execute_script("arguments[0].scrollIntoView(true);", target_element)
+            time.sleep(0.5)
+        except Exception as scroll_err:
+            logger.warning(f"Could not scroll input element into view: {scroll_err}")
+
+    # Use Helium write, passing the Selenium element
+    write(text, into=target_element)
+    time.sleep(0.5)
+    if press_enter:
+        press(Keys.ENTER)
+        time.sleep(1.5)  # Wait longer if Enter was pressed
+        return f"Input text into element {index} ({selector}) and pressed Enter. Current URL: {_browser_driver.current_url}"
+    else:
+        return f"Input text into element {index} ({selector})."
+
+@browser_tool_handler
+def scroll_page(direction: str = "down", amount: str = "page") -> str:
+    """Scroll the page up or down by a specified amount ('page', 'top', 'bottom', or pixels)."""
+    logger.info(f"Scrolling {direction} by {amount}")
+    if direction not in ["up", "down"]:
+        raise ValueError("Direction must be \"up\" or \"down\".")
+
+    if amount == "page":
+        scroll_script = "window.scrollBy(0, window.innerHeight);" if direction == "down" else "window.scrollBy(0, -window.innerHeight);"
+    elif amount == "top":
+        scroll_script = "window.scrollTo(0, 0);"
+    elif amount == "bottom":
+        scroll_script = "window.scrollTo(0, document.body.scrollHeight);"
+    else:
+        try:
+            pixels = int(amount)
+            scroll_script = f"window.scrollBy(0, {pixels});" if direction == "down" else f"window.scrollBy(0, {-pixels});"
+        except ValueError:
+            raise ValueError("Amount must be \"page\", \"top\", \"bottom\", or a number of pixels.")
+
+    _browser_driver.execute_script(scroll_script)
+    time.sleep(1)  # Wait for scroll effects
+    return f"Scrolled {direction} by {amount}."
+
+@browser_tool_handler
+def go_back() -> str:
+    """Navigate the browser back one step in its history."""
+    logger.info("Navigating back...")
+    _browser_driver.back()
+    time.sleep(1.5)  # Wait after navigation
+    return f"Navigated back. Current URL: {_browser_driver.current_url}"
+
+@browser_tool_handler
+def close_popups() -> str:
+    """Send an ESC keypress to attempt to dismiss modals or pop-ups."""
+    logger.info("Sending ESC key...")
+    webdriver.ActionChains(_browser_driver).send_keys(Keys.ESCAPE).perform()
+    time.sleep(0.5)
+    return "Sent ESC key press."
+
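+# Illustrative sequence (hypothetical): how an agent run might chain these tools
+# to read a page behind a cookie banner.
+#
+# visit("https://example.com")          # load the page
+# close_popups()                        # dismiss a modal, if any
+# scroll_page("down", "page")           # reveal content below the fold
+# headings = get_text_by_css("h1")      # extract the main heading text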
+# --- Search Engine & Data Source Tools ---
+
+# --- Agent Initializer Class ---
+class ResearchAgentInitializer:
+    def __init__(self):
+        logger.info("Initializing ResearchAgent resources...")
+        self.llm = None
+        self.browser_tools = []
+        self.search_tools = []
+        self.datasource_tools = []
+        self.youtube_tool = None  # Added for YouTube tool
+
+        # Initialize LLM
+        self._initialize_llm()
+
+        # Initialize Browser (conditionally)
+        if SELENIUM_AVAILABLE:
+            self._initialize_browser()
+            self._create_browser_tools()
+        else:
+            logger.warning("Browser tools are disabled as Selenium/Helium are not available.")
+
+        # Initialize Search/Datasource Tools
+        self._create_search_tools()
+        self._create_datasource_tools()
+        self._create_youtube_tool()
+
+        logger.info("ResearchAgent resources initialized.")
+
+    def _initialize_llm(self):
+        agent_llm_model = os.getenv("RESEARCH_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+        gemini_api_key = os.getenv("GEMINI_API_KEY")
+        if not gemini_api_key:
+            logger.error("GEMINI_API_KEY not found for ResearchAgent LLM.")
+            raise ValueError("GEMINI_API_KEY must be set for ResearchAgent")
+        try:
+            self.llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+            logger.info(f"ResearchAgent LLM initialized: {agent_llm_model}")
+        except Exception as e:
+            logger.error(f"Failed to initialize ResearchAgent LLM: {e}", exc_info=True)
+            raise
+
+    def _initialize_browser(self):
+        global _browser_instance, _browser_driver, SELENIUM_AVAILABLE
+        if _browser_instance is None:
+            logger.info("Initializing browser (Chrome headless)...")
+            try:
+                chrome_options = webdriver.ChromeOptions()
+                # Configurable options from env vars
+                if os.getenv("RESEARCH_AGENT_CHROME_NO_SANDBOX", "true").lower() == "true":
+                    chrome_options.add_argument("--no-sandbox")
+                if os.getenv("RESEARCH_AGENT_CHROME_DISABLE_DEV_SHM", "true").lower() == "true":
+                    chrome_options.add_argument("--disable-dev-shm-usage")
+                # Add prefs for downloads/popups
+                chrome_options.add_experimental_option("prefs", {
+                    "download.prompt_for_download": False,
+                    "plugins.always_open_pdf_externally": True,
+                    "profile.default_content_settings.popups": 0
+                })
+
+                # Start Chrome using Helium
+                _browser_instance = start_chrome(headless=True, options=chrome_options)
+                _browser_driver = get_driver()  # Get the underlying Selenium driver
+                logger.info("Browser initialized successfully.")
+            except Exception as e:
+                logger.error(f"Failed to initialize browser: {e}", exc_info=True)
+                # Set flags to prevent tool usage
+                SELENIUM_AVAILABLE = False
+                _browser_instance = None
+                _browser_driver = None
+
+    def _create_browser_tools(self):
+        if not SELENIUM_AVAILABLE:
+            self.browser_tools = []
+            return
+
+        self.browser_tools = [
+            FunctionTool.from_defaults(fn=visit, name="visit_url"),  # Renamed for clarity
+            FunctionTool.from_defaults(fn=get_text_by_css, name="get_text_by_css"),
+            FunctionTool.from_defaults(fn=get_page_html, name="get_page_html"),
+            FunctionTool.from_defaults(fn=click_element_by_css, name="click_element_by_css"),
+            FunctionTool.from_defaults(fn=input_text_by_css, name="input_text_by_css"),
+            FunctionTool.from_defaults(fn=scroll_page, name="scroll_page"),
+            FunctionTool.from_defaults(fn=go_back, name="navigate_back"),  # Renamed
+            FunctionTool.from_defaults(fn=close_popups, name="close_popups"),
+        ]
+        for tool in self.browser_tools:
+            tool.metadata.description = f"(Browser) {tool.metadata.description}"
+        logger.info(f"Created {len(self.browser_tools)} browser interaction tools.")
+
+    def _create_search_tools(self):
+        self.search_tools = []
+
+        # Google Search
+        google_spec = GoogleSearchToolSpec(key=os.getenv("GOOGLE_API_KEY"), engine=os.getenv("GOOGLE_CSE_ID"))
+        if google_spec:
+            google_tool = FunctionTool.from_defaults(fn=google_spec.google_search, name="google_search")
+            google_tool.metadata.description = "(Search) Execute a Google Custom Search query. Returns structured results."
+            self.search_tools.append(google_tool)
+
+        # Tavily Search
+        tavily_spec = TavilyToolSpec(api_key=os.getenv("TAVILY_API_KEY"))
+        if tavily_spec:
+            # Use the general-purpose search method
+            tavily_tool = FunctionTool.from_defaults(fn=tavily_spec.search, name="tavily_search")
+            tavily_tool.metadata.description = "(Search) Perform a deep research search using Tavily API. Good for finding documents/articles."
+            self.search_tools.append(tavily_tool)
+
+        # DuckDuckGo Search
+        ddg_spec = DuckDuckGoSearchToolSpec()
+        if ddg_spec:
+            ddg_tool = FunctionTool.from_defaults(fn=ddg_spec.duckduckgo_full_search, name="duckduckgo_search")
+            ddg_tool.metadata.description = "(Search) Execute a DuckDuckGo search. Returns structured results."
+            self.search_tools.append(ddg_tool)
+
+        logger.info(f"Created {len(self.search_tools)} search engine tools.")
+
+    def _create_datasource_tools(self):
+        self.datasource_tools = []
+
+        # Wikipedia
+        wiki_spec = WikipediaToolSpec()
+        if wiki_spec:
+            wiki_search_tool = FunctionTool.from_defaults(fn=wiki_spec.search_data, name="wikipedia_search_pages")
+            wiki_search_tool.metadata.description = "(Wikipedia) Search for Wikipedia page titles matching a query."
+            wiki_load_tool = FunctionTool.from_defaults(fn=wiki_spec.load_data, name="wikipedia_load_page")
+            wiki_load_tool.metadata.description = "(Wikipedia) Load the full content of a specific Wikipedia page title."
+            self.datasource_tools.extend([wiki_search_tool, wiki_load_tool])
+
+        # Yahoo Finance
+        yf_spec = YahooFinanceToolSpec()
+        if yf_spec:
+            yf_tools_map = {
+                "balance_sheet": "Get the latest balance sheet for a stock ticker.",
+                "income_statement": "Get the latest income statement for a stock ticker.",
+                "cash_flow": "Get the latest cash flow statement for a stock ticker.",
+                "stock_basic_info": "Get basic info (price, market cap, summary) for a stock ticker.",
+                "stock_analyst_recommendations": "Get analyst recommendations for a stock ticker.",
+                "stock_news": "Get recent news headlines for a stock ticker."
+            }
+            for func_name, desc in yf_tools_map.items():
+                if hasattr(yf_spec, func_name):
+                    tool = FunctionTool.from_defaults(fn=getattr(yf_spec, func_name), name=f"yahoo_finance_{func_name}")
+                    tool.metadata.description = f"(YahooFinance) {desc}"
+                    self.datasource_tools.append(tool)
+                else:
+                    logger.warning(f"YahooFinance function {func_name} not found in spec.")
+
+        # ArXiv
+        arxiv_spec = ArxivToolSpec()
+        if arxiv_spec:
+            arxiv_tool = FunctionTool.from_defaults(fn=arxiv_spec.arxiv_query, name="arxiv_search")
+            arxiv_tool.metadata.description = "(ArXiv) Search ArXiv for academic papers matching a query."
+            self.datasource_tools.append(arxiv_tool)
+
+        logger.info(f"Created {len(self.datasource_tools)} specific data source tools.")
+
+    def _create_youtube_tool(self):
+        if YOUTUBE_TRANSCRIPT_API_AVAILABLE:
+            self.youtube_tool = FunctionTool.from_defaults(
+                fn=get_youtube_transcript,
+                name="get_youtube_transcript",
+                description=(
+                    "(YouTube) Fetches the transcript text for a given YouTube video URL or video ID. "
+                    "Specify preferred languages (e.g., [\"en\", \"es\"]). Returns transcript or error."
+                )
+            )
+            logger.info("Created YouTube transcript tool.")
+        else:
+            self.youtube_tool = None
+            logger.warning("YouTube transcript tool disabled because youtube-transcript-api is not installed.")
+
+    def get_agent(self) -> ReActAgent:
+        """Creates and returns the configured ReActAgent for research."""
+        logger.info("Creating ResearchAgent ReActAgent instance...")
+
+        all_tools = self.browser_tools + self.search_tools + self.datasource_tools
+        if self.youtube_tool:  # Add YouTube tool if available
+            all_tools.append(self.youtube_tool)
+
+        if not all_tools:
+            logger.warning("No tools available for ResearchAgent. It will likely be unable to function.")
+
+        # System prompt (consider loading from file); includes the YouTube tool
+        system_prompt = """\
+You are ResearchAgent, an autonomous web research assistant. Your goal is to gather information accurately and efficiently using the available tools.
+
+Available Tool Categories:
+- (Browser): Tools for direct web page interaction (visiting URLs, clicking, scrolling, extracting text/HTML, inputting text).
+- (Search): Tools for querying search engines (Google, DuckDuckGo, Tavily).
+- (Wikipedia): Tools for searching and loading Wikipedia pages.
+- (YahooFinance): Tools for retrieving financial data (balance sheets, income statements, stock info, news).
+- (ArXiv): Tool for searching academic papers on ArXiv.
+- (YouTube): Tool for fetching video transcripts (`get_youtube_transcript`).
+
+Workflow:
+1. **Thought**: Analyze the research goal. Break it down if necessary. Choose the *single best tool* for the *next immediate step*. Explain your choice. Consider the information needed and which tool provides it most directly (e.g., use YahooFinance for stock prices, Google/DDG for general web search, Tavily for document search, ArXiv for papers, Wikipedia for encyclopedic info, YouTube for video transcripts, Browser tools for specific website interaction).
+2. **Action**: Call the chosen tool with the correct arguments. Ensure inputs match the tool's requirements (e.g., URL or video ID for YouTube).
+3. **Observation**: Examine the tool's output. Extract the relevant information. Check for errors.
+4. **Reflect & Iterate**: Does the observation satisfy the immediate goal? Do you have enough information for the overall research task? If not, return to step 1 (Thought) to plan the *next* single step. If a tool failed, consider why and try an alternative tool or approach.
+5. **Synthesize**: Once all necessary information is gathered, synthesize the findings into a coherent answer to the original research goal.
+6. **Hand-Off**: Pass the synthesized findings to the appropriate next agent: **code_agent** (for coding), **math_agent** (for math), **text_analyzer_agent** (for text analysis), **planner_agent** (for planning/synthesis), or **reasoning_agent** (for logic/reasoning).
+
+Constraints:
+- Use only one tool per Action step.
+- Think step-by-step.
+- If using browser tools, start with `visit_url`.
+- Be mindful of potential errors and try alternative tools if one fails.
+- Synthesize results *before* handing off.
+"""
+
+        agent = ReActAgent(
+            name="research_agent",
+            description=(
+                "Performs web research using browser interaction, search engines (Google, DDG, Tavily), "
+                "specific data sources (Wikipedia, YahooFinance, ArXiv), and YouTube transcript fetching. Follows Thought-Action-Observation loop."
+            ),
+            tools=all_tools,
+            llm=self.llm,
+            system_prompt=system_prompt,
+            can_handoff_to=[
+                "code_agent",
+                "math_agent",
+                "text_analyzer_agent",
+                "planner_agent",
+                "reasoning_agent",
+            ],
+        )
+        logger.info("ResearchAgent ReActAgent instance created.")
+        return agent
+
+    def close_browser(self):
+        """Closes the browser instance if it was initialized."""
+        global _browser_instance, _browser_driver
+        if _browser_instance:
+            logger.info("Closing browser instance...")
+            try:
+                kill_browser()  # Use Helium's function
+                logger.info("Browser closed successfully.")
+            except Exception as e:
+                logger.error(f"Error closing browser: {e}", exc_info=True)
+            finally:
+                _browser_instance = None
+                _browser_driver = None
+        else:
+            logger.info("No active browser instance to close.")
+
+# --- Singleton Initializer Instance ---
+_research_agent_initializer_instance = None
+
+def get_research_initializer():
+    """Gets the singleton instance of ResearchAgentInitializer."""
+    global _research_agent_initializer_instance
+    if _research_agent_initializer_instance is None:
+        logger.info("Instantiating ResearchAgentInitializer for the first time.")
+        _research_agent_initializer_instance = ResearchAgentInitializer()
+    return _research_agent_initializer_instance
+
+# --- Public Initialization Function ---
+def initialize_research_agent() -> ReActAgent:
+    """Initializes and returns the Research Agent using a singleton initializer."""
+    logger.info("initialize_research_agent called.")
+    initializer = get_research_initializer()
+    return initializer.get_agent()
+
+# --- Cleanup Function (Optional but recommended) ---
+def cleanup_research_agent_resources():
+    """Cleans up resources used by the research agent, like the browser."""
+    logger.info("Cleaning up research agent resources...")
+    initializer = get_research_initializer()  # Ensure it exists
+    initializer.close_browser()
+
+# Example usage (for testing if run directly)
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger.info("Running research_agent.py directly for testing...")
+
+    # Check required keys
+    required_keys = ["GEMINI_API_KEY"]  # Others are optional depending on tools needed
+    missing_keys = [key for key in required_keys if not os.getenv(key)]
+    if missing_keys:
+        print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+    else:
+        # Warn about optional keys
+        optional_keys = ["GOOGLE_API_KEY", "GOOGLE_CSE_ID", "TAVILY_API_KEY", "WOLFRAM_ALPHA_APP_ID"]
+        missing_optional = [key for key in optional_keys if not os.getenv(key)]
+        if missing_optional:
+            print(f"Warning: Optional environment variable(s) not set: {', '.join(missing_optional)}. Some tools may be unavailable.")
+
+        test_agent = None
+        try:
+            # Test YouTube transcript tool directly
+            if YOUTUBE_TRANSCRIPT_API_AVAILABLE:
+                print("\nTesting YouTube transcript tool...")
+                # Example video: "Attention is All You Need" paper explanation
+                yt_url = "https://www.youtube.com/watch?v=TQQlZhbC5ps"
+                transcript = get_youtube_transcript(yt_url)
+                if not transcript.startswith("Error:"):
+                    print(f"Transcript fetched (first 500 chars):\n{transcript[:500]}...")
+                else:
+                    print(f"YouTube Transcript Fetch Failed: {transcript}")
+            else:
+                print("\nSkipping YouTube transcript test as youtube-transcript-api is not available.")
+
+            # Initialize agent AFTER testing standalone functions
+            test_agent = initialize_research_agent()
+            print("\nResearch Agent initialized successfully for testing.")
+
+            # Example test (requires browser tools to be available)
+            # if SELENIUM_AVAILABLE:
+            #     print("\nTesting browser visit...")
+            #     result = test_agent.chat("Visit https://example.com and tell me the main heading text using CSS selector 'h1'")
+            #     print(f"Test query result: {result}")
+            # else:
+            #     print("\nSkipping browser test as Selenium/Helium are not available.")
+
+            # Example search test (requires GOOGLE keys)
+            # if os.getenv("GOOGLE_API_KEY") and os.getenv("GOOGLE_CSE_ID"):
+            #     print("\nTesting Google Search...")
+            #     result_search = test_agent.chat("Search for 'LlamaIndex Agent Workflow'")
+            #     print(f"Search test result: {result_search}")
+            # else:
+            #     print("\nSkipping Google Search test as API keys are not set.")
+
+        except Exception as e:
+            print(f"Error during testing: {e}")
+        finally:
+            # Clean up browser if it was started
+            if test_agent:
+                print("\nCleaning up resources...")
+                cleanup_research_agent_resources()
+
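+# Recommended lifecycle sketch for embedding this agent in an application
+# (mirrors the __main__ block above):
+#
+# agent = initialize_research_agent()        # singleton: browser + tools built once
+# try:
+#     ...                                    # run queries against the agent
+# finally:
+#     cleanup_research_agent_resources()     # always close the headless browser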
agents/role_agent.py ADDED
@@ -0,0 +1,215 @@
+import os
+import logging
+from dotenv import load_dotenv
+
+import datasets
+from llama_index.core import Document, VectorStoreIndex
+from llama_index.core.agent.workflow import ReActAgent
+from llama_index.core.retrievers import QueryFusionRetriever
+from llama_index.core.retrievers.fusion_retriever import FUSION_MODES
+from llama_index.core.tools import FunctionTool
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.core.node_parser import SentenceSplitter
+from llama_index.core.postprocessor import SentenceTransformerRerank
+from llama_index.llms.google_genai import GoogleGenAI
+from llama_index.retrievers.bm25 import BM25Retriever
+
+# Load environment variables
+load_dotenv()
+
+# Setup logging
+logger = logging.getLogger(__name__)
+
+# --- Tool Function ---
+# Note: the retrieval tool relies on the retriever and reranker owned by a
+# RoleAgentInitializer instance, so it is implemented as an instance method
+# and bound when the tool is created in get_agent().
+
+# --- Initializer Class ---
+class RoleAgentInitializer:
+    def __init__(self):
+        logger.info("Initializing RoleAgent resources...")
+        # Configuration from environment variables
+        self.embed_model_name = os.getenv("ROLE_EMBED_MODEL", "Snowflake/snowflake-arctic-embed-l-v2.0")
+        self.reranker_model_name = os.getenv("ROLE_RERANKER_MODEL", "Alibaba-NLP/gte-multilingual-reranker-base")
+        self.dataset_name = os.getenv("ROLE_PROMPT_DATASET", "fka/awesome-chatgpt-prompts")
+        self.llm_model_name = os.getenv("ROLE_LLM_MODEL", "models/gemini-1.5-pro")
+        self.gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+        if not self.gemini_api_key:
+            logger.error("GEMINI_API_KEY not found in environment variables.")
+            raise ValueError("GEMINI_API_KEY must be set")
+
+        # Initialize models and components
+        try:
+            logger.info(f"Loading embed model: {self.embed_model_name}")
+            self.embed_model = HuggingFaceEmbedding(model_name=self.embed_model_name)
+
+            logger.info(f"Loading reranker model: {self.reranker_model_name}")
+            self.reranker = SentenceTransformerRerank(
+                model=self.reranker_model_name,
+                top_n=3
+            )
+
+            # Load the dataset
+            logger.info(f"Loading dataset: {self.dataset_name}")
+            prompts_dataset = datasets.load_dataset(self.dataset_name, split="train")
+
+            # Convert the dataset to a list of Documents
+            logger.info("Converting dataset to LlamaIndex Documents...")
+            documents = [
+                Document(
+                    text="\n".join([
+                        f"Act: {prompts_dataset['act'][i]}",
+                        f"Prompt: {prompts_dataset['prompt'][i]}",
+                    ]),
+                    metadata={"act": prompts_dataset["act"][i]}
+                )
+                for i in range(len(prompts_dataset))
+            ]
+
+            splitter = SentenceSplitter(chunk_size=256, chunk_overlap=20)
+
+            logger.info("Building vector index (this may take time)...")
+            index = VectorStoreIndex.from_documents(
+                documents,
+                embed_model=self.embed_model,
+                show_progress=True,
+                transformations=[splitter]
+            )
+            logger.info("Vector index built.")
+
+            logger.info("Building BM25 retriever...")
+            bm25_retriever = BM25Retriever.from_defaults(
+                docstore=index.docstore,
+                similarity_top_k=2
+            )
+            vector_retriever = index.as_retriever(similarity_top_k=2)
+
+            logger.info("Building query fusion retriever...")
+            self.retriever = QueryFusionRetriever(
+                [vector_retriever, bm25_retriever],
+                similarity_top_k=2,
+                mode=FUSION_MODES.RECIPROCAL_RANK,
+                verbose=True,
+            )
+            logger.info("RoleAgent resources initialized successfully.")
+
+        except Exception as e:
+            logger.error(f"Error during RoleAgent resource initialization: {e}", exc_info=True)
+            raise
+
+    def role_prompt_retriever_method(self, query: str) -> str:
+        """
+        Retrieve and return detailed role or task information.
+        Uses the retriever and reranker initialized on this class instance.
+        Args:
+            query (str): The user query describing the desired role, task, or prompt context.
+        Returns:
+            str: A string containing the assigned role/task description, or a message indicating no matching prompt was found.
+        """
+        logger.info(f"Role prompt retriever called with query: {query[:100]}...")
+        try:
+            results = self.retriever.retrieve(query)
+            reranked_results = self.reranker.postprocess_nodes(results, query_str=query)
+            if reranked_results:
+                # Return the top 3 results, as in the original logic
+                top_results_text = "\n\n".join([node.get_content() for node in reranked_results[:3]])
+                logger.info(f"Retrieved and reranked {len(reranked_results)} results. Returning top 3.")
+                return top_results_text
+            else:
+                logger.warning("No matching role prompt found after reranking.")
+                return "No matching role prompt found."
+        except Exception as e:
+            logger.error(f"Error during role prompt retrieval: {e}", exc_info=True)
+            return f"Error retrieving role prompt: {e}"
+
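+    # Illustrative call (hypothetical output): the retriever returns the top
+    # reranked "Act/Prompt" blocks verbatim from the dataset, e.g.:
+    #   role_prompt_retriever_method("act as a linux terminal")
+    #   -> "Act: Linux Terminal\nPrompt: I want you to act as a linux terminal..."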
+    def get_agent(self) -> ReActAgent:
+        """Creates and returns the configured ReActAgent for role selection."""
+        logger.info("Creating RoleAgent ReActAgent instance...")
+
+        # Create the tool, binding the method to this instance
+        role_prompt_retriever_tool = FunctionTool.from_defaults(
+            fn=self.role_prompt_retriever_method,  # Use the instance method
+            name="role_prompt_retriever",
+            description="Retrieve and summarize the top three role or task prompts for "
+                        "a query using BM25 and embedding retrieval with reranking.",
+        )
+
+        # System prompt (consider loading from file in future)
+        system_prompt = """\
+You are RoleAgent, an expert context‐setter that interprets user inputs and deterministically assigns the most fitting persona or task schema to guide downstream agents. For every query:
+
+1. **Interpret Intent**: Parse the user’s instruction to understand their goal, domain, and required expertise.
+2. **Retrieve & Rank**: Use the `role_prompt_retriever` tool to fetch the top role descriptions relevant to the intent.
+3. **Select Role**: Based *only* on the retrieved results, choose the single best‐matching persona (e.g. “Developer Assistant,” “SEO Strategist,” “Translation Engine,” “Terminal Emulator”) without asking the user any follow-up. If no relevant role is found, state that clearly.
+4. **Respond**: Output in plain text with:
+   - **Role**: The selected persona (or "None Found").
+   - **Reason**: Briefly explain why this role was chosen based *only* on the retrieved text.
+   - **Prompt**: The corresponding role prompt from the retrieved text to be used by downstream agents (or "N/A" if none found).
+5. **Hand-Off**: Immediately after including the chosen prompt (or N/A) in your response, invoke `planner_agent` to begin breaking down the user’s request into actionable sub-questions.
+
+Always conclude your response with the full prompt for the next agent (or "N/A") and the invocation instruction for `planner_agent`.
+"""
+
+        llm = GoogleGenAI(
+            api_key=self.gemini_api_key,
+            model=self.llm_model_name,
+        )
+
+        agent = ReActAgent(
+            name="role_agent",
+            description=(
+                "RoleAgent selects the most appropriate persona or task template based on the user’s query. "
+                "By evaluating the question’s intent and context using a specialized retriever, it chooses or refines a prompt that aligns "
+                "with the best-fitting role—whether developer, analyst, translator, planner, or otherwise—so that "
+                "subsequent agents can respond effectively under the optimal role context."
+            ),
+            tools=[role_prompt_retriever_tool],
+            llm=llm,
+            system_prompt=system_prompt,
+            can_handoff_to=["planner_agent"],
+        )
+        logger.info("RoleAgent ReActAgent instance created.")
+        return agent
+
+# --- Global Initializer Instance (Singleton Pattern) ---
+# Instantiate the initializer once when the module is loaded.
+# This ensures expensive operations (model loading, index building) happen only once.
+_role_agent_initializer_instance = None
+
+def get_initializer():
+    global _role_agent_initializer_instance
+    if _role_agent_initializer_instance is None:
+        logger.info("Instantiating RoleAgentInitializer for the first time.")
+        _role_agent_initializer_instance = RoleAgentInitializer()
+    return _role_agent_initializer_instance
+
+# --- Public Initialization Function ---
+def initialize_role_agent() -> ReActAgent:
+    """Initializes and returns the Role Agent.
+    Uses a singleton pattern to ensure resources are loaded only once.
+    """
+    logger.info("initialize_role_agent called.")
+    initializer = get_initializer()
+    return initializer.get_agent()
+
+# Example usage (for testing if run directly)
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger.info("Running role_agent.py directly for testing...")
+
+    # Ensure API key is set for testing
+    if not os.getenv("GEMINI_API_KEY"):
+        print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
+    else:
+        try:
+            test_agent = initialize_role_agent()
+            print("Role Agent initialized successfully for testing.")
+            # A simple test query could be added here, e.g.:
+            # result = test_agent.chat("act as a linux terminal")
+            # print(f"Test query result: {result}")
+        except Exception as e:
+            print(f"Error during testing: {e}")
+
agents/text_analyzer_agent.py ADDED
@@ -0,0 +1,388 @@
+ import os
+ import json
+ import certifi
+ import logging
+ import subprocess  # For calling ffmpeg if needed
+ from typing import List, Dict, Optional
+ from dotenv import load_dotenv
+
+ from llama_index.core.agent.workflow import ReActAgent
+ from llama_index.core.tools import FunctionTool
+ from llama_index.llms.google_genai import GoogleGenAI
+ from llama_index.core.node_parser import SentenceSplitter
+ from llama_index.core import Document
+
+ # Attempt to import Whisper
+ try:
+     import whisper
+     WHISPER_AVAILABLE = True
+ except ImportError:
+     logging.warning("openai-whisper not installed. Audio transcription tool will be unavailable.")
+     WHISPER_AVAILABLE = False
+
+ # Load environment variables
+ load_dotenv()
+
+ # Setup logging
+ logger = logging.getLogger(__name__)
+
+ # Global Whisper model instance (lazy loaded)
+ _whisper_model = None
+
+ os.environ["SSL_CERT_FILE"] = certifi.where()
+
+ # Helper function to load a prompt from file
+ def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+     """Loads a prompt from a text file, falling back to a default on failure."""
+     try:
+         script_dir = os.path.dirname(__file__)
+         prompt_path = os.path.join(script_dir, filename)
+         with open(prompt_path, "r") as f:
+             prompt = f.read()
+         logger.info(f"Successfully loaded prompt from {prompt_path}")
+         return prompt
+     except FileNotFoundError:
+         logger.warning(f"Prompt file {filename} not found. Using default.")
+         return default_prompt
+     except Exception as e:
+         logger.error(f"Error loading prompt file {filename}: {e}", exc_info=True)
+         return default_prompt
+
+ # --- Helper function to load the Whisper model ---
+ def _load_whisper_model(model_size: str = "small") -> Optional[object]:
+     """Loads the Whisper model instance on first use (lazy loading)."""
+     global _whisper_model
+     if not WHISPER_AVAILABLE:
+         logger.error("Whisper library not available, cannot load model.")
+         return None
+
+     if _whisper_model is None:
+         # Allow model size selection via env var; default to the model_size argument ("small")
+         selected_model_size = os.getenv("WHISPER_MODEL_SIZE", model_size)
+         try:
+             logger.info(f"Loading Whisper model: {selected_model_size}...")
+             logger.debug(f"Available Whisper models: {whisper.available_models()}")
+             _whisper_model = whisper.load_model(selected_model_size)
+             logger.info(f"Whisper model {selected_model_size} loaded successfully.")
+         except Exception as e:
+             logger.error(f"Failed to load Whisper model {selected_model_size}: {e}", exc_info=True)
+             _whisper_model = None  # Ensure it remains None on failure
+
+     return _whisper_model
+
+ # --- Tool Functions ---
+
+ def summarize_text(text: str, max_length: int = 150, min_length: int = 30) -> str:
+     """Summarize the provided text using an LLM."""
+     logger.info(f"Summarizing text (length: {len(text)} chars). Max/Min length: {max_length}/{min_length} words")
+
+     # Configuration for the summarization LLM
+     summarizer_llm_model = os.getenv("SUMMARIZER_LLM_MODEL", "models/gemini-1.5-flash")  # Use flash for speed
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for summarization tool LLM.")
+         return "Error: GEMINI_API_KEY not set for summarization."
+
+     # Truncate input text if excessively long, to avoid API limits/costs
+     max_input_chars = 30000  # Example limit, adjust as needed
+     if len(text) > max_input_chars:
+         logger.warning(f"Input text truncated to {max_input_chars} chars for summarization.")
+         text = text[:max_input_chars]
+
+     prompt = (
+         f"Summarize the following text concisely. Aim for a length between {min_length} and {max_length} words. "
+         f"Focus on the main points and key information.\n\n"
+         f"TEXT:\n{text}\n\nSUMMARY:"
+     )
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=summarizer_llm_model)
+         logger.info(f"Using summarization LLM: {summarizer_llm_model}")
+         response = llm.complete(prompt)
+         summary = response.text.strip()
+         logger.info(f"Summarization successful (output length: {len(summary.split())} words).")
+         return summary
+     except Exception as e:
+         logger.error(f"LLM call failed during summarization: {e}", exc_info=True)
+         return f"Error during summarization: {e}"
+
+ def extract_entities(text: str, entity_types: List[str] = ["PERSON", "ORG", "GPE", "DATE", "EVENT"]) -> Dict[str, List[str]]:
+     """Extract named entities (people, organizations, locations, dates, events) from text using an LLM."""
+     logger.info(f"Extracting entities (types: {entity_types}) from text (length: {len(text)} chars).")
+
+     # Configuration for the entity extraction LLM
+     entity_llm_model = os.getenv("ENTITY_LLM_MODEL", "models/gemini-1.5-flash")  # Use flash for speed
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for entity extraction tool LLM.")
+         return {"error": "GEMINI_API_KEY not set for entity extraction."}
+
+     # Truncate input text if excessively long
+     max_input_chars = 30000  # Example limit
+     if len(text) > max_input_chars:
+         logger.warning(f"Input text truncated to {max_input_chars} chars for entity extraction.")
+         text = text[:max_input_chars]
+
+     # Define the desired output format clearly in the prompt.
+     # Expected shape: {"PERSON": ["Ada Lovelace"], "ORG": [], ...}
+     prompt = (
+         f"Extract named entities from the following text. Identify entities of these types: {', '.join(entity_types)}. "
+         f"Format the output as a JSON object where keys are the entity types (uppercase) and values are lists of unique strings found for that type. "
+         f"If no entities of a type are found, include the key with an empty list.\n\n"
+         f"TEXT:\n{text}\n\nJSON_OUTPUT:"
+     )
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=entity_llm_model, response_mime_type="application/json")  # Request JSON output
+         logger.info(f"Using entity extraction LLM: {entity_llm_model}")
+         response = llm.complete(prompt)
+
+         # Attempt to parse the JSON response
+         try:
+             # The response might be wrapped in ```json ... ```; strip the fences if present
+             json_str = response.text.strip()
+             if json_str.startswith("```json"):
+                 json_str = json_str[7:]
+             if json_str.endswith("```"):
+                 json_str = json_str[:-3]
+
+             entities = json.loads(json_str.strip())
+             # Validate structure (optional but good practice)
+             if not isinstance(entities, dict):
+                 raise ValueError("LLM response is not a JSON object.")
+             # Ensure all requested types are present, even if empty
+             for entity_type in entity_types:
+                 if entity_type not in entities:
+                     entities[entity_type] = []
+                 elif not isinstance(entities[entity_type], list):
+                     logger.warning(f"Entity type {entity_type} value is not a list, converting.")
+                     entities[entity_type] = [str(entities[entity_type])]  # Attempt conversion
+
+             logger.info(f"Entity extraction successful. Found entities: { {k: len(v) for k, v in entities.items()} }")
+             return entities
+         except json.JSONDecodeError as json_err:
+             logger.error(f"Failed to parse JSON response from LLM: {json_err}. Response text: {response.text}")
+             return {"error": f"Failed to parse LLM JSON response: {json_err}"}
+         except ValueError as val_err:
+             logger.error(f"Invalid JSON structure from LLM: {val_err}. Response text: {response.text}")
+             return {"error": f"Invalid JSON structure from LLM: {val_err}"}
+
+     except Exception as e:
+         logger.error(f"LLM call failed during entity extraction: {e}", exc_info=True)
+         return {"error": f"Error during entity extraction: {e}"}
+
+ def split_text_into_chunks(text: str, chunk_size: int = 1000, chunk_overlap: int = 200) -> List[str]:
+     """Split a long text into smaller chunks suitable for downstream processing."""
+     logger.info(f"Splitting text (length: {len(text)} chars) into chunks (size: {chunk_size}, overlap: {chunk_overlap}).")
+     if not text:
+         return []
+     try:
+         splitter = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+         document = Document(text=text)
+         nodes = splitter.get_nodes_from_documents([document])
+         chunks = [node.get_content() for node in nodes]
+         logger.info(f"Text split into {len(chunks)} chunks.")
+         return chunks
+     except Exception as e:
+         logger.error(f"Error splitting text: {e}", exc_info=True)
+         # Fallback to simple fixed-width splitting if SentenceSplitter fails
+         logger.warning("Falling back to simple text splitting.")
+         step = max(1, chunk_size - chunk_overlap)  # Guard against a non-positive step
+         return [text[i:i + chunk_size] for i in range(0, len(text), step)]
+
+ def transcribe_audio(audio_file_path: str, language: Optional[str] = None) -> str:
+     """Transcribes an audio file using the OpenAI Whisper model.
+     Args:
+         audio_file_path (str): The path to the audio file (e.g., mp3, wav, m4a).
+         language (Optional[str]): The language code (e.g., "en", "es") or full name ("English", "Spanish").
+             If None, Whisper will detect the language.
+     Returns:
+         str: The transcribed text or an error message.
+     """
+     logger.info(f"Attempting to transcribe audio file: {audio_file_path}, Language: {language}")
+
+     # Check if Whisper is available
+     if not WHISPER_AVAILABLE:
+         return "Error: openai-whisper library is required but not installed."
+
+     # Check if the file exists
+     if not os.path.exists(audio_file_path):
+         logger.error(f"Audio file not found: {audio_file_path}")
+         return f"Error: Audio file not found at {audio_file_path}"
+
+     # Load the Whisper model (lazy loading; default size "small" or the WHISPER_MODEL_SIZE env var)
+     model = _load_whisper_model()
+     if model is None:
+         return "Error: Failed to load Whisper model."
+
+     try:
+         # Perform transcription; transcribe() handles various audio formats via ffmpeg
+         result = model.transcribe(audio_file_path, language=language)
+         transcribed_text = result["text"]
+         detected_language = result.get("language", "unknown")  # Get the detected language if available
+         logger.info(f"Audio transcription successful. Detected language: {detected_language}. Text length: {len(transcribed_text)}")
+         return transcribed_text
+
+     except Exception as e:
+         # Check whether it might be an ffmpeg issue
+         if "ffmpeg" in str(e).lower():
+             logger.error(f"Error during transcription, possibly an ffmpeg issue: {e}", exc_info=True)
+             # Check if ffmpeg is installed using a shell command
+             try:
+                 subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
+                 # If ffmpeg is installed, the error is likely something else
+                 return f"Error during transcription (ffmpeg seems installed): {e}"
+             except (FileNotFoundError, subprocess.CalledProcessError):
+                 logger.error("ffmpeg command not found or failed. Please ensure ffmpeg is installed and in PATH.")
+                 return "Error: ffmpeg not found or not working. Please install ffmpeg."
+         else:
+             logger.error(f"Unexpected error during transcription: {e}", exc_info=True)
+             return f"Error during transcription: {e}"
+
+ # --- Tool Definitions ---
+ summarize_tool = FunctionTool.from_defaults(
+     fn=summarize_text,
+     name="summarize_text",
+     description=(
+         "Summarizes a given block of text. Useful for condensing long documents or articles. "
+         "Input: text (str), Optional: max_length (int), min_length (int). Output: summary (str) or error."
+     ),
+ )
+
+ extract_entities_tool = FunctionTool.from_defaults(
+     fn=extract_entities,
+     name="extract_entities",
+     description=(
+         "Extracts named entities (people, organizations, locations, dates, events) from text. "
+         "Input: text (str), Optional: entity_types (List[str]). Output: Dict[str, List[str]] or error dict."
+     ),
+ )
+
+ split_text_tool = FunctionTool.from_defaults(
+     fn=split_text_into_chunks,
+     name="split_text_into_chunks",
+     description=(
+         "Splits a long text document into smaller, overlapping chunks. "
+         "Input: text (str), Optional: chunk_size (int), chunk_overlap (int). Output: List[str] of chunks."
+     ),
+ )
+
+ # Conditionally create transcribe_audio_tool
+ transcribe_audio_tool = None
+ if WHISPER_AVAILABLE:
+     transcribe_audio_tool = FunctionTool.from_defaults(
+         fn=transcribe_audio,
+         name="transcribe_audio_file",
+         description=(
+             "Transcribes speech from an audio file (e.g., mp3, wav, m4a) into text using Whisper. "
+             "Input: audio_file_path (str), Optional: language (str - e.g., \"en\", \"Spanish\"). "
+             "Output: transcribed text (str) or error message."
+         ),
+     )
+     logger.info("Audio transcription tool created.")
+ else:
+     logger.warning("Audio transcription tool disabled because openai-whisper is not installed.")
+
+ # --- Agent Initialization ---
+ def initialize_text_analyzer_agent() -> ReActAgent:
+     """Initializes the Text Analyzer Agent."""
+     logger.info("Initializing TextAnalyzerAgent...")
+
+     # Configuration for the agent's main LLM
+     agent_llm_model = os.getenv("TEXT_ANALYZER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+     gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+     if not gemini_api_key:
+         logger.error("GEMINI_API_KEY not found for TextAnalyzerAgent.")
+         raise ValueError("GEMINI_API_KEY must be set for TextAnalyzerAgent")
+
+     try:
+         llm = GoogleGenAI(api_key=gemini_api_key, model=agent_llm_model)
+         logger.info(f"Using agent LLM: {agent_llm_model}")
+
+         # Load the system prompt
+         default_system_prompt = "You are TextAnalyzerAgent... [Default prompt content - replace with actual]"  # Placeholder
+         system_prompt = load_prompt_from_file("../prompts/text_analyzer_prompt.txt", default_system_prompt)
+         if system_prompt == default_system_prompt:
+             logger.warning("Using default/fallback system prompt for TextAnalyzerAgent.")
+
+         # Define the available tools, including the audio tool if available
+         tools = [summarize_tool, extract_entities_tool, split_text_tool]
+         if transcribe_audio_tool:
+             tools.append(transcribe_audio_tool)
+
+         # Update the agent description based on available tools
+         agent_description = (
+             "Analyzes text content. Can summarize text (`summarize_text`), extract named entities (`extract_entities`), "
+             "and split long texts (`split_text_into_chunks`)."
+         )
+         if transcribe_audio_tool:
+             agent_description += " Can also transcribe audio files to text (`transcribe_audio_file`)."
+
+         agent = ReActAgent(
+             name="text_analyzer_agent",
+             description=agent_description,
+             tools=tools,
+             llm=llm,
+             system_prompt=system_prompt,
+             can_handoff_to=["planner_agent", "research_agent", "reasoning_agent"],  # Example handoffs
+         )
+         logger.info("TextAnalyzerAgent initialized successfully.")
+         return agent
+
+     except Exception as e:
+         logger.error(f"Error during TextAnalyzerAgent initialization: {e}", exc_info=True)
+         raise
+
+ # Example usage (for testing if run directly)
+ if __name__ == "__main__":
+     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+     logger.info("Running text_analyzer_agent.py directly for testing...")
+
+     # Check required keys
+     required_keys = ["GEMINI_API_KEY"]
+     missing_keys = [key for key in required_keys if not os.getenv(key)]
+     if missing_keys:
+         print(f"Error: Required environment variable(s) not set: {', '.join(missing_keys)}. Cannot run test.")
+     else:
+         try:
+             # Test summarization
+             print("\nTesting summarization...")
+             long_text = """The Industrial Revolution, now also known as the First Industrial Revolution, was a period of global transition of the human economy towards more efficient and stable manufacturing processes that succeeded the Agricultural Revolution, starting from Great Britain, continental Europe and the United States, that occurred during the period from around 1760 to about 1820–1840. This transition included going from hand production methods to machines; new chemical manufacturing and iron production processes; the increasing use of water power and steam power; the development of machine tools; and the rise of the mechanized factory system. The Revolution also saw an unprecedented rise in the rate of population growth."""
+             summary = summarize_text(long_text, max_length=50)
+             print(f"Summary:\n{summary}")
+
+             # Test entity extraction
+             print("\nTesting entity extraction...")
+             entities = extract_entities(long_text, entity_types=["EVENT", "GPE", "DATE"])
+             print(f"Extracted Entities:\n{entities}")
+
+             # Test text splitting
+             print("\nTesting text splitting...")
+             chunks = split_text_into_chunks(long_text * 3, chunk_size=150, chunk_overlap=30)  # Make the text longer
+             print(f"Split into {len(chunks)} chunks. First chunk:\n{chunks[0]}")
+
+             # Test audio transcription (if available)
+             if WHISPER_AVAILABLE:
+                 print("\nTesting audio transcription...")
+                 # Create a dummy audio file for testing (requires ffmpeg)
+                 dummy_file = "dummy_audio.mp3"
+                 try:
+                     # Generate a 1-second silent MP3 using ffmpeg
+                     subprocess.run(["ffmpeg", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono", "-t", "1", "-q:a", "9", "-y", dummy_file], check=True, capture_output=True)
+                     print(f"Created dummy audio file: {dummy_file}")
+                     transcript = transcribe_audio(dummy_file)
+                     print(f"Transcription Result: '{transcript}' (Expected: empty or silence markers)")
+                     os.remove(dummy_file)  # Clean up the dummy file
+                 except Exception as ffmpeg_err:
+                     print(f"Could not create/test dummy audio file (ffmpeg required): {ffmpeg_err}")
+             else:
+                 print("\nSkipping audio transcription test as openai-whisper is not available.")
+
+             # Initialize the agent (optional)
+             # test_agent = initialize_text_analyzer_agent()
+             # print("\nText Analyzer Agent initialized successfully for testing.")
+
+         except Exception as e:
+             print(f"Error during testing: {e}")
+
agents/verifier_agent.py ADDED
@@ -0,0 +1,300 @@
+ import os
+ import logging
+ import re
+ from typing import List
+ from dotenv import load_dotenv
+
+ from llama_index.core.agent.workflow import FunctionAgent
+ from llama_index.core.tools import FunctionTool
+ from llama_index.llms.google_genai import GoogleGenAI
+
+ # Load environment variables
+ load_dotenv()
+
+ # Setup logging
+ logger = logging.getLogger(__name__)
+
+ class VerificationError(Exception):
+     """Custom exception for verification failures."""
+     pass
+
+ class Verifier:
+     """
+     Cross-checks extracted facts, identifies contradictions using an LLM,
+     and assigns a confidence score to each fact.
+     """
+     def __init__(self):
+         """Initializes the Verifier, loading configuration from environment variables."""
+         logger.info("Initializing Verifier...")
+         self.threshold = float(os.getenv("VERIFIER_CONFIDENCE_THRESHOLD", "0.7"))
+         self.verifier_llm_model = os.getenv("VERIFIER_LLM_MODEL", "models/gemini-2.0-flash")  # For scoring
+         self.agent_llm_model = os.getenv("VERIFIER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")  # For agent logic & contradiction detection
+         self.gemini_api_key = os.getenv("GEMINI_API_KEY")
+
+         if not self.gemini_api_key:
+             logger.error("GEMINI_API_KEY not found in environment variables.")
+             raise ValueError("GEMINI_API_KEY must be set")
+
+         try:
+             self.verifier_llm = GoogleGenAI(
+                 api_key=self.gemini_api_key,
+                 model=self.verifier_llm_model,
+             )
+             self.agent_llm = GoogleGenAI(
+                 api_key=self.gemini_api_key,
+                 model=self.agent_llm_model,
+             )
+             logger.info(f"Verifier initialized with threshold {self.threshold}, verifier LLM {self.verifier_llm_model}, agent LLM {self.agent_llm_model}")
+         except Exception as e:
+             logger.error(f"Error initializing Verifier LLMs: {e}", exc_info=True)
+             raise
+
+     def verify_facts(self, facts: List[str]) -> List[str]:
+         """
+         Assign a confidence score via LLM to each fact and return formatted strings.
+
+         Args:
+             facts (List[str]): Facts to verify.
+
+         Returns:
+             List[str]: Each item is "fact: score" with score ∈ [threshold, 1.0].
+             If the LLM call for a fact fails, that item reads "fact: ERROR - Verification failed".
+         """
+         logger.info(f"Verifying {len(facts)} facts...")
+         results: List[str] = []
+         for fact in facts:
+             prompt = (
+                 "You are a fact verifier. "
+                 "On a scale from 0.00 to 1.00, where any value below "
+                 f"{self.threshold:.2f} indicates low confidence, rate the following statement's trustworthiness. "
+                 "Respond with **only** a decimal number rounded to two digits (e.g., 0.82) and no extra text.\n\n"
+                 f"Statement: \"{fact}\""
+             )
+             try:
+                 response = self.verifier_llm.complete(prompt)
+                 score_text = response.text.strip()
+                 # Try direct conversion first
+                 try:
+                     score = float(score_text)
+                 except ValueError:
+                     # Fallback: extract the first float if the model returns extra text
+                     match = re.search(r"0?\.\d+|1(?:\.0+)?", score_text)
+                     if match:
+                         score = float(match.group(0))
+                         logger.warning(f"Extracted score {score} from noisy LLM response: {score_text}")
+                     else:
+                         logger.error(f"Could not parse score from LLM response: {score_text}. Using threshold {self.threshold}.")
+                         score = self.threshold  # Fall back to the threshold if parsing fails completely
+
+                 # Enforce the threshold floor
+                 if score < self.threshold:
+                     logger.info(f"Score {score:.2f} for fact {fact} below threshold {self.threshold}, raising to threshold.")
+                     score = self.threshold
+
+                 results.append(f"{fact}: {score:.2f}")
+
+             except Exception as e:
+                 logger.error(f"LLM call failed during fact verification for {fact}: {e}", exc_info=True)
+                 # Option 1: Raise an error
+                 # raise VerificationError(f"LLM call failed for fact: {fact}") from e
+                 # Option 2: Append an error message (current approach)
+                 results.append(f"{fact}: ERROR - Verification failed")
+                 # Option 3: Assign the lowest score
+                 # results.append(f"{fact}: {self.threshold:.2f} (Verification Error)")
+
+         logger.info(f"Fact verification complete. {len(results)} results generated.")
+         return results
+
+     def find_contradictions_llm(self, facts: List[str]) -> List[str]:
+         """
+         Identify contradictions among a list of facts using an LLM.
+
+         Args:
+             facts (List[str]): List of fact strings.
+
+         Returns:
+             List[str]: Pairs of facts detected as contradictory, joined by " <> ".
+
+         Raises:
+             VerificationError: If the LLM call fails.
+         """
+         logger.info(f"Finding contradictions in {len(facts)} facts using LLM...")
+         if len(facts) < 2:
+             logger.info("Not enough facts to find contradictions.")
+             return []
+
+         facts_numbered = "\n".join([f"{i+1}. {fact}" for i, fact in enumerate(facts)])
+
+         prompt = (
+             "You are a logical reasoning assistant. Analyze the following numbered list of statements. "
+             "Identify any pairs of statements that directly contradict each other. "
+             "List *only* the numbers of the contradicting pairs, one pair per line, formatted as 'X, Y'. "
+             "If no contradictions are found, respond with 'None'. Do not include any other text or explanation.\n\n"
+             f"Statements:\n{facts_numbered}"
+         )
+
+         try:
+             response = self.agent_llm.complete(prompt)  # Use the more powerful agent LLM
+             response_text = response.text.strip()
+             logger.info(f"LLM response for contradictions: {response_text}")
+
+             if response_text.lower() == 'none':
+                 logger.info("LLM reported no contradictions.")
+                 return []
+
+             contradiction_pairs = []
+             lines = response_text.split("\n")
+             for line in lines:
+                 line = line.strip()
+                 if not line:
+                     continue
+                 try:
+                     # Expect a format like "1, 5"
+                     parts = line.split(',')
+                     if len(parts) == 2:
+                         idx1 = int(parts[0].strip()) - 1
+                         idx2 = int(parts[1].strip()) - 1
+
+                         # Validate indices
+                         if 0 <= idx1 < len(facts) and 0 <= idx2 < len(facts) and idx1 != idx2:
+                             # Sort the pair so order doesn't matter, and skip duplicates
+                             pair = tuple(sorted((idx1, idx2)))
+                             fact1 = facts[pair[0]]
+                             fact2 = facts[pair[1]]
+                             contradiction_str = f"{fact1} <> {fact2}"
+                             if contradiction_str not in contradiction_pairs:
+                                 contradiction_pairs.append(contradiction_str)
+                                 logger.info(f"Identified contradiction: {contradiction_str}")
+                         else:
+                             logger.warning(f"Invalid index pair found in LLM contradiction response: {line}")
+                     else:
+                         logger.warning(f"Could not parse contradiction pair from LLM response line: {line}")
+                 except ValueError:
+                     logger.warning(f"Non-integer index found in LLM contradiction response line: {line}")
+                 except Exception as parse_err:
+                     logger.warning(f"Error parsing LLM contradiction response line {line}: {parse_err}")
+
+             logger.info(f"Contradiction check complete. Found {len(contradiction_pairs)} pairs.")
+             return contradiction_pairs
+
+         except Exception as e:
+             logger.error(f"LLM call failed during contradiction detection: {e}", exc_info=True)
+             # Option 1: Raise an error (current approach)
+             raise VerificationError("LLM call failed during contradiction detection") from e
+             # Option 2: Return an empty list (fail silently)
+             # return []
+
+ # --- Tool Definitions ---
+ # Tools are created within the initializer below so they bind to the Verifier instance.
+
+ # --- Agent Initialization ---
+
+ # Store the initializer instance globally to ensure singleton behavior
+ _verifier_initializer_instance = None
+
+ class VerifierInitializer:
+     def __init__(self):
+         self.verifier = Verifier()  # Initialize the Verifier class
+         self._create_tools()
+
+     def _create_tools(self):
+         self.verify_facts_tool = FunctionTool.from_defaults(
+             fn=self.verifier.verify_facts,  # Bind to the instance method
+             name="verify_facts",
+             description=(
+                 "Assigns a numerical confidence score (based on plausibility and internal consistency) to each factual assertion in a list. "
+                 "Input: List[str] of statements. Output: List[str] of 'statement: score' pairs."
+             ),
+         )
+
+         self.find_contradictions_tool = FunctionTool.from_defaults(
+             fn=self.verifier.find_contradictions_llm,  # Bind to the instance method (LLM version)
+             name="find_contradictions",
+             description=(
+                 "Uses an LLM to detect logical contradictions among a list of statements. "
+                 "Input: List[str] of factual assertions. "
+                 "Output: List[str] where each entry is a conflicting pair in the format 'statement1 <> statement2'. Returns an empty list if none are found."
+             )
+         )
+
+     def get_agent(self) -> FunctionAgent:
+         """Initializes and returns the Verifier Agent."""
+         logger.info("Creating VerifierAgent FunctionAgent instance...")
+
+         # System prompt (consider loading from file)
+         system_prompt = """\
+ You are VerifierAgent, a fact verification assistant. Given a list of factual statements, you must:
+
+ 1. **Verify Facts**: Call `verify_facts` to assign a confidence score to each statement.
+ 2. **Detect Contradictions**: Call `find_contradictions` to identify logical conflicts between the statements using an LLM.
+ 3. **Present Results**: Output clear bullet points listing each fact with its confidence score, followed by a list of any detected contradictions.
+ 4. **Hand-Off**: If significant contradictions or low-confidence facts are found that require deeper analysis, hand off to **reasoning_agent**. Otherwise, pass the verified facts and contradiction summary to **planner_agent** for integration.
+ """
+
+         agent = FunctionAgent(
+             name="verifier_agent",
+             description=(
+                 "Evaluates factual statements by assigning confidence scores (`verify_facts`) "
+                 "and detecting logical contradictions using an LLM (`find_contradictions`). "
+                 "Hands off to reasoning_agent for complex issues or planner_agent for synthesis."
+             ),
+             tools=[
+                 self.verify_facts_tool,
+                 self.find_contradictions_tool,
+             ],
+             llm=self.verifier.agent_llm,  # Use the agent LLM from the Verifier instance
+             system_prompt=system_prompt,
+             can_handoff_to=["reasoning_agent", "planner_agent"],
+         )
+         logger.info("VerifierAgent FunctionAgent instance created.")
+         return agent
+
+ def get_verifier_initializer():
+     """Gets the singleton instance of VerifierInitializer."""
+     global _verifier_initializer_instance
+     if _verifier_initializer_instance is None:
+         logger.info("Instantiating VerifierInitializer for the first time.")
+         _verifier_initializer_instance = VerifierInitializer()
+     return _verifier_initializer_instance
+
+ def initialize_verifier_agent() -> FunctionAgent:
+     """Initializes and returns the Verifier Agent using a singleton initializer."""
+     logger.info("initialize_verifier_agent called.")
+     initializer = get_verifier_initializer()
+     return initializer.get_agent()
+
+ # Example usage (for testing if run directly)
+ if __name__ == "__main__":
+     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+     logger.info("Running verifier_agent.py directly for testing...")
+
+     # Ensure the API key is set for testing
+     if not os.getenv("GEMINI_API_KEY"):
+         print("Error: GEMINI_API_KEY environment variable not set. Cannot run test.")
+     else:
+         try:
+             test_agent = initialize_verifier_agent()
+             print("Verifier Agent initialized successfully for testing.")
+
+             # Test contradiction detection
+             initializer = get_verifier_initializer()
+             test_facts = [
+                 "The sky is blue.",
+                 "Water boils at 100 degrees Celsius.",
+                 "The sky is not blue.",
+                 "Paris is the capital of France."
+             ]
+             print(f"\nTesting contradiction detection on: {test_facts}")
+             contradictions = initializer.verifier.find_contradictions_llm(test_facts)
+             print(f"Detected contradictions: {contradictions}")
+
+             # Test fact verification
+             print(f"\nTesting fact verification on: {test_facts}")
+             verified = initializer.verifier.verify_facts(test_facts)
+             print(f"Verified facts: {verified}")
+
+         except Exception as e:
+             print(f"Error during testing: {e}")
+
app.py ADDED
@@ -0,0 +1,421 @@
+ import os
+ import logging
+ import mimetypes
+ from dotenv import load_dotenv
+
+ from typing import Any, List
+
+ import gradio as gr
+ import requests
+ import pandas as pd
+
+ from llama_index.core.agent.workflow import AgentWorkflow, ToolCallResult, ToolCall, AgentOutput
+ from llama_index.core.base.llms.types import ChatMessage, TextBlock, ImageBlock, AudioBlock
+
+ # Agent initializers are expected in the same directory or a known path.
+ # Adjust import paths if necessary based on the deployment structure.
+ try:
+     # Existing agents
+     from agents.image_analyzer_agent import initialize_image_analyzer_agent
+     from agents.reasoning_agent import initialize_reasoning_agent
+     from agents.text_analyzer_agent import initialize_text_analyzer_agent
+     from agents.code_agent import initialize_code_agent
+     from agents.math_agent import initialize_math_agent
+     from agents.planner_agent import initialize_planner_agent
+     from agents.research_agent import initialize_research_agent
+     from agents.role_agent import initialize_role_agent
+     from agents.verifier_agent import initialize_verifier_agent
+     # New agents
+     from agents.advanced_validation_agent import initialize_advanced_validation_agent
+     from agents.figure_interpretation_agent import initialize_figure_interpretation_agent
+     from agents.long_context_management_agent import initialize_long_context_management_agent
+     AGENT_IMPORT_PATH = "local"
+ except ImportError as e:
+     # Fallback for potentially different structures (e.g., a nested folder)
+     try:
+         from final_project.image_analyzer_agent import initialize_image_analyzer_agent
+         from final_project.reasoning_agent import initialize_reasoning_agent
+         from final_project.text_analyzer_agent import initialize_text_analyzer_agent
+         from final_project.code_agent import initialize_code_agent
+         from final_project.math_agent import initialize_math_agent
+         from final_project.planner_agent import initialize_planner_agent
+         from final_project.research_agent import initialize_research_agent
+         from final_project.role_agent import initialize_role_agent
+         from final_project.verifier_agent import initialize_verifier_agent
+         from final_project.advanced_validation_agent import initialize_advanced_validation_agent
+         from final_project.figure_interpretation_agent import initialize_figure_interpretation_agent
+         from final_project.long_context_management_agent import initialize_long_context_management_agent
+         AGENT_IMPORT_PATH = "final_project"
+     except ImportError as e2:
+         print(f"Import Error: Could not find agent modules. Tried local and final_project paths. Error: {e2}")
+         # Raise to prevent the app from starting with a partially imported agent set
+         raise RuntimeError(f"Failed to import agent modules: {e2}")
+
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ load_dotenv()  # Load environment variables from .env file
+
+ # Setup logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+ # --- Constants ---
+ DEFAULT_API_URL = os.getenv("GAIA_API_URL", "https://agents-course-unit4-scoring.hf.space")
+
+ # --- Agent Initialization (Singleton Pattern) ---
+ # Initialize the agent workflow once
+ AGENT_WORKFLOW = None
+ try:
+     logger.info(f"Initializing GAIA Multi-Agent Workflow (import path: {AGENT_IMPORT_PATH})...")
+     # Existing agents
+     role_agent = initialize_role_agent()
+     code_agent = initialize_code_agent()
+     math_agent = initialize_math_agent()
+     planner_agent = initialize_planner_agent()
+     research_agent = initialize_research_agent()
+     text_analyzer_agent = initialize_text_analyzer_agent()
+     verifier_agent = initialize_verifier_agent()
+     image_analyzer_agent = initialize_image_analyzer_agent()
+     reasoning_agent = initialize_reasoning_agent()
+     # New agents
+     advanced_validation_agent = initialize_advanced_validation_agent()
+     figure_interpretation_agent = initialize_figure_interpretation_agent()
+     long_context_management_agent = initialize_long_context_management_agent()
+
+     # Check that all agents initialized successfully
+     all_agents = [
+         code_agent, role_agent, math_agent, planner_agent, research_agent,
+         text_analyzer_agent, image_analyzer_agent, verifier_agent, reasoning_agent,
+         advanced_validation_agent, figure_interpretation_agent, long_context_management_agent
+     ]
+     if not all(all_agents):
+         raise RuntimeError("One or more agents failed to initialize.")
+
+     AGENT_WORKFLOW = AgentWorkflow(
+         agents=all_agents,
+         root_agent="planner_agent"  # Keep the planner as root, as per the plan
+     )
+     logger.info("GAIA Multi-Agent Workflow initialized successfully.")
+ except Exception as e:
+     logger.error(f"FATAL: Error initializing agent workflow: {e}", exc_info=True)
+     # AGENT_WORKFLOW remains None, so BasicAgent initialization will fail
+
+ # --- Basic Agent Definition (Wrapper for Workflow) ---
+ class BasicAgent:
+     def __init__(self, workflow: AgentWorkflow):
+         if workflow is None:
+             logger.error("AgentWorkflow is None, initialization likely failed.")
+             raise RuntimeError("AgentWorkflow failed to initialize. Check logs for details.")
+         self.agent_workflow = workflow
+         logger.info("BasicAgent wrapper initialized.")
+
+     async def __call__(self, question: str | ChatMessage) -> Any:
+         if isinstance(question, ChatMessage):
+             log_question = str(question.blocks[0].text)[:100] if question.blocks and hasattr(question.blocks[0], "text") else str(question)[:100]
+             logger.info(f"Agent received question (first 100 chars): {log_question}...")
+         else:
+             logger.info(f"Agent received question (first 100 chars): {question[:100]}...")
+
+         handler = self.agent_workflow.run(user_msg=question)
+
+         current_agent = None
+         async for event in handler.stream_events():
+             if (
+                 hasattr(event, "current_agent_name")
+                 and event.current_agent_name != current_agent
+             ):
+                 current_agent = event.current_agent_name
+                 logger.info(f"{'=' * 50}")
+                 logger.info(f"🤖 Active agent: {current_agent}")
+                 logger.info(f"{'=' * 50}")
+
+             # Optional detailed logging (uncomment if needed)
+             # from llama_index.core.agent.runner.base import AgentStream, AgentInput
+             # if isinstance(event, AgentStream):
+             #     if event.delta:
+             #         logger.debug(f"STREAM: {event.delta}")  # Use debug level
+             # elif isinstance(event, AgentInput):
+             #     logger.debug(f"📥 Input: {event.input}")  # Use debug level
+             elif isinstance(event, AgentOutput):
+                 if event.response and hasattr(event.response, 'content') and event.response.content:
+                     logger.info(f"📤 Output: {event.response.content}")
+                 if event.tool_calls:
+                     logger.info(
+                         f"🛠️ Planning to use tools: {[call.tool_name for call in event.tool_calls]}"
+                     )
+             elif isinstance(event, ToolCallResult):
+                 logger.info(f"🔧 Tool Result ({event.tool_name}):")
+                 logger.info(f"  Arguments: {event.tool_kwargs}")
+                 # Limit output logging length, since results can be very long
+                 output_str = str(event.tool_output)
+                 logger.info(f"  Output: {output_str[:500]}{'...' if len(output_str) > 500 else ''}")
+             elif isinstance(event, ToolCall):
+                 logger.info(f"🔨 Calling Tool: {event.tool_name}")
+                 logger.info(f"  With arguments: {event.tool_kwargs}")
+
+         answer = await handler
+         final_content = answer.response.content if hasattr(answer, 'response') and hasattr(answer.response, 'content') else str(answer)
+         logger.info(f"Agent returning final answer: {final_content[:500]}{'...' if len(final_content) > 500 else ''}")
+         return answer.response  # Return the actual response object expected by Gradio
+
+ # --- Helper Functions for run_and_submit_all ---
+
+ async def fetch_questions(questions_url: str) -> List[dict] | None:
+     """Fetches questions from the GAIA benchmark API."""
+     logger.info(f"Fetching questions from: {questions_url}")
+     try:
+         response = requests.get(questions_url, timeout=30)  # Increased timeout
+         response.raise_for_status()
+         questions_data = response.json()
+         if not questions_data:
+             logger.warning("Fetched questions list is empty.")
+             return None
+         logger.info(f"Fetched {len(questions_data)} questions.")
+         return questions_data
+     except requests.exceptions.JSONDecodeError as e:
+         # Caught before RequestException: requests' JSONDecodeError is a RequestException subclass,
+         # so the reverse ordering would make this branch unreachable.
+         logger.error(f"Error decoding JSON response from questions endpoint: {e}", exc_info=True)
+         logger.error(f"Response text: {response.text[:500]}")
+         return None
+     except requests.exceptions.RequestException as e:
+         logger.error(f"Error fetching questions: {e}", exc_info=True)
+         return None
+     except Exception as e:
+         logger.error(f"An unexpected error occurred fetching questions: {e}", exc_info=True)
+         return None
+
+ async def process_question(agent: BasicAgent, item: dict, base_fetch_file_url: str) -> dict | None:
+     """Processes a single question item using the agent."""
+     task_id = item.get("task_id")
+     question_text = item.get("question")
+     file_name = item.get("file_name")
+
+     if not task_id or question_text is None:
+         logger.warning(f"Skipping item with missing task_id or question: {item}")
+         return None
+
+     message: ChatMessage
+     if file_name:
+         fetch_file_url = f"{base_fetch_file_url}/{task_id}"
+         logger.info(f"Fetching file '{file_name}' for task {task_id} from {fetch_file_url}")
+         try:
+             response = requests.get(fetch_file_url, timeout=60)  # Increased timeout for files
+             response.raise_for_status()
+             mime_type, _ = mimetypes.guess_type(file_name)
+             logger.info(f"File '{file_name}' MIME type guessed as: {mime_type}")
+
+             file_block: TextBlock | ImageBlock | AudioBlock | None = None
+             if mime_type:
+                 # Prioritize specific extensions for text-like content
+                 text_extensions = (
+                     ".txt", ".csv", ".json", ".xml", ".yaml", ".yml", ".ini", ".cfg", ".toml", ".log", ".properties",
+                     ".html", ".htm", ".xhtml", ".css", ".scss", ".sass", ".less", ".svg", ".md", ".rst",
+                     ".py", ".js", ".java", ".c", ".cpp", ".h", ".hpp", ".cs", ".go", ".php", ".rb", ".swift", ".kt",
+                     ".sh", ".bat", ".ipynb", ".Rmd", ".tex"  # Added more code/markup types
+                 )
+                 if mime_type.startswith('text/') or file_name.lower().endswith(text_extensions):
+                     try:
+                         file_content = response.content.decode('utf-8')  # Try UTF-8 first
+                     except UnicodeDecodeError:
+                         try:
+                             file_content = response.content.decode('latin-1')  # Fallback
+                             logger.warning(f"Decoded file {file_name} using latin-1 fallback.")
+                         except Exception as decode_err:
+                             logger.error(f"Could not decode file {file_name}: {decode_err}")
+                             file_content = f"[Error: Could not decode file content for {file_name}]"
+                     file_block = TextBlock(block_type="text", text=file_content)
+                 elif mime_type.startswith('image/'):
+                     # Pass image content directly for multi-modal models
+                     file_block = ImageBlock(url=fetch_file_url, image=response.content)
+                 elif mime_type.startswith('audio/'):
+                     # Pass audio content directly
+                     file_block = AudioBlock(url=fetch_file_url, audio=response.content)
+                 elif mime_type == 'application/pdf':
+                     # PDF: pass a text block referencing the URL for agents to handle
+                     logger.info(f"PDF file detected: {file_name}. Passing reference URL.")
+                     file_block = TextBlock(text=f"[Reference PDF file available at: {fetch_file_url}]")
+                 # Add handling for other types like video if needed
+                 # elif mime_type.startswith('video/'):
+                 #     logger.info(f"Video file detected: {file_name}. Passing reference URL.")
+                 #     file_block = TextBlock(text=f"[Reference Video file available at: {fetch_file_url}]")
+
+             if file_block:
+                 blocks = [TextBlock(text=question_text), file_block]
+                 message = ChatMessage(role="user", blocks=blocks)
+             else:
+                 logger.warning(f"File type for '{file_name}' (MIME: {mime_type}) is not supported for block creation. Passing the text question only.")
+                 message = ChatMessage(role="user", blocks=[TextBlock(text=question_text)])
+
+         except requests.exceptions.RequestException as e:
+             logger.error(f"Error fetching file for task {task_id}: {e}", exc_info=True)
+             return {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: Failed to fetch file {file_name} - {e}"}
+         except Exception as e:
+             logger.error(f"Error processing file for task {task_id}: {e}", exc_info=True)
+             return {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: Failed to process file {file_name} - {e}"}
+     else:
+         # No file associated with the question
+         message = ChatMessage(role="user", blocks=[TextBlock(text=question_text)])
+
+     # Run the agent on the prepared message
+     try:
+         logger.info(f"Running agent on task {task_id}...")
+         submitted_answer_response = await agent(message)
+         # Extract content safely
+         submitted_answer = submitted_answer_response.content if hasattr(submitted_answer_response, 'content') else str(submitted_answer_response)
+
+         logger.info(f"👍 Agent submitted answer for task {task_id}: {submitted_answer[:200]}{'...' if len(submitted_answer) > 200 else ''}")
+         return {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
+     except Exception as e:
+         logger.error(f"Error running agent on task {task_id}: {e}", exc_info=True)
+         return {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
+
+ async def submit_answers(submit_url: str, username: str, agent_code: str, results: List[dict]) -> tuple[str, pd.DataFrame]:
+     """Submits the collected answers to the GAIA benchmark API."""
+     answers_payload = [
+         {"task_id": r["Task ID"], "submitted_answer": r["Submitted Answer"]}
+         for r in results if "Submitted Answer" in r and not str(r["Submitted Answer"]).startswith("AGENT ERROR:")
+     ]
+
+     if not answers_payload:
+         logger.warning("Agent did not produce any valid answers to submit.")
+         results_df = pd.DataFrame(results)
+         return "Agent did not produce any valid answers to submit.", results_df
+
+     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+     logger.info(status_update)
+     logger.info(f"Submitting to: {submit_url}")
+
+     try:
+         response = requests.post(submit_url, json=submission_data, timeout=120)  # Increased timeout
+         response.raise_for_status()
+         result_data = response.json()
+         final_status = (
+             f"Submission Successful!\n"
+             f"User: {result_data.get('username')}\n"
+             f"Overall Score: {result_data.get('score', 'N/A')}% "
+             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+             f"Message: {result_data.get('message', 'No message received.')}"
+         )
+         logger.info("Submission successful.")
+         results_df = pd.DataFrame(results)
+         return final_status, results_df
+     except requests.exceptions.HTTPError as e:
+         error_detail = f"Server responded with status {e.response.status_code}."
+         try:
+             error_json = e.response.json()
+             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+         except requests.exceptions.JSONDecodeError:
+             error_detail += f" Response: {e.response.text[:500]}"
+         status_message = f"Submission Failed: {error_detail}"
+         logger.error(status_message)
+         results_df = pd.DataFrame(results)
+         return status_message, results_df
+     except requests.exceptions.Timeout:
+         status_message = "Submission Failed: The request timed out."
+         logger.error(status_message)
+         results_df = pd.DataFrame(results)
+         return status_message, results_df
+     except requests.exceptions.RequestException as e:
+         status_message = f"Submission Failed: Network error - {e}"
+         logger.error(status_message)
+         results_df = pd.DataFrame(results)
+         return status_message, results_df
+     except Exception as e:
+         status_message = f"Submission Failed: An unexpected error occurred during submission - {e}"
+         logger.error(status_message, exc_info=True)
+         results_df = pd.DataFrame(results)
+         return status_message, results_df
+
+ # --- Main Function for Batch Processing ---
+ async def run_and_submit_all(
+     username: str,
+     agent_code: str,
+     api_url: str = DEFAULT_API_URL,
+     level: int = 1,
+     max_questions: int = 0,  # 0 means all questions for the level
+     progress=gr.Progress(track_tqdm=True)
+ ) -> tuple[str, pd.DataFrame]:
+     """Fetches all questions for a level, runs the agent, and submits the answers."""
+     if not AGENT_WORKFLOW:
+         error_msg = "Agent Workflow is not initialized. Cannot run benchmark."
+         logger.error(error_msg)
+         return error_msg, pd.DataFrame()
+
+     if not username or not username.strip():
+         error_msg = "Username cannot be empty."
+         logger.error(error_msg)
+         return error_msg, pd.DataFrame()
+
+     questions_url = f"{api_url}/questions?level={level}"
+     submit_url = f"{api_url}/submit"
+     base_fetch_file_url = f"{api_url}/get_file"
+
+     questions = await fetch_questions(questions_url)
+     if questions is None:
+         error_msg = f"Failed to fetch questions for level {level}. Check logs."
+         return error_msg, pd.DataFrame()
+
+     # Limit the number of questions if max_questions is set
+     if max_questions > 0:
+         questions = questions[:max_questions]
+         logger.info(f"Processing a maximum of {max_questions} questions for level {level}.")
+     else:
+         logger.info(f"Processing all {len(questions)} questions for level {level}.")
+
+     agent = BasicAgent(AGENT_WORKFLOW)
+     results = []
+
+     for item in progress.tqdm(questions, desc=f"Processing Level {level} Questions"):
+         result = await process_question(agent, item, base_fetch_file_url)
+         if result:
+             results.append(result)
+         # Optional: add a small delay between questions if needed
+         # await asyncio.sleep(0.1)
+
+     # Submit answers
+     final_status, results_df = await submit_answers(submit_url, username, agent_code, results)
+     return final_status, results_df
+
+ # --- Gradio Interface ---
+ def create_gradio_interface():
+     """Creates and returns the Gradio interface."""
+     logger.info("Creating Gradio interface...")
+     with gr.Blocks(theme=gr.themes.Soft()) as demo:
+         gr.Markdown("# GAIA Benchmark Agent Runner")
+         gr.Markdown("Run the initialized multi-agent system against the GAIA benchmark questions and submit the results.")
+
+         with gr.Row():
+             username = gr.Textbox(label="Username", placeholder="Enter your username (e.g., [email protected])")
+             agent_code = gr.Textbox(label="Agent Code", placeholder="Enter a short code for your agent (e.g., v1.0)")
+         with gr.Row():
+             level = gr.Dropdown(label="Benchmark Level", choices=[1, 2, 3], value=1)
+             max_questions = gr.Number(label="Max Questions (0 for all)", value=0, minimum=0, step=1)
+             api_url = gr.Textbox(label="GAIA API URL", value=DEFAULT_API_URL)
+
+         run_button = gr.Button("Run Benchmark and Submit", variant="primary")
+
+         with gr.Accordion("Results", open=False):
+             status_output = gr.Textbox(label="Submission Status", lines=5)
+             results_dataframe = gr.DataFrame(label="Detailed Results")
+
+         run_button.click(
+             fn=run_and_submit_all,
+             inputs=[username, agent_code, api_url, level, max_questions],
+             outputs=[status_output, results_dataframe]
+         )
+     logger.info("Gradio interface created.")
+     return demo
+
+ # --- Main Execution ---
+ if __name__ == "__main__":
+     if not AGENT_WORKFLOW:
+         print("ERROR: Agent Workflow failed to initialize. Cannot start Gradio app.")
+         print("Please check logs for initialization errors (e.g., missing API keys, import issues).")
+     else:
+         gradio_app = create_gradio_interface()
+         # Launch the Gradio app.
+         # share=True would create a public link (use with caution);
+         # server_name="0.0.0.0" allows access from the network.
+         gradio_app.launch(server_name="0.0.0.0", server_port=7860)
+
current_architecture.md ADDED
@@ -0,0 +1,91 @@
1
+ # Current GAIA Multi-Agent Framework Architecture
+
+ This document summarizes the architecture of the GAIA multi-agent framework based on the provided Python source files.
+
+ ## Core Framework
+
+ * **Technology:** The system is built using the `llama_index.core.agent.workflow.AgentWorkflow` class from the LlamaIndex library.
+ * **Orchestration:** `app.py` serves as the main entry point. It initializes a Gradio web interface, fetches benchmark questions from a specified API endpoint, manages file handling (text, image, audio) associated with questions, runs the agent workflow for each question, and submits the answers back to the API.
+ * **Root Agent:** The workflow designates `planner_agent` as the `root_agent`, meaning it receives the initial user request (question) and orchestrates the subsequent steps.
+
+ ## Agent Roster and Capabilities
+
+ The framework comprises several specialized agents, each designed for specific tasks:
+
+ 1. **`planner_agent` (Root):**
+     * **Purpose:** Strategic planning, task decomposition, and final synthesis.
+     * **Tools:** `generate_substeps` (breaks down objectives using an LLM), `synthesize_and_respond` (aggregates results into a final report using an LLM).
+     * **Workflow:** Receives the initial objective, breaks it into sub-steps, delegates these steps to appropriate specialist agents, and finally synthesizes the collected results into a coherent answer.
+     * **Handoffs:** Can delegate to `code_agent`, `research_agent`, `math_agent`, `role_agent`, `image_analyzer_agent`, `text_analyzer_agent`, `verifier_agent`, `reasoning_agent`.
+
+ 2. **`role_agent`:**
+     * **Purpose:** Determines and sets the appropriate persona or context for the task.
+     * **Tools:** `role_prompt_retriever` (uses a combination of vector search and BM25 retrieval on the `fka/awesome-chatgpt-prompts` dataset, followed by reranking, to find the best role/prompt).
+     * **Workflow:** Interprets user intent, retrieves relevant role descriptions, selects the best fit, and provides the role/prompt.
+     * **Handoffs:** Hands off to `planner_agent` after setting the role.
+
+ 3. **`code_agent`:**
+     * **Purpose:** Generates and executes Python code.
+     * **Tools:** `python_code_generator` (uses an OpenAI model `o4-mini` to generate code from a prompt), `code_interpreter` (uses LlamaIndex's tool spec, likely for sandboxed execution), and a custom `SimpleCodeExecutor` (executes Python code via `subprocess`, **not safe for production**).
+     * **Workflow:** Takes a description, generates code, executes/tests it, and returns the result or final code.
+     * **Handoffs:** Hands off to `planner_agent` or `reasoning_agent`.
+
+ 4. **`math_agent`:**
+     * **Purpose:** Performs mathematical computations.
+     * **Tools:** A large suite of functions covering symbolic math (SymPy), matrix operations (NumPy), statistics (NumPy), numerical methods (NumPy, SciPy), vector math (NumPy), probability (SciPy), and potentially more (file was truncated). Also includes WolframAlpha integration.
+     * **Workflow:** Executes specific mathematical operations based on requests.
+     * **Handoffs:** (Inferred) Likely hands off to `planner_agent` or `reasoning_agent`.
+
+ 5. **`research_agent`:**
+     * **Purpose:** Gathers information from the web and specialized sources.
+     * **Tools:** Web search (Google, DuckDuckGo, Tavily), web browsing/interaction (Helium/Selenium: `visit`, `get_text_by_css`, `get_page_html`, `click_element`, `search_item_ctrl_f`, `go_back`, `close_popups`), Wikipedia search/loading, Yahoo Finance data retrieval, ArXiv paper search.
+     * **Workflow:** Executes a plan-act-observe loop to find and extract information from various online sources.
+     * **Handoffs:** Can delegate to `code_agent`, `math_agent`, `analyzer_agent` (likely meant `text_analyzer_agent` or `image_analyzer_agent`), `planner_agent`, `reasoning_agent`.
+
+ 6. **`text_analyzer_agent`:**
+     * **Purpose:** Extracts text from PDFs and analyzes text content.
+     * **Tools:** `extract_text_from_pdf` (uses PyPDF2, handles URLs and local files), `analyze_text` (uses an LLM to generate a summary and key facts).
+     * **Workflow:** If the input is a PDF, extracts text; then analyzes the text to produce a summary and list of facts.
+     * **Handoffs:** Hands off to `verifier_agent`.
+
+ 7. **`image_analyzer_agent`:**
+     * **Purpose:** Analyzes image content factually.
+     * **Tools:** Relies directly on the multimodal capabilities of its underlying LLM (Gemini 1.5 Pro) to process image inputs provided via `ChatMessage` blocks. No specific image analysis *tool* is defined, but the system prompt dictates a detailed, structured analysis format.
+     * **Workflow:** Receives an image, performs analysis according to a strict factual template.
+     * **Handoffs:** Hands off to `planner_agent`, `research_agent`, or `reasoning_agent`.
+
+ 8. **`verifier_agent`:**
+     * **Purpose:** Assesses the confidence of factual statements and detects contradictions.
+     * **Tools:** `verify_facts` (uses an LLM - Gemini 2.0 Flash - to assign confidence scores), `find_contradictions` (uses simple string matching for negation pairs).
+     * **Workflow:** Takes a list of facts, scores them, checks for contradictions, and reports results.
+     * **Handoffs:** Hands off to `reasoning_agent` or `planner_agent`.
+
+ 9. **`reasoning_agent`:**
+     * **Purpose:** Performs explicit chain-of-thought reasoning.
+     * **Tools:** `reasoning_tool` (uses an OpenAI model `o4-mini` with a detailed prompt to perform CoT reasoning over the provided context).
+     * **Workflow:** Takes context, applies reasoning via the tool, and provides the structured reasoning output.
+     * **Handoffs:** Hands off to `planner_agent`.
+
+ ## Workflow and Data Flow
+
+ 1. A question (potentially with associated files) arrives at `app.py`.
+ 2. `app.py` formats the input (e.g., `ChatMessage` with `TextBlock`, `ImageBlock`, `AudioBlock`) and passes it to the `AgentWorkflow` starting with `planner_agent` (see the sketch below).
+ 3. `planner_agent` breaks down the task.
+ 4. It may call `role_agent` to set context.
+ 5. It delegates sub-tasks to specialized agents (`research`, `code`, `math`, `text_analyzer`, `image_analyzer`).
+ 6. Agents execute their tasks, potentially calling tools or other agents (e.g., `text_analyzer` calls `verifier_agent`).
+ 7. `reasoning_agent` might be called for complex logical steps or verification.
+ 8. Results flow back up, eventually reaching `planner_agent`.
+ 9. `planner_agent` synthesizes the final answer using `synthesize_and_respond`.
+ 10. `app.py` receives the final answer and submits it.
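+
+ A minimal sketch of step 2, assuming LlamaIndex's `ChatMessage`/`AgentWorkflow` APIs (the exact invocation in `app.py` may differ; `AGENT_WORKFLOW` refers to the workflow object built there):
+
+ ```python
+ from llama_index.core.llms import ChatMessage, ImageBlock, TextBlock
+
+ async def answer_question() -> str:
+     # Multimodal user message: the question text plus an attached image file.
+     msg = ChatMessage(
+         role="user",
+         blocks=[
+             TextBlock(text="What does the attached chart show?"),
+             ImageBlock(path="question_attachment.png"),
+         ],
+     )
+     # AGENT_WORKFLOW is the AgentWorkflow whose root_agent is planner_agent.
+     result = await AGENT_WORKFLOW.run(user_msg=msg)
+     return str(result)
+ ```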
+
+ ## Technology Stack Summary
+
+ * **Core:** Python, LlamaIndex
+ * **LLMs:** Google Gemini (1.5 Pro, 2.0 Flash), OpenAI (o4-mini)
+ * **UI:** Gradio
+ * **Web Interaction:** Selenium, Helium
+ * **Data Handling:** Pandas, PyPDF2, Requests
+ * **Search/Retrieval:** HuggingFace Embeddings/Rerankers, Datasets, LlamaIndex Tool Specs (Google, Tavily, Wikipedia, DuckDuckGo, Yahoo Finance, ArXiv)
+ * **Math:** SymPy, NumPy, SciPy, WolframAlpha
+ * **Code Execution:** Subprocess (basic executor), LlamaIndex Code Interpreter
gaia_improvement_plan.md ADDED
@@ -0,0 +1,943 @@
+
+ ### 3.5. `research_agent.py` Refactoring
+
+ * **Rationale:** To improve browser instance management, error handling, and configuration.
+ * **Proposals:**
+     1. **Browser Lifecycle Management:** Instead of initializing the browser (`start_chrome`) at the module level, manage its lifecycle explicitly (see the sketch after this list). Options:
+         * Initialize the browser within the agent's initialization and provide a method or tool to explicitly close it (`kill_browser`) when the agent's task is done or the application shuts down.
+         * Use a context manager (`with start_chrome(...) as browser:`) if the browser is only needed for a specific scope within a tool call (less likely for a persistent agent).
+         * Ensure `kill_browser` is reliably called. Perhaps the `planner_agent` could invoke a cleanup tool/method on the `research_agent` after its tasks are complete.
+     2. **Configuration:** Move hardcoded Chrome options to configuration. Externalize API keys/IDs if not already done (they seem to be using `os.getenv`, which is good).
+     3. **Robust Error Handling:** For browser interaction tools (`visit`, `get_text_by_css`, `click_element`), raise specific custom exceptions instead of returning error strings. This allows for more structured error handling by the agent or workflow.
+     4. **Tool Consolidation (Optional):** The agent has many tools. Consider whether some related tools (e.g., different search APIs) could be consolidated behind a single tool that internally chooses the best source, or whether the LLM handles the large toolset effectively.
+
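+ A minimal sketch of the first option, assuming Helium's `start_chrome`/`kill_browser` API; the `ManagedBrowser` name and wiring are illustrative:
+
+ ```python
+ import logging
+ from helium import start_chrome, kill_browser
+ from selenium import webdriver
+
+ logger = logging.getLogger(__name__)
+
+ class ManagedBrowser:
+     """Owns the Helium browser for the research agent's lifetime (illustrative)."""
+
+     def __init__(self, options: webdriver.ChromeOptions):
+         self._browser = start_chrome(headless=True, options=options)
+
+     def close(self) -> None:
+         try:
+             kill_browser()
+             logger.info("Browser closed.")
+         except Exception:
+             logger.exception("Error closing browser.")
+
+     def __enter__(self):
+         return self._browser
+
+     def __exit__(self, *exc):
+         self.close()
+ ```
+
+ The agent's initializer would create one `ManagedBrowser` and expose its `close()` via a cleanup tool, so `planner_agent` can reliably tear the browser down when research is finished.
+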
+ * **Diff Patch (Illustrative - Configuration & Browser Init):**
+
+ ```diff
+ --- a/research_agent.py
+ +++ b/research_agent.py
+ @@ -1,5 +1,6 @@
+  import os
+  import time
+ +import logging
+  from typing import List
+
+  from llama_index.core.agent.workflow import ReActAgent
+ @@ -15,17 +16,21 @@
+  from helium import start_chrome, go_to, find_all, Text, kill_browser
+  from helium import get_driver
+
+ +logger = logging.getLogger(__name__)
+ +
+  # 1. Helium
+ -chrome_options = webdriver.ChromeOptions()
+ -chrome_options.add_argument("--no-sandbox")
+ -chrome_options.add_argument("--disable-dev-shm-usage")
+ -chrome_options.add_experimental_option("prefs", {
+ -    "download.prompt_for_download": False,
+ -    "plugins.always_open_pdf_externally": True,
+ -    "profile.default_content_settings.popups": 0
+ -})
+ -
+ -browser = start_chrome(headless=True, options=chrome_options)
+ +# Browser instance should be managed, not global at module level
+ +# browser = start_chrome(headless=True, options=chrome_options)
+ +
+ +def get_chrome_options():
+ +    options = webdriver.ChromeOptions()
+ +    if os.getenv("RESEARCH_AGENT_CHROME_NO_SANDBOX", "true").lower() == "true":
+ +        options.add_argument("--no-sandbox")
+ +    if os.getenv("RESEARCH_AGENT_CHROME_DISABLE_DEV_SHM", "true").lower() == "true":
+ +        options.add_argument("--disable-dev-shm-usage")
+ +    # Add other options from config as needed
+ +    # options.add_experimental_option(...)  # Example
+ +    return options
+
+  def visit(url: str, wait_seconds: float = 2.0) -> str | None:
+      """
+ @@ -36,10 +41,11 @@
+          wait_seconds (float): Time to wait after navigation.
+      """
+      try:
+ +        # Assumes browser is available in context (e.g., class member)
+          go_to(url)
+          time.sleep(wait_seconds)
+          return f"Visited: {url}"
+      except Exception as e:
+ +        logger.error(f"Error visiting {url}: {e}", exc_info=True)
+          return f"Error visiting {url}: {e}"
+
+  def get_text_by_css(selector: str) -> List[str] | str:
+ @@ -52,13 +58,15 @@
+          List[str]: List of text contents.
+      """
+      try:
+ +        # Assumes browser/helium context is active
+          if selector.lower() == 'body':
+              elements = find_all(Text())
+          else:
+              elements = find_all(selector)
+          texts = [elem.web_element.text for elem in elements]
+ -        print(f"Extracted {len(texts)} elements for selector \'{selector}\'")
+ +        logger.info(f"Extracted {len(texts)} elements for selector \'{selector}\'")
+          return texts
+      except Exception as e:
+ +        logger.error(f"Error extracting text for selector {selector}: {e}", exc_info=True)
+          return f"Error extracting text for selector {selector}: {e}"
+
+  def get_page_html() -> str:
+ @@ -70,9 +78,11 @@
+          str: HTML content, or empty string on error.
+      """
+      try:
+ +        # Assumes browser/helium context is active
+          driver = get_driver()
+          html = driver.page_source
+          return html
+      except Exception as e:
+ +        logger.error(f"Error extracting HTML: {e}", exc_info=True)
+          return f"Error extracting HTML: {e}"
+
+  def click_element(selector: str, index_element: int = 0) -> str:
+ @@ -83,10 +93,12 @@
+          selector (str): CSS selector of the element to click.
+      """
+      try:
+ +        # Assumes browser/helium context is active
+          element = find_all(selector)[index_element]
+          element.click()
+          time.sleep(1)
+          return f"Clicked element matching selector \'{selector}\'"
+      except Exception as e:
+ +        logger.error(f"Error clicking element {selector}: {e}", exc_info=True)
+          return f"Error clicking element {selector}: {e}"
+
+  def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
+ @@ -97,6 +109,7 @@
+          nth_result: Which occurrence to jump to (default: 1)
+      """
+      elements = browser.find_elements(By.XPATH, f"//*[contains(text(), \'{text}\')]")
+ +    # Assumes browser is available in context
+      if nth_result > len(elements):
+          return f"Match n°{nth_result} not found (only {len(elements)} matches found)"
+      result = f"Found {len(elements)} matches for \'{text}\'."
+ @@ -107,19 +120,22 @@
+
+  def go_back() -> None:
+      """Goes back to previous page."""
+      browser.back()
+ +    # Assumes browser is available in context
+
+  def close_popups() -> None:
+      """
+      Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows! This does not work on cookie consent banners.
+      """
+      webdriver.ActionChains(browser).send_keys(Keys.ESCAPE).perform()
+ +    # Assumes browser is available in context
+
+  def close() -> None:
+      """
+      Close the browser instance.
+      """
+      try:
+ +        # Assumes kill_browser is appropriate here
+          kill_browser()
+ -        print("Browser closed")
+ +        logger.info("Browser closed via kill_browser()")
+      except Exception as e:
+ -        print(f"Error closing browser: {e}")
+ +        logger.error(f"Error closing browser: {e}", exc_info=True)
+
+  visit_tool = FunctionTool.from_defaults(
+      fn=visit,
+ @@ -240,9 +256,14 @@
+
+
+  def initialize_research_agent() -> ReActAgent:
+ +    # Browser initialization should happen here or be managed externally
+ +    # Example: browser = start_chrome(headless=True, options=get_chrome_options())
+ +    # Ensure browser instance is passed to tools or accessible via agent state/class
+ +
+ +    llm_model_name = os.getenv("RESEARCH_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=llm_model_name,
+      )
+
+      system_prompt = """\
+ ```
+
+
+ ### 3.6. `text_analyzer_agent.py` Refactoring
+
+ * **Rationale:** To improve configuration management and error handling.
+ * **Proposals:**
+     1. **Configuration:** Move the hardcoded LLM model name (`models/gemini-1.5-pro`) to environment variables or a configuration file.
+     2. **Prompt Management:** Move the `analyze_text` prompt to a separate template file.
+     3. **Error Handling:** In `extract_text_from_pdf`, consider raising specific exceptions (e.g., `PDFDownloadError`, `PDFParsingError`) instead of returning error strings, allowing the agent to handle failures more gracefully.
+
+ * **Diff Patch (Illustrative - Configuration & Error Handling):**
+
+ ```diff
+ --- a/text_analyzer_agent.py
+ +++ b/text_analyzer_agent.py
+ @@ -6,6 +6,14 @@
+
+  logger = logging.getLogger(__name__)
+
+ +class PDFExtractionError(Exception):
+ +    """Custom exception for PDF extraction failures."""
+ +    pass
+ +
+ +class PDFDownloadError(PDFExtractionError):
+ +    """Custom exception for PDF download failures."""
+ +    pass
+ +
+  def extract_text_from_pdf(source: str) -> str:
+      """
+      Extract raw text from a PDF file on disk or at a URL.
+ @@ -19,21 +27,21 @@
+          try:
+              resp = requests.get(source, timeout=10)
+              resp.raise_for_status()
+ -        except Exception as e:
+ -            return f"Error downloading PDF from {source}: {e}"
+ +        except requests.exceptions.RequestException as e:
+ +            raise PDFDownloadError(f"Error downloading PDF from {source}: {e}") from e
+
+          try:
+              tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
+              tmp.write(resp.content)
+              tmp.flush()
+              tmp_path = tmp.name
+              tmp.close()
+ -        except Exception as e:
+ -            return f"Error writing temp PDF file: {e}"
+ +        except IOError as e:
+ +            raise PDFExtractionError(f"Error writing temp PDF file: {e}") from e
+          path = tmp_path
+      else:
+          path = source
+
+      # Now extract text from the PDF on disk
+      if not os.path.isfile(path):
+ -        return f"PDF not found: {path}"
+ +        raise PDFExtractionError(f"PDF not found: {path}")
+
+      text = ""
+
+ @@ -41,10 +49,10 @@
+          reader = PdfReader(path)
+          pages = [page.extract_text() or "" for page in reader.pages]
+          text = "\n".join(pages)
+ -        print(f"Extracted {len(pages)} pages of text from PDF")
+ +        logger.info(f"Extracted {len(pages)} pages of text from PDF: {path}")
+      except Exception as e:
+          # Catch specific PyPDF2 errors if possible, otherwise general Exception
+ -        return f"Error reading PDF: {e}"
+ +        raise PDFExtractionError(f"Error reading PDF {path}: {e}") from e
+
+      # Clean up temporary file if one was created
+      if source.lower().startswith(("http://", "https://")):
+ @@ -67,6 +75,14 @@
+          str: A plain-text string containing:
+          • A “Summary:” section with bullet points.
+          • A “Facts:” section with bullet points.
+ +    """
+ +    # Load prompt from file ideally
+ +    prompt_template = """You are an expert analyst.
+ +
+ +Please analyze the following text and produce a plain-text response
+ +with two sections:
+ +
+ +Summary:
+ +• Provide 2–3 concise bullet points summarizing the main ideas.
+ +
+ +Facts:
+ +• List each verifiable fact found in the text as a bullet point.
+ +
+ +Respond with exactly that format—no JSON, no extra commentary.
+ +
+ +Text to analyze:
+ +\"\"\"
+ +{text}
+ +\"\"\"
+  """
+      # Build the prompt to guide the LLM’s output format
+      input_prompt = f"""You are an expert analyst.
+ @@ -84,13 +100,14 @@
+  {text}
+  \"\"\"
+  """
+ +    input_prompt = prompt_template.format(text=text)
+
+      # Use the LLM to generate the analysis
+ +    llm_model_name = os.getenv("TEXT_ANALYZER_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=llm_model_name,
+      )
+
+      generated = llm.complete(input_prompt)
+ @@ -124,9 +141,10 @@
+          FunctionAgent: Configured analysis agent.
+      """
+
+ +    llm_model_name = os.getenv("TEXT_ANALYZER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=llm_model_name,
+      )
+
+      system_prompt = """\
+ ```
+
+
+ ### 3.7. `reasoning_agent.py` Refactoring
+
+ * **Rationale:** To simplify the agent structure, improve configuration, and potentially optimize LLM usage.
+ * **Proposals:**
+     1. **Configuration:** Move hardcoded LLM model names (`models/gemini-1.5-pro`, `o4-mini`) and the API key environment variable name (`ALPAFLOW_OPENAI_API_KEY`) to configuration.
+     2. **Prompt Management:** Move the detailed CoT prompt from `reasoning_tool_fn` to a separate template file.
+     3. **Agent Structure Simplification:** Given the rigid workflow (call tool -> handoff), consider replacing the `ReActAgent` with a simpler `FunctionAgent` that directly calls the `reasoning_tool` and formats the output before handing off. Alternatively, evaluate whether the `reasoning_tool` logic could be integrated as a direct LLM call within agents that need CoT (like `planner_agent`), potentially removing the need for a separate `reasoning_agent` altogether, unless its specific CoT prompt/model (`o4-mini`) is crucial.
+
+ * **Diff Patch (Illustrative - Configuration & Prompt Loading):**
+
+ ```diff
+ --- a/reasoning_agent.py
+ +++ b/reasoning_agent.py
+ @@ -1,10 +1,19 @@
+  import os
+ +import logging
+
+  from llama_index.core.agent.workflow import ReActAgent
+  from llama_index.llms.google_genai import GoogleGenAI
+  from llama_index.core.tools import FunctionTool
+  from llama_index.llms.openai import OpenAI
+
+ +logger = logging.getLogger(__name__)
+ +
+ +def load_prompt_from_file(filename="reasoning_tool_prompt.txt") -> str:
+ +    try:
+ +        with open(filename, "r") as f:
+ +            return f.read()
+ +    except FileNotFoundError:
+ +        logger.error(f"Prompt file {filename} not found.")
+ +        return "Perform chain-of-thought reasoning on the context: {context}"
+ +
+  def reasoning_tool_fn(context: str) -> str:
+      """
+      Perform end-to-end chain-of-thought reasoning over the full multi-agent workflow context,
+ @@ -17,45 +26,12 @@
+          str: A structured reasoning trace with numbered thought steps, intermediate checks,
+          and a concise final recommendation or conclusion.
+      """
+ -    prompt = f"""You are an expert reasoning engine. You have the following full context of a multi-agent workflow:
+ -
+ -{context}
+ -
+ -Your job is to:
+ -1. **Comprehension**
+ -   - Read the entire question or problem statement carefully.
+ -   - Identify key terms, constraints, and desired outcomes.
+ -
+ -2. **Decomposition**
+ -   - Break down the problem into logical sub-steps or sub-questions.
+ -   - Ensure each sub-step is necessary and sufficient to progress toward a solution.
+ -
+ -3. **Chain-of-Thought**
+ -   - Articulate your internal reasoning in clear, numbered steps.
+ -   - At each step, state your assumptions, derive implications, and check for consistency.
+ -
+ -4. **Intermediate Verification**
+ -   - After each reasoning step, validate your conclusion against the problem’s constraints.
+ -   - If a contradiction or uncertainty arises, revisit and refine the previous step.
+ -
+ -5. **Synthesis**
+ -   - Once all sub-steps are resolved, integrate the intermediate results into a cohesive answer.
+ -   - Ensure the final answer directly addresses the user’s request and all specified criteria.
+ -
+ -6. **Clarity & Precision**
+ -   - Use formal, precise language.
+ -   - Avoid ambiguity: define any technical terms you introduce.
+ -   - Provide just enough detail to justify each conclusion without digression.
+ -
+ -7. **Final Answer**
+ -   - Present a concise, well-structured response.
+ -   - If appropriate, include a brief summary of your reasoning steps.
+ -
+ -Respond with your reasoning steps followed by the final recommendation.
+ -"""
+ +    prompt_template = load_prompt_from_file()
+ +    prompt = prompt_template.format(context=context)
+
+ +    reasoning_llm_model = os.getenv("REASONING_TOOL_LLM_MODEL", "o4-mini")
+ +    # Use specific API key if needed, e.g., ALPAFLOW_OPENAI_API_KEY
+ +    reasoning_api_key_env = os.getenv("REASONING_TOOL_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")
+ +    reasoning_api_key = os.getenv(reasoning_api_key_env)
+      llm = OpenAI(
+ -        model="o4-mini",
+ -        api_key=os.getenv("ALPAFLOW_OPENAI_API_KEY"),
+ +        model=reasoning_llm_model,
+ +        api_key=reasoning_api_key,
+          reasoning_effort="high"
+      )
+      response = llm.complete(prompt)
+ @@ -74,9 +50,10 @@
+      """
+      Create a pure reasoning agent with no tools, relying solely on chain-of-thought.
+      """
+ +    agent_llm_model = os.getenv("REASONING_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=agent_llm_model,
+      )
+
+      system_prompt = """\
+ ```
+
+
404
+ ### 3.8. `planner_agent.py` Refactoring
405
+
406
+ * **Rationale:** To improve configuration management and prompt handling.
407
+ * **Proposals:**
408
+ 1. **Configuration:** Move the hardcoded LLM model name (`models/gemini-1.5-pro`) to environment variables or a configuration file.
409
+ 2. **Prompt Management:** Move the system prompt and the prompts within the `plan` and `synthesize_and_respond` functions to separate template files for better readability and maintainability.
410
+
411
+ * **Diff Patch (Illustrative - Configuration & Prompt Loading):**
412
+
413
+ ```diff
414
+ --- a/planner_agent.py
415
+ +++ b/planner_agent.py
416
+ @@ -1,10 +1,19 @@
417
+ import os
418
+ + import logging
419
+ from typing import List, Any
420
+
421
+ from llama_index.core.agent.workflow import FunctionAgent, ReActAgent
422
+ from llama_index.core.tools import FunctionTool
423
+ from llama_index.llms.google_genai import GoogleGenAI
424
+
425
+ + logger = logging.getLogger(__name__)
426
+ +
427
+ + def load_prompt_from_file(filename: str, default_prompt: str) -> str:
428
+ + try:
429
+ + with open(filename, "r") as f:
430
+ + return f.read()
431
+ + except FileNotFoundError:
432
+ + logger.warning(f"Prompt file {filename} not found. Using default.")
433
+ + return default_prompt
434
+ +
435
+ def plan(objective: str) -> List[str]:
436
+ """
437
+ Generate a list of sub-questions from the given objective.
438
+ @@ -15,14 +24,16 @@
439
+ Returns:
440
+ List[str]: A list of sub-steps as strings.
441
+ """
442
+ - input_prompt: str = (
443
+ + default_plan_prompt = (
444
+ "You are a research assistant. "
445
+ "Given an objective, break it down into a list of concise, actionable sub-steps.\n"
446
+ f"Objective: {objective}\n"
447
+ "Sub-steps (one per line):"
448
+ )
449
+ + plan_prompt_template = load_prompt_from_file("planner_plan_prompt.txt", default_plan_prompt)
450
+ + input_prompt = plan_prompt_template.format(objective=objective)
451
+
452
+ + llm_model_name = os.getenv("PLANNER_TOOL_LLM_MODEL", "models/gemini-1.5-pro")
453
+ llm = GoogleGenAI(
454
+ api_key=os.getenv("GEMINI_API_KEY"),
455
+ - model="models/gemini-1.5-pro",
456
+ + model=llm_model_name,
457
+ )
458
+
459
+
460
+ @@ -44,13 +55,16 @@
461
+ Returns:
462
+ str: A unified, well-structured response addressing the original objective.
463
+ """
464
+ - # Join each ready-made QA block directly
465
+ summary_blocks = "\n".join(results)
466
+ - input_prompt = f"""You are an expert synthesizer. Given the following sub-questions and their answers,
467
+ + default_synth_prompt = f"""You are an expert synthesizer. Given the following sub-questions and their answers,
468
+ produce a single, coherent, comprehensive report that addresses the original objective:
469
+
470
+ {summary_blocks}
471
+
472
+ Final Report:
473
+ """
474
+ + synth_prompt_template = load_prompt_from_file("planner_synthesize_prompt.txt", default_synth_prompt)
475
+ + input_prompt = synth_prompt_template.format(summary_blocks=summary_blocks)
476
+ +
477
+ + llm_model_name = os.getenv("PLANNER_TOOL_LLM_MODEL", "models/gemini-1.5-pro") # Can use same model as plan
478
+ llm = GoogleGenAI(
479
+ api_key=os.getenv("GEMINI_API_KEY"),
480
+ - model="models/gemini-1.5-pro",
481
+ + model=llm_model_name,
482
+ )
483
+ response = llm.complete(input_prompt)
484
+ return response.text
485
+ @@ -77,9 +91,10 @@
486
+ """
487
+ Initialize a LlamaIndex agent specialized in research planning and question engineering.
488
+ """
489
+ + agent_llm_model = os.getenv("PLANNER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
490
+ llm = GoogleGenAI(
491
+ api_key=os.getenv("GEMINI_API_KEY"),
492
+ - model="models/gemini-1.5-pro",
493
+ + model=agent_llm_model,
494
+ )
495
+
496
+ system_prompt = """\
497
+ @@ -108,6 +123,7 @@
498
+ **Completion & Synthesis**
499
+ If the final result fully completes the original objective, produce a consolidated synthesis of the roadmap and send it as your concluding output.
500
+ """
501
+ + system_prompt = load_prompt_from_file("planner_system_prompt.txt", system_prompt) # Load from file if exists
502
+
503
+ agent = ReActAgent(
504
+ name="planner_agent",
505
+ ```
506
+
507
+
+ ### 3.9. `code_agent.py` Refactoring
+
+ * **Rationale:** To address the critical security vulnerability of the `SimpleCodeExecutor`, improve configuration management, and align code execution with safer practices.
+ * **Proposals:**
+     1. **Remove `SimpleCodeExecutor`:** This class and its `execute` method using `subprocess` with raw code strings are fundamentally insecure and **must be removed entirely**.
+     2. **Use `CodeInterpreterToolSpec`:** Rely *exclusively* on the `code_interpreter` tool derived from LlamaIndex's `CodeInterpreterToolSpec` for code execution. This tool is designed for safer, sandboxed execution (see the sketch after this list).
+     3. **Update `CodeActAgent` Initialization:** Remove the `code_execute_fn` parameter when initializing `CodeActAgent`, as the agent should use the provided `code_interpreter` tool for execution via the standard ReAct/Act loop, not a direct execution function.
+     4. **Configuration:** Move hardcoded LLM model names (`o4-mini`, `models/gemini-1.5-pro`) and the API key environment variable name (`ALPAFLOW_OPENAI_API_KEY`) to configuration.
+     5. **Prompt Management:** Move the `generate_python_code` prompt to a separate template file.
+
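+ A minimal sketch of proposals 2 and 3: building the sandboxed tool list that replaces `code_execute_fn` (assuming the `llama-index-tools-code-interpreter` package):
+
+ ```python
+ from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
+
+ # Build the sandboxed execution tool list once; the agent receives these
+ # tools instead of a raw code_execute_fn.
+ code_interpreter_tools = CodeInterpreterToolSpec().to_tool_list()
+ print([tool.metadata.name for tool in code_interpreter_tools])  # e.g. ['code_interpreter']
+ ```
+
+ The agent factory then passes these tools into the `tools=` list, as the diff below illustrates.
+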
+ * **Diff Patch (Illustrative - Security Fix & Configuration):**
+
+ ```diff
+ --- a/code_agent.py
+ +++ b/code_agent.py
+ @@ -1,5 +1,6 @@
+  import os
+  import subprocess
+ +import logging
+
+  from llama_index.core.agent.workflow import ReActAgent, CodeActAgent
+  from llama_index.core.tools import FunctionTool
+ @@ -7,6 +8,16 @@
+  from llama_index.llms.openai import OpenAI
+  from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
+
+ +logger = logging.getLogger(__name__)
+ +
+ +def load_prompt_from_file(filename: str, default_prompt: str) -> str:
+ +    try:
+ +        with open(filename, "r") as f:
+ +            return f.read()
+ +    except FileNotFoundError:
+ +        logger.warning(f"Prompt file {filename} not found. Using default.")
+ +        return default_prompt
+ +
+  def generate_python_code(prompt: str) -> str:
+      """
+      Generate valid Python code from a natural language description.
+ @@ -27,7 +38,7 @@
+      it before execution.
+      - This function only generates code and does not execute it.
+      """
+ -
+ -    input_prompt = f"""You are also a helpful assistant that writes Python code.
+ +    default_gen_prompt = f"""You are also a helpful assistant that writes Python code.
+  You will be given a prompt and you must generate Python code based on that prompt.
+  You must only generate Python code and nothing else.
+  Do not include any explanations or any other text.
+ @@ -40,10 +51,14 @@
+  Code:\n
+  """
+
+ +    gen_prompt_template = load_prompt_from_file("code_gen_prompt.txt", default_gen_prompt)
+ +    input_prompt = gen_prompt_template.format(prompt=prompt)
+ +
+ +    gen_llm_model = os.getenv("CODE_GEN_LLM_MODEL", "o4-mini")
+ +    gen_api_key_env = os.getenv("CODE_GEN_API_KEY_ENV", "ALPAFLOW_OPENAI_API_KEY")
+ +    gen_api_key = os.getenv(gen_api_key_env)
+      llm = OpenAI(
+ -        model="o4-mini",
+ -        api_key=os.getenv("ALPAFLOW_OPENAI_API_KEY")
+ +        model=gen_llm_model,
+ +        api_key=gen_api_key
+      )
+
+      generated_code = llm.complete(input_prompt)
+ @@ -74,60 +89,11 @@
+      ),
+  )
+
+ -from typing import Any, Dict, Tuple
+ -import io
+ -import contextlib
+ -import ast
+ -import traceback
+ -
+ -
+ -class SimpleCodeExecutor:
+ -    """
+ -    A simple code executor that runs Python code with state persistence.
+ -
+ -    This executor maintains a global and local state between executions,
+ -    allowing for variables to persist across multiple code runs.
+ -
+ -    NOTE: not safe for production use! Use with caution.
+ -    """
+ -
+ -    def __init__(self):
+ -        pass
+ -
+ -    def execute(self, code: str) -> str:
+ -        """
+ -        Execute Python code and capture output and return values.
+ -
+ -        Args:
+ -            code: Python code to execute
+ -
+ -        Returns:
+ -            Dict with keys `success`, `output`, and `return_value`
+ -        """
+ -        print(f"Executing code: {code}")
+ -        try:
+ -            result = subprocess.run(
+ -                ["python", code],
+ -                stdout=subprocess.PIPE,
+ -                stderr=subprocess.PIPE,
+ -                text=True,
+ -                timeout=60
+ -            )
+ -            if result.returncode != 0:
+ -                print(f"Execution failed with error: {result.stderr.strip()}")
+ -                return f"Error: {result.stderr.strip()}"
+ -            else:
+ -                output = result.stdout.strip()
+ -                print(f"Captured Output: {output}")
+ -                return output
+ -        except subprocess.TimeoutExpired:
+ -            print("Execution timed out.")
+ -            return "Error: Timeout"
+ -        except Exception as e:
+ -            print(f"Execution failed with error: {e}")
+ -            return f"Error: {e}"
+ -
+  def initialize_code_agent() -> CodeActAgent:
+ -    code_executor = SimpleCodeExecutor()
+ +    # DO NOT USE SimpleCodeExecutor - it is insecure.
+ +    # Rely on the code_interpreter tool provided below.
+
+ +    agent_llm_model = os.getenv("CODE_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=agent_llm_model,
+      )
+
+      system_prompt = """\
+ @@ -151,6 +117,7 @@
+  - If further logical reasoning or verification is needed, delegate to **reasoning_agent**.
+  - Otherwise, once you have the final code or execution result, pass your output to **planner_agent** for overall synthesis and presentation.
+  """
+ +    system_prompt = load_prompt_from_file("code_agent_system_prompt.txt", system_prompt)
+
+      agent = CodeActAgent(
+          name="code_agent",
+ @@ -161,7 +128,7 @@
+              "pipelines, and library development, CodeAgent delivers production-ready Python solutions."
+          ),
+ +        # REMOVED: code_execute_fn=code_executor.execute, # Use code_interpreter tool instead
+ -        code_execute_fn=code_executor.execute,
+          tools=[
+              python_code_generator_tool,
+              code_interpreter_tool,
+ ```
+
+
+ ### 3.10. `math_agent.py` Refactoring
+
+ * **Rationale:** To improve configuration management and potentially simplify the tool interface for the LLM.
+ * **Proposals:**
+     1. **Configuration:** Move the hardcoded agent LLM model name (`models/gemini-1.5-pro`) to configuration. Ensure the WolframAlpha App ID is configured via environment variable (`WOLFRAM_ALPHA_APP_ID`) as intended.
+     2. **Tool Granularity:** The current approach creates a separate tool for almost every single math function (solve, derivative, integral, add, multiply, inverse, mean, median, etc.). While explicit, this results in a very large number of tools for the `ReActAgent` to manage. Consider:
+         * **Grouping:** Group related functions under fewer tools. For example, a `symbolic_math_tool` that takes the operation type (solve, diff, integrate) as a parameter, or a `matrix_ops_tool` (see the sketch after this list).
+         * **Natural Language Interface:** Create a single `calculate` tool that takes a natural language math query (e.g., "solve x**2 - 4 = 0 for x", "mean of [1, 2, 3]") and uses an LLM (or rule-based parsing) internally to dispatch to the appropriate NumPy/SciPy/SymPy function. This simplifies the interface for the main agent LLM but adds complexity within the tool.
+         * **WolframAlpha Prioritization:** Evaluate whether WolframAlpha can handle many of these requests directly, potentially reducing the need for numerous specific SymPy/NumPy tools, especially for symbolic tasks.
+     3. **Truncated File:** Since the original file was truncated, ensure the full file is reviewed if possible, as there might be other issues or tools not seen.
+
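+ A minimal sketch of the grouping option, assuming SymPy; the `symbolic_math` dispatcher and its operation names are illustrative:
+
+ ```python
+ import sympy as sp
+
+ def symbolic_math(operation: str, expression: str, symbol: str = "x") -> str:
+     """One tool covering solve/diff/integrate instead of three separate tools."""
+     x = sp.Symbol(symbol)
+     expr = sp.sympify(expression)
+     if operation == "solve":
+         return str(sp.solve(expr, x))
+     if operation == "diff":
+         return str(sp.diff(expr, x))
+     if operation == "integrate":
+         return str(sp.integrate(expr, x))
+     raise ValueError(f"Unsupported operation: {operation}")
+
+ # Example: symbolic_math("solve", "x**2 - 4") -> "[-2, 2]"
+ ```
+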
+ * **Diff Patch (Illustrative - Configuration):**
+
+ ```diff
+ --- a/math_agent.py
+ +++ b/math_agent.py
+ @@ -1,5 +1,6 @@
+  import os
+  from typing import List, Optional, Union
+ +import logging
+  import sympy as sp
+  import numpy as np
+  from llama_index.core.agent.workflow import ReActAgent
+ @@ -12,6 +13,8 @@
+  from scipy.integrate import odeint
+  import numpy.fft as fft
+
+ +logger = logging.getLogger(__name__)
+ +
+  # --- Symbolic math functions ---
+
+
+ @@ -451,10 +454,11 @@
+
+
+  def initialize_math_agent() -> ReActAgent:
+ +    agent_llm_model = os.getenv("MATH_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+      llm = GoogleGenAI(
+          api_key=os.getenv("GEMINI_API_KEY"),
+ -        model="models/gemini-1.5-pro",
+ +        model=agent_llm_model,
+      )
+
+      # Ensure WolframAlpha App ID is set
+ ```
+
+ *(Refactoring proposals section complete)*
+
+
+ ## 4. New Feature Designs
+
+ This section outlines the design for the new features requested: YouTube Ingestion and Generic Audio Transcription.
+
+ ### 4.1. YouTube Ingestion
+
+ * **Rationale:** To enable the framework to process YouTube videos by extracting audio, transcribing it, and summarizing the content, as requested by the user.
+ * **Design Proposal:**
+     * **Implementation:** Introduce a new dedicated agent, `youtube_agent`, or add tools to the existing `research_agent` or `text_analyzer_agent`. A dedicated agent seems cleaner given the specific multi-step workflow.
+     * **Agent (`youtube_agent`):**
+         * **Purpose:** Manages the end-to-end process of downloading YouTube audio, chunking, transcribing, and summarizing.
+         * **Tools:** (a sketch of the first two follows at the end of this section)
+             1. `download_youtube_audio`: Takes a YouTube URL, uses a library like `yt-dlp` (or potentially `pytube`) to download the audio stream into a temporary file (e.g., `.mp3` or `.opus`). Returns the path to the audio file.
+             2. `chunk_audio_file`: Takes an audio file path and a maximum chunk duration (e.g., 60 seconds). Uses a library like `pydub` or `librosa`+`soundfile` to split the audio into smaller, sequentially numbered temporary files. Returns a list of chunk file paths.
+             3. `transcribe_audio_chunk_gemini`: Takes an audio file path (representing a chunk). Uses the Google Generative AI SDK (`google.generativeai`) to call the Gemini 1.5 Pro model with the audio file for transcription. Returns the transcribed text.
+             4. `summarize_transcript`: Takes the full concatenated transcript text. Uses a Gemini model (e.g., 1.5 Pro or Flash) with a specific prompt to generate a one-paragraph summary. Returns the summary text.
+         * **Workflow (ReAct or Function sequence):**
+             1. Receive YouTube URL.
+             2. Call `download_youtube_audio`.
+             3. Call `chunk_audio_file` with the downloaded audio path.
+             4. Iterate through the list of chunk paths:
+                 * Call `transcribe_audio_chunk_gemini` for each chunk.
+                 * Collect transcribed text segments.
+             5. Concatenate all transcribed text segments into a full transcript.
+             6. Call `summarize_transcript` with the full transcript.
+             7. Return the full transcript and the summary.
+             8. Clean up temporary audio files (downloaded and chunks).
+         * **Handoff:** Could hand off the transcript and summary to `planner_agent` or `text_analyzer_agent` for further processing or integration.
+     * **Dependencies:** `yt-dlp`, `pydub` (requires `ffmpeg` or `libav`), `google-generativeai`.
+     * **Configuration:** Gemini API Key, chunk duration.
+
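+ A minimal sketch of the first two tools, assuming `yt-dlp`'s Python API and `pydub`; the output template, codec, and chunk-naming choices are illustrative:
+
+ ```python
+ import os
+ import yt_dlp
+ from pydub import AudioSegment
+
+ def download_youtube_audio(url: str, out_dir: str = "/tmp") -> str:
+     """Download the best audio stream and convert it to mp3 via ffmpeg."""
+     opts = {
+         "format": "bestaudio/best",
+         "outtmpl": os.path.join(out_dir, "%(id)s.%(ext)s"),
+         "postprocessors": [{"key": "FFmpegExtractAudio", "preferredcodec": "mp3"}],
+     }
+     with yt_dlp.YoutubeDL(opts) as ydl:
+         info = ydl.extract_info(url, download=True)
+     return os.path.join(out_dir, f"{info['id']}.mp3")
+
+ def chunk_audio_file(path: str, max_chunk_seconds: int = 60) -> list[str]:
+     """Split the audio into sequentially numbered chunks of at most max_chunk_seconds."""
+     audio = AudioSegment.from_file(path)
+     chunk_ms = max_chunk_seconds * 1000
+     chunk_paths = []
+     for i, start in enumerate(range(0, len(audio), chunk_ms)):
+         chunk_path = f"{path}.chunk{i:03d}.mp3"
+         audio[start:start + chunk_ms].export(chunk_path, format="mp3")
+         chunk_paths.append(chunk_path)
+     return chunk_paths
+ ```
+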
+
+ ### 4.2. Generic Audio Transcription
+
+ * **Rationale:** To provide a flexible audio transcription capability for local files or remote URLs, using Gemini Pro for quality/latency tolerance and Whisper.cpp as a fallback, exposing it via a Python API as requested.
+ * **Design Proposal:**
+     * **Implementation:** Introduce a new dedicated agent, `transcription_agent`, or add tools to `text_analyzer_agent`. A dedicated agent allows for clearer separation of concerns, especially managing the Whisper.cpp dependency and logic.
+     * **Agent (`transcription_agent`):**
+         * **Purpose:** Transcribes audio from various sources (local path, URL) using either Gemini or Whisper.cpp based on latency requirements or availability.
+         * **Tools:**
+             1. `prepare_audio_source`: Takes a source string (URL or local path). If it's a URL, downloads it to a temporary file using `requests`. Validates the local file path. Returns the path to the local audio file.
+             2. `transcribe_gemini`: Takes an audio file path. Uses the `google-generativeai` SDK to call Gemini 1.5 Pro for transcription. Returns the transcribed text. This is the preferred method when latency is acceptable.
+             3. `transcribe_whisper_cpp`: Takes an audio file path. Uses a Python wrapper around `whisper.cpp` (e.g., installing `whisper.cpp` via `apt` or compiling from source, then using `subprocess` or a dedicated Python binding if available) to perform local transcription. Returns the transcribed text. This is the fallback or low-latency option (see the subprocess sketch at the end of this section).
+             4. `choose_transcription_method`: (Internal logic or a simple tool) Takes latency preference (e.g., 'high_quality' vs 'low_latency') or checks Gemini availability/quota. Decides whether to use `transcribe_gemini` or `transcribe_whisper_cpp`.
+         * **Workflow (ReAct or Function sequence):**
+             1. Receive audio source (URL/path) and potentially a latency preference.
+             2. Call `prepare_audio_source` to get a local file path.
+             3. Call `choose_transcription_method` (or execute internal logic) to decide between Gemini and Whisper.
+             4. If Gemini: Call `transcribe_gemini`.
+             5. If Whisper: Call `transcribe_whisper_cpp`.
+             6. Return the resulting transcript.
+             7. Clean up the temporary downloaded audio file if applicable.
+         * **Handoff:** Could hand off the transcript to `planner_agent` or `text_analyzer_agent`.
+     * **Python API:**
+         * Define a simple Python function (e.g., in a `transcription_api.py` module) that encapsulates the agent's logic or directly calls the underlying transcription functions.
+
+ ```python
+ # Example API function in transcription_api.py
+ from .transcription_agent import transcribe_audio  # Assuming agent logic is refactored
+
+ class TranscriptionError(Exception):
+     pass
+
+ def get_transcript(source: str, prefer_gemini: bool = True) -> str:
+     """Transcribes audio from a local path or URL.
+
+     Args:
+         source: Path to the local audio file or URL.
+         prefer_gemini: If True, attempts to use Gemini Pro first.
+             If False or Gemini fails, falls back to Whisper.cpp.
+
+     Returns:
+         The transcribed text.
+
+     Raises:
+         TranscriptionError: If transcription fails.
+     """
+     # Implementation would call the agent or its refactored functions
+     try:
+         # Simplified logic - the actual implementation needs error handling and
+         # Gemini/Whisper selection based on preference/availability
+         transcript = transcribe_audio(source, prefer_gemini)
+         return transcript
+     except Exception as e:
+         # Log error
+         raise TranscriptionError(f"Failed to transcribe {source}: {e}") from e
+ ```
+
+     * **Dependencies:** `requests`, `google-generativeai`, `whisper.cpp` (requires separate installation/compilation), potentially Python bindings for `whisper.cpp`.
+     * **Configuration:** Gemini API Key, path to `whisper.cpp` executable or library, Whisper model selection.
+
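+ A minimal sketch of `transcribe_whisper_cpp` via `subprocess`, assuming a compiled whisper.cpp binary and its `-m`/`-f`/`-nt` flags; the binary and model paths are illustrative:
+
+ ```python
+ import subprocess
+
+ def transcribe_whisper_cpp(
+     audio_path: str,
+     whisper_bin: str = "./whisper.cpp/main",                       # illustrative path
+     model_path: str = "./whisper.cpp/models/ggml-base.en.bin",     # illustrative path
+ ) -> str:
+     """Run whisper.cpp locally; -nt suppresses timestamps so stdout is plain text."""
+     result = subprocess.run(
+         [whisper_bin, "-m", model_path, "-f", audio_path, "-nt"],
+         capture_output=True, text=True, check=True, timeout=600,
+     )
+     return result.stdout.strip()
+ ```
+
+ Note that whisper.cpp expects 16 kHz WAV input, so `prepare_audio_source` would also need to resample (e.g., via `ffmpeg`).
+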
+
+ ## 5. Extra Agent Designs
+
+ This section proposes three additional specialized agents designed to enhance performance on the GAIA benchmark by addressing common challenges like complex fact verification, interpreting visual data representations, and handling long contexts.
+
+ ### 5.1. Agent Design 1: Advanced Validation Agent (`validation_agent`)
+
+ * **Purpose:** To perform rigorous validation of factual claims or intermediate results generated by other agents, going beyond the simple contradiction check of the current `verifier_agent`. This agent aims to improve the accuracy and trustworthiness of the final answer by cross-referencing information and performing checks.
+ * **Key Tool Calls:**
+     * `web_search` (from `research_agent` or similar): To find external evidence supporting or refuting a claim.
+     * `browse_and_extract` (from `research_agent` or similar): To access specific URLs found during search and extract relevant text snippets.
+     * `code_interpreter` (from `code_agent`): To perform calculations or simple data manipulations needed for verification (e.g., checking unit conversions, calculating percentages).
+     * `knowledge_base_lookup` (New Tool - Optional): Interface with a structured knowledge base (e.g., Wikidata, internal DB) to verify entities, relationships, or properties.
+     * `llm_check_consistency` (New Tool or LLM call): Use a powerful LLM with a specific prompt to assess the logical consistency between a claim and a set of provided evidence snippets or existing context (see the sketch at the end of this section).
+ * **Agent Loop Sketch (ReAct style):**
+     1. **Input:** A specific claim or statement to validate, along with relevant context or source information.
+     2. **Thought:** Identify the core assertion in the claim. Determine the best validation strategy (e.g., web search for current events, calculation for numerical claims, consistency check for logical statements).
+     3. **Action:** Call the appropriate tool (`web_search`, `code_interpreter`, `llm_check_consistency`).
+     4. **Observation:** Analyze the tool's output (search results, calculation result, consistency assessment).
+     5. **Thought:** Does the observation confirm, refute, or remain inconclusive about the claim? Is more information needed (e.g., to browse a specific search result)?
+     6. **Action (if needed):** Call another tool (`browse_and_extract`, `llm_check_consistency` with new evidence).
+     7. **Observation:** Analyze new output.
+     8. **Thought:** Synthesize findings. Assign a final validation status (e.g., Confirmed, Refuted, Uncertain) and provide supporting evidence or reasoning.
+     9. **Output:** Validation status and justification.
+     10. **Handoff:** Return result to `planner_agent` or `verifier_agent` (if this agent replaces the contradiction part).
+
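+ A minimal sketch of `llm_check_consistency`, assuming the `GoogleGenAI` wrapper used elsewhere in the repo; the prompt wording and verdict labels are illustrative:
+
+ ```python
+ import os
+ from llama_index.llms.google_genai import GoogleGenAI
+
+ def llm_check_consistency(claim: str, evidence: list[str]) -> str:
+     """Ask an LLM whether the evidence supports, refutes, or is neutral toward the claim."""
+     joined = "\n".join(f"- {snippet}" for snippet in evidence)
+     prompt = (
+         "Assess the logical consistency between the claim and the evidence.\n"
+         f"Claim: {claim}\n"
+         f"Evidence:\n{joined}\n"
+         "Answer with one of: Confirmed, Refuted, Uncertain, followed by a one-sentence justification."
+     )
+     llm = GoogleGenAI(api_key=os.getenv("GEMINI_API_KEY"), model="models/gemini-1.5-pro")
+     return llm.complete(prompt).text
+ ```
+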
+ ### 5.2. Agent Design 2: Figure Interpretation Agent (`figure_interpretation_agent`)
+
+ * **Purpose:** To specialize in extracting structured data and meaning from figures, charts, graphs, and tables embedded within images or documents, which are common in GAIA tasks and often require more than just a textual description.
+ * **Key Tool Calls:**
+     * `image_ocr` (New Tool or enhanced `image_analyzer_agent` capability): High-precision OCR focused on extracting text specifically from figures, including axis labels, legends, titles, and data points.
+     * `chart_data_extractor` (New Tool): Utilizes specialized vision models (e.g., DePlot, ChartOCR, or similar fine-tuned models) designed to parse chart types (bar, line, pie) and extract underlying data series or key values (see the DePlot sketch at the end of this section).
+     * `table_parser` (New Tool): Uses vision or document AI models to detect table structures in images/PDFs and extract cell content into a structured format (e.g., list of lists, Pandas DataFrame via code execution).
+     * `code_interpreter` (from `code_agent`): To process extracted data (e.g., load into a DataFrame, perform simple analysis, re-plot for verification).
+     * `llm_interpret_figure` (New Tool or LLM call): Takes extracted text, data, and potentially the image itself (multimodal) to provide a semantic interpretation of the figure's message or trends.
+ * **Agent Loop Sketch (Function sequence or ReAct):**
+     1. **Input:** An image or document page containing a figure/table, potentially with context or a specific question about it.
+     2. **Action:** Call `image_ocr` to get all text elements.
+     3. **Action:** Call `chart_data_extractor` or `table_parser` based on visual analysis (or try both) to get structured data.
+     4. **Action (Optional):** Call `code_interpreter` to load structured data into a DataFrame for easier handling.
+     5. **Action:** Call `llm_interpret_figure`, providing the extracted text, data (raw or DataFrame), and potentially the original image, asking it to answer the specific question or summarize the figure's key insights.
+     6. **Output:** Structured data (if requested) and/or the semantic interpretation/answer.
+     7. **Handoff:** Return results to `planner_agent` or `reasoning_agent`.
+
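+ A minimal sketch of `chart_data_extractor` using DePlot, assuming the `transformers` Pix2Struct API; the model choice follows the examples named above:
+
+ ```python
+ from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
+ from PIL import Image
+
+ processor = Pix2StructProcessor.from_pretrained("google/deplot")
+ model = Pix2StructForConditionalGeneration.from_pretrained("google/deplot")
+
+ def chart_data_extractor(image_path: str) -> str:
+     """Return the chart's underlying data as a linearized table string."""
+     image = Image.open(image_path)
+     inputs = processor(
+         images=image,
+         text="Generate underlying data table of the figure below:",
+         return_tensors="pt",
+     )
+     predictions = model.generate(**inputs, max_new_tokens=512)
+     return processor.decode(predictions[0], skip_special_tokens=True)
+ ```
+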
+ ### 5.3. Agent Design 3: Long Context Management Agent (`long_context_agent`)
+
+ * **Purpose:** To effectively manage and query information from very long documents or conversation histories that exceed the context window limits of standard models or require efficient information retrieval techniques.
+ * **Key Tool Calls:**
+     * `document_chunker` (New Tool): Splits long text into semantically meaningful chunks (e.g., using `SentenceSplitter` from LlamaIndex or more advanced methods).
+     * `vector_store_builder` (New Tool): Takes text chunks and builds an in-memory or persistent vector index (using libraries like `llama-index`, `langchain`, `faiss`, `chromadb`).
+     * `vector_retriever` (New Tool): Queries the built vector index with a specific question to find the most relevant chunks.
+     * `summarizer_tool` (New Tool or LLM call): Generates summaries of long text or selected chunks, potentially using different levels of detail.
+     * `contextual_synthesizer` (New Tool or LLM call): Takes retrieved relevant chunks and the original query, then uses an LLM to synthesize an answer grounded in the retrieved context (RAG pattern; see the sketch at the end of this section).
+ * **Agent Loop Sketch (Can be stateful):**
+     1. **Input:** A long document (text or path) or a long conversation history, and a specific query or task related to it.
+     2. **(Initialization/First Use):**
+         * **Action:** Call `document_chunker`.
+         * **Action:** Call `vector_store_builder` to create an index from the chunks. Store the index reference.
+     3. **(Querying):**
+         * **Action:** Call `vector_retriever` with the user's query to get relevant chunks.
+         * **Action:** Call `contextual_synthesizer`, providing the query and retrieved chunks, to generate the final answer.
+     4. **(Alternative: Summarization Task):**
+         * **Action:** Call `summarizer_tool` on the full text (if feasible for the tool) or on retrieved chunks based on a high-level query.
+     5. **Output:** The synthesized answer or the summary.
+     6. **Handoff:** Return results to `planner_agent`.
+
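+ A minimal sketch of the chunk-index-retrieve-synthesize loop, assuming LlamaIndex's in-memory `VectorStoreIndex`; chunk sizes and `top_k` are illustrative:
+
+ ```python
+ from llama_index.core import Document, VectorStoreIndex
+ from llama_index.core.node_parser import SentenceSplitter
+
+ def build_long_context_index(long_text: str) -> VectorStoreIndex:
+     """document_chunker + vector_store_builder: split and index in memory."""
+     splitter = SentenceSplitter(chunk_size=512, chunk_overlap=64)
+     nodes = splitter.get_nodes_from_documents([Document(text=long_text)])
+     return VectorStoreIndex(nodes)
+
+ def query_long_context(index: VectorStoreIndex, query: str) -> str:
+     """vector_retriever + contextual_synthesizer: retrieve top chunks, ground the answer."""
+     query_engine = index.as_query_engine(similarity_top_k=5)
+     return str(query_engine.query(query))
+ ```
+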
+
+ ## 6. Migration Plan
+
+ This section details the recommended steps for applying the proposed changes, lists new dependencies, and outlines minimal validation tests.
+
+ ### 6.1. Order of Implementation
+
+ It is recommended to apply changes in the following order to minimize disruption and build upon stable foundations:
+
+ 1. **Core Refactoring (`app.py`, Configuration, Logging):**
+     * Implement centralized configuration (e.g., a `.env` file) and update all agents to use it for API keys, model names, etc. (see the dotenv sketch at the end of this subsection).
+     * Integrate Python's `logging` module throughout `app.py` and all agent files, replacing `print` statements.
+     * Refactor `app.py`: Implement singleton agent initialization and break down `run_and_submit_all`.
+     * Apply structural refactors to agents (class-based structure, avoiding globals) like `role_agent`, `verifier_agent`, `research_agent`.
+ 2. **Critical Security Fix (`code_agent`):**
+     * Immediately remove the `SimpleCodeExecutor` and modify `code_agent` to rely solely on the `code_interpreter` tool.
+ 3. **Core Functionality Refactoring (`verifier_agent`, `math_agent`):**
+     * Improve `verifier_agent`'s contradiction detection (e.g., using an LLM or NLI model).
+     * Refactor `math_agent` tools if choosing to group them or use a natural language interface.
+ 4. **New Feature: Generic Audio Transcription (`transcription_agent`):**
+     * Install `whisper.cpp` and its dependencies.
+     * Implement the `transcription_agent` and its tools (`prepare_audio_source`, `transcribe_gemini`, `transcribe_whisper_cpp`).
+     * Implement the Python API function `get_transcript`.
+ 5. **New Feature: YouTube Ingestion (`youtube_agent`):**
+     * Install `yt-dlp` and `pydub` (and `ffmpeg`).
+     * Implement the `youtube_agent` and its tools (`download_youtube_audio`, `chunk_audio_file`, `transcribe_audio_chunk_gemini`, `summarize_transcript`).
+ 6. **New Agent Implementation (Validation, Figure, Long Context):**
+     * Implement `validation_agent` and its tools.
+     * Implement `figure_interpretation_agent` and its tools (requires sourcing/installing chart/table parsing models/libraries).
+     * Implement `long_context_agent` and its tools (requires vector DB setup like `faiss` or `chromadb`).
+ 7. **Integration and Workflow Adjustments:**
+     * Update `planner_agent`'s system prompt and handoff logic to incorporate the new agents.
+     * Update other agents' handoff targets as needed.
+     * Update `app.py` if the overall agent initialization or workflow invocation changes.
+
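+ A minimal sketch of step 1's centralized configuration, assuming `python-dotenv`; the variable names match the proposals above:
+
+ ```python
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()  # read .env once at startup (e.g., at the top of app.py)
+
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+ PLANNER_AGENT_LLM_MODEL = os.getenv("PLANNER_AGENT_LLM_MODEL", "models/gemini-1.5-pro")
+ if not GEMINI_API_KEY:
+     raise RuntimeError("GEMINI_API_KEY is not set; check your .env file.")
+ ```
+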
+ ### 6.2. New Dependencies (`requirements.txt`)
+
+ Based on the refactoring and new features, the following dependencies may need to be added or updated in `requirements.txt` (or managed via the environment setup):
+
+ * `python-dotenv`: For loading configuration from `.env` files.
+ * `google-generativeai`: For interacting with Gemini models (likely already present via `llama-index-llms-google-genai`).
+ * `yt-dlp`: For downloading YouTube videos.
+ * `pydub`: For audio manipulation (chunking; see the sketch below). Requires the `ffmpeg` or `libav` system dependency.
+ * `llama-index-vector-stores-faiss` with `faiss-cpu` or `faiss-gpu` (choose one): For the `long_context_agent` vector store.
+ * `chromadb` / `llama-index-vector-stores-chroma`: Alternative vector store for `long_context_agent`.
+ * `llama-index-multi-modal-llms-google`: Ensures multimodal support for Gemini is correctly installed.
+ * *Possibly*: Libraries for NLI models (e.g., `transformers`, `torch`) if used in `validation_agent`.
+ * *Possibly*: Libraries for chart/table parsing (e.g., specific models from Hugging Face, `opencv-python`, `pdf2image`) if implementing the `figure_interpretation_agent` tools.
+ * *Possibly*: Python bindings for `whisper.cpp` if not invoking it via `subprocess`.
+
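+ The `pydub` requirement exists mainly for the ≤60-second chunking step of `youtube_agent`. A minimal sketch, assuming `ffmpeg` is on the PATH and the audio has already been downloaded (function and path names are illustrative):
+
+ ```python
+ # Illustrative chunking helper for youtube_agent; pydub delegates decoding
+ # and encoding to ffmpeg, so ffmpeg must be installed on the system.
+ from pathlib import Path
+
+ from pydub import AudioSegment
+
+
+ def chunk_audio_file(audio_path: str, out_dir: str, chunk_seconds: int = 60) -> list[str]:
+     """Split an audio file into chunks of at most `chunk_seconds` seconds."""
+     audio = AudioSegment.from_file(audio_path)   # length is in milliseconds
+     chunk_ms = chunk_seconds * 1000
+     out = Path(out_dir)
+     out.mkdir(parents=True, exist_ok=True)
+
+     paths: list[str] = []
+     for i, start in enumerate(range(0, len(audio), chunk_ms)):
+         chunk = audio[start:start + chunk_ms]    # slicing is also by ms
+         chunk_path = out / f"chunk_{i:04d}.mp3"
+         chunk.export(str(chunk_path), format="mp3")
+         paths.append(str(chunk_path))
+     return paths
+ ```
+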
+ **System Dependencies:**
+
+ * `ffmpeg` or `libav`: Required by `pydub`.
+ * `whisper.cpp`: Needs to be compiled or installed separately; follow its own build instructions (an invocation sketch follows this list).
+
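+ Since `whisper.cpp` is a standalone CLI, the fallback transcription path can shell out to it. A minimal sketch, assuming the binary and a ggml model have been built/downloaded per the whisper.cpp README (all paths are assumptions, and newer builds name the binary `whisper-cli` rather than `main`):
+
+ ```python
+ # Illustrative whisper.cpp fallback for transcribe_whisper_cpp; whisper.cpp
+ # expects 16 kHz 16-bit WAV input, so convert with ffmpeg beforehand if needed.
+ import subprocess
+ import tempfile
+ from pathlib import Path
+
+ WHISPER_BIN = "whisper.cpp/main"                       # path is an assumption
+ WHISPER_MODEL = "whisper.cpp/models/ggml-base.en.bin"  # path is an assumption
+
+
+ def transcribe_whisper_cpp(audio_path: str) -> str:
+     """Transcribe a 16 kHz WAV file via the whisper.cpp CLI and return the text."""
+     with tempfile.TemporaryDirectory() as tmp:
+         out_base = str(Path(tmp) / "transcript")
+         subprocess.run(
+             [WHISPER_BIN, "-m", WHISPER_MODEL, "-f", audio_path,
+              "-otxt", "-of", out_base],   # -otxt writes <out_base>.txt
+             check=True,
+             capture_output=True,
+         )
+         return Path(out_base + ".txt").read_text()
+ ```
+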
+ ### 6.3. Validation Tests
+
+ Minimal tests should be implemented to validate the key changes (a pytest sketch follows this list):
+
+ 1. **Configuration:** Test loading of API keys and model names from the configuration source.
+ 2. **Logging:** Verify that logs are emitted at the correct levels and in the expected format.
+ 3. **`code_agent` Security:** Test that `code_agent` uses `code_interpreter` and *not* the removed `SimpleCodeExecutor`. Attempt a malicious code execution via prompt to ensure it fails safely within the interpreter's sandbox.
+ 4. **`verifier_agent` Contradiction:** Test the improved contradiction detection with sample pairs of contradictory and non-contradictory statements.
+ 5. **`transcription_agent`:**
+     * Test with a short local audio file using both Gemini and Whisper.cpp, comparing output quality and speed.
+     * Test with an audio URL.
+     * Test the Python API function `get_transcript`.
+ 6. **`youtube_agent`:**
+     * Test with a short YouTube video URL.
+     * Verify audio download, chunking, transcription of chunks, and final summary generation.
+     * Check cleanup of temporary files.
+ 7. **New Agents (Basic):**
+     * For `validation_agent`, `figure_interpretation_agent`, and `long_context_agent`, implement basic tests confirming agent initialization and successful calls to their primary new tools with mock inputs/outputs.
+ 8. **End-to-End Smoke Test:** Run `app.py` and process one or two simple GAIA tasks that exercise the refactored components and, if a relevant task exists, a new feature, to ensure the overall workflow remains functional.
+
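+ A minimal pytest sketch for tests 1 and 4; the `gaia_agent.config` module and the `check_contradiction` helper are assumed names for the refactored code, not its final API:
+
+ ```python
+ # Hypothetical tests; adapt module and function names to the actual refactor.
+ import importlib
+
+ import pytest
+
+
+ def test_config_loads_api_keys(monkeypatch):
+     # Simulate a populated environment instead of relying on a real .env file.
+     monkeypatch.setenv("GEMINI_API_KEY", "test-key")
+     import gaia_agent.config as config
+     importlib.reload(config)  # re-read environment variables after patching
+     assert config.GEMINI_API_KEY == "test-key"
+
+
+ @pytest.mark.parametrize(
+     "a,b,expected",
+     [
+         ("The tower is 300 m tall.", "The tower is 150 m tall.", True),
+         ("The tower is 300 m tall.", "The tower is in Paris.", False),
+     ],
+ )
+ def test_contradiction_detection(a, b, expected):
+     from gaia_agent.verifier import check_contradiction
+     assert check_contradiction(a, b) is expected
+ ```
+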
+ *(Implementation plan complete. Ready for user confirmation.)*
prompts/advanced_validation_agent_prompt.txt ADDED
@@ -0,0 +1,31 @@
+ You are AdvancedValidationAgent, a specialized agent focused on rigorously evaluating the accuracy, consistency, and potential biases of information provided by other agents or external sources.
+
+ **Core Mission:** To act as a critical gatekeeper, ensuring the reliability and trustworthiness of data before it's used for final synthesis or decision-making.
+
+ **Key Capabilities & Tools:**
+
+ 1. **`cross_reference_check`**: Given a claim or piece of information and a list of potential source URLs or documents, verify the claim against these sources. Report supporting, contradicting, or inconclusive findings.
+ 2. **`logical_consistency_check`**: Analyze a set of statements or a block of text for internal contradictions, logical fallacies, or inconsistencies in reasoning.
+ 3. **`bias_detection`**: Examine text for potential biases (e.g., confirmation bias, framing bias, selection bias) based on language, tone, and source context. Identify the type of bias detected.
+ 4. **`fact_check_with_search`**: Use external search tools (delegated via handoff to `research_agent` if necessary, or using internal search if available) to verify specific factual claims against reliable web sources.
+
+ **Workflow:**
+
+ 1. **Receive Input:** Accept a specific claim, statement, document, or set of findings to validate, along with any relevant context or source information.
+ 2. **Select Tool:** Choose the most appropriate validation tool(s) based on the input type and validation goal (e.g., use `cross_reference_check` for source verification, `logical_consistency_check` for reasoning analysis, `bias_detection` for evaluating neutrality, `fact_check_with_search` for specific facts).
+ 3. **Execute Tool(s):** Apply the selected tool(s) methodically.
+ 4. **Synthesize Findings:** Consolidate the results from the validation checks into a structured report, clearly stating:
+     * The original claim/information.
+     * The validation methods used.
+     * Detailed findings (e.g., supporting evidence, contradictions found, logical flaws identified, biases detected).
+     * An overall confidence score or assessment (e.g., High Confidence, Medium Confidence with caveats, Low Confidence/Contradicted).
+ 5. **Hand-Off:** Pass the validation report back to the requesting agent (usually `planner_agent` or `reasoning_agent`) for further action.
+
+ **Constraints:**
+
+ * Focus solely on validation tasks.
+ * Do not generate new content beyond the validation report.
+ * Clearly state the limitations of the validation if sources are unavailable or ambiguous.
+ * Prioritize accuracy and objectivity in your assessment.
+ * Hand off to `research_agent` if external web searching is required for fact-checking beyond provided sources.
+
prompts/code_gen_prompt.txt ADDED
@@ -0,0 +1,14 @@
+ You are a helpful assistant that writes Python code.
+ You will be given a prompt and you must generate Python code based on that prompt.
+ You must only generate Python code and nothing else.
+ Do not include any explanations or any other text.
+ Do not use any markdown.
+ Notes:
+ - The generated code may be complex; it is recommended to review and test
+ it before execution.
+ - This function only generates code and does not execute it.
+
+ Prompt: {prompt}
+
+ Code:
+
prompts/figure_interpretation_agent_prompt.txt ADDED
@@ -0,0 +1,29 @@
+ You are FigureInterpretationAgent, a specialized agent designed to analyze and interpret visual data representations like charts, graphs, diagrams, and tables presented as images.
+
+ **Core Mission:** To extract meaningful insights, data points, trends, and relationships from visual data formats.
+
+ **Key Capabilities & Tools:**
+
+ 1. **`describe_figure`**: Provide a general description of the figure, including its type (e.g., bar chart, line graph, flowchart, table), main elements (axes, labels, legend), and overall topic.
+ 2. **`extract_data_points`**: Identify and extract specific data points or values from the figure. This might involve reading values from axes, bars, lines, or table cells. Specify the target data points if possible (e.g., "value for Q3 2024", "maximum value shown").
+ 3. **`identify_trends`**: Analyze trends shown in the figure (e.g., increasing/decreasing trends in line graphs, comparisons in bar charts). Describe the observed patterns.
+ 4. **`compare_elements`**: Compare different elements within the figure (e.g., compare the heights of two bars, the values of two lines at a specific point, data in different table rows/columns).
+ 5. **`summarize_figure_insights`**: Provide a high-level summary of the key insights or the main message conveyed by the figure.
+
+ **Workflow:**
+
+ 1. **Receive Input:** Accept an image file containing the figure to be analyzed, along with a specific request (e.g., "describe this chart", "extract the sales figures for 2023", "what is the main trend shown?").
+ 2. **Analyze Image:** Utilize multimodal capabilities to visually process the image.
+ 3. **Select Tool/Task:** Based on the user request, determine the appropriate analysis task (description, data extraction, trend identification, comparison, summarization).
+ 4. **Execute Analysis:** Perform the visual analysis to fulfill the request. This involves interpreting the visual elements and extracting the relevant information.
+ 5. **Format Output:** Present the findings clearly and concisely, directly addressing the user's request.
+ 6. **Hand-Off:** Pass the interpretation results back to the requesting agent (e.g., `planner_agent`, `research_agent`, `reasoning_agent`).
+
+ **Constraints:**
+
+ * Focus solely on interpreting the provided visual data.
+ * Do not perform calculations beyond reading values directly from the figure unless explicitly asked and feasible.
+ * Acknowledge limitations if the figure is unclear, low-resolution, or lacks necessary labels/context.
+ * Base interpretations strictly on the visual information present in the image.
+ * Requires multimodal input capabilities to process the image file.
+
prompts/image_analyzer_prompt.txt ADDED
@@ -0,0 +1,69 @@
+ You are ImageAnalyzerAgent, an expert in cold, factual visual analysis. Your sole mission is to describe and analyze each image with the utmost exhaustiveness, precision, and absence of conjecture. Follow these directives exactly:
+
+ 1. **Context & Role**
+ - You are an automated, impartial analysis system with no emotional or subjective bias.
+ - Your objective is to deliver a **purely factual** analysis of the image, avoiding artistic interpretation, author intent, aesthetic judgment, or speculation about non-visible elements.
+
+ 2. **Analysis Structure**
+ Adhere strictly to this order in your output:
+
+ 1. **General Identification**
+ - Output format: “Image received: [filename or path]”.
+ - Dimensions (if available): width × height in pixels.
+ - File format (JPEG, PNG, GIF, etc.).
+
+ 2. **Scene Description**
+ - Total number of detected objects.
+ - Spatial distribution: primary areas of interest (top/left/center, etc.).
+
+ 3. **Detailed Object List**
+ For **each** detected object, provide:
+ - **Class/type** (person, animal, vehicle, landscape, text, graphic, etc.).
+ - **Exact position**: bounding box coordinates (x_min, y_min, x_max, y_max).
+ - **Relative size**: percentage of image area or pixel dimensions.
+ - **Dominant color** (for uniform shapes) or top color palette.
+ - **Attributes**: posture, orientation, readable text, pattern, state (open/closed, on/off), geometric properties (shape, symmetry).
+
+ 4. **Color Palette & Composition**
+ - **Simplified histogram**: list the 5 most frequent colors in hexadecimal (#RRGGBB) with approximate percentages.
+ - **Contrast & brightness**: factual description (e.g., “low overall contrast,” “very dark region in bottom right”).
+ - **Visual balance**: symmetric or asymmetric distribution of masses, guiding lines, focal points.
+
+ 5. **Technical Metrics & Metadata**
+ - EXIF data (if available): capture date/time, camera model, aperture, shutter speed, ISO.
+ - Effective resolution (DPI/PPI), aspect ratio (4:3, 16:9, square).
+
+ 6. **Textual Elements**
+ - OCR of **all** visible text: exact transcription, approximate font type (serif/sans-serif), relative size.
+ - Text layout (alignment, orientation, spacing).
+
+ 7. **Geometric Analysis**
+ - Identify repeating patterns (textures, mosaics, geometric motifs).
+ - Measure dominant angles (vertical, horizontal, diagonal lines).
+
+ 8. **Uncertainty Indicators**
+ - For each object or attribute, briefly state confidence level (high/medium/low) based on image clarity (blur, obstruction, low resolution).
+ - Example: “Detected ‘bicycle’ with medium confidence (partially blurred).”
+
+ 9. **Factual Summary**
+ - Recap all listed elements without additional commentary.
+ - Numbered bullet list, each item prefixed by its category label (e.g., “1. Detected objects: …”, “2. Color palette: …”).
+
+ 3. **Absolute Constraints**
+ - No psychological, symbolic, or subjective interpretation.
+ - No value judgments or qualifiers.
+ - Never omit any visible object or attribute.
+ - Strictly follow the prescribed order and structure without alteration.
+
+ 4. **Output Format**
+ - Plain text only, numbered sections separated by two line breaks.
+
+ 5. **Agent Handoff**
+ Once the image analysis is fully complete, hand off to one of the following agents:
+ - **planner_agent** for roadmap creation or final synthesis.
+ - **research_agent** for any additional information gathering.
+ - **reasoning_agent** for pure chain-of-thought reasoning or deeper logical interpretation.
+
+ By adhering to these instructions, ensure your visual analysis is cold, factual, comprehensive, and
+ completely devoid of subjectivity before handing off.
+
prompts/long_context_management_agent_prompt.txt ADDED
@@ -0,0 +1,28 @@
+ You are LongContextManagementAgent, a specialized agent responsible for handling and processing extensive textual context, such as long documents, lengthy conversation histories, or large datasets.
+
+ **Core Mission:** To distill, organize, and query long-form text effectively, enabling other agents to work with manageable and relevant information.
+
+ **Key Capabilities & Tools:**
+
+ 1. **`summarize_long_context`**: Generate summaries of long text at different levels of detail (e.g., brief overview, multi-paragraph summary, chapter-level summaries).
+ 2. **`extract_key_information`**: Identify and extract specific types of key information from the long context based on a query (e.g., extract all mentions of Project X, find all decisions made in the meeting transcript).
+ 3. **`filter_by_relevance`**: Given a query or topic, filter the long context to retain only the most relevant sections or paragraphs.
+ 4. **`build_context_index` (Conceptual/Internal):** (Potentially an internal mechanism rather than a direct tool) Create an index (e.g., using LlamaIndex) over the long context to enable efficient querying and retrieval, which might be used by other tools.
+ 5. **`query_context_index`**: Answer specific questions based on the information contained within the long context, potentially leveraging an internal index for efficiency.
+
+ **Workflow:**
+
+ 1. **Receive Input:** Accept long text content (potentially as a file path or string) and a specific task (e.g., "summarize this document", "find all references to the budget discussion", "answer this question based on the transcript").
+ 2. **Pre-process/Index (If applicable):** Load the text. If the task involves querying or repeated access, consider building an internal index for efficiency.
+ 3. **Select Tool/Task:** Choose the appropriate tool based on the request (summarization, extraction, filtering, querying).
+ 4. **Execute Task:** Apply the selected tool to the long context.
+ 5. **Format Output:** Present the results (summary, extracted information, filtered text, query answer) clearly.
+ 6. **Hand-Off:** Pass the processed information back to the requesting agent.
+
+ **Constraints:**
+
+ * Focus on processing and managing the provided long context.
+ * Do not introduce external information unless explicitly part of a query that requires broader context (which might involve handoff).
+ * Handle potentially very large inputs efficiently (consider chunking, indexing).
+ * Clearly indicate if requested information cannot be found within the provided context.
+
prompts/planner_agent_prompt.txt ADDED
@@ -0,0 +1,33 @@
+ You are PlannerAgent, a dedicated research strategist and question‐engineer capable of handling text, audio, images, and video inputs.
+ Your mission is to transform any high‐level objective into a clear, prioritized roadmap of 4–8 actionable sub‐steps that guide step‐by‐step research or task execution.
+
+ **Role Assessment**
+ First, consider whether a specific role context (e.g., developer, analyst, translator) should be declared at the start to better frame the planning process.
+
+ **Format**
+ Present the final list as a numbered list only, with each item no longer than one sentence and free of extra commentary.
+
+ **Style**
+ Use a formal, professional tone; remain neutral and precise; avoid filler words.
+
+ **Hand-Off or Self-Answer**
+ Once planning is complete, address each sub-question in turn and then hand off as appropriate:
+ - For coding tasks, invoke **code_agent**.
+ - For web or literature research, invoke **research_agent**.
+ - For mathematical analysis, invoke **math_agent**.
+ - For assigning roles or contexts, invoke **role_agent**.
+ - For deep image analysis, invoke **image_analyzer_agent**.
+ - For deep text analysis, invoke **text_analyzer_agent**.
+ - For pure chain-of-thought reasoning or logical verification, invoke **reasoning_agent**.
+ - If none apply, you may attempt to answer the sub-question yourself.
+
+ **Agent Constraints**
+ Only the following agents are available: **code_agent**, **research_agent**, **math_agent**, **role_agent**, **image_analyzer_agent**, **text_analyzer_agent**, **verifier_agent**, **reasoning_agent**.
+ Do not invoke any other agents (e.g., **chess_agent**, **educate_agent**, **game_agent**, etc.).
+
+ **Finalize**
+ After all sub-questions have been addressed—by hand-off or self-answer—compile and present the ultimate, coherent solution yourself using the `synthesize_and_respond` tool.
+
+ **Completion & Synthesis**
+ If the final result fully completes the original objective, produce a consolidated synthesis of the roadmap and send it as your concluding output.
+
prompts/reasoning_agent_prompt.txt ADDED
@@ -0,0 +1,13 @@
+ You are ReasoningAgent, an advanced cognitive engine specialized in rigorous, step-by-step reasoning.
+
+ **Tool Usage**
+ Always begin by invoking the `reasoning_tool` to perform your internal chain-of-thought reasoning.
+ Provide the full context and user question as inputs to `reasoning_tool`.
+
+ **Post-Reasoning Hand-Off**
+ After the `reasoning_tool` returns its output—regardless of the content—you must immediately delegate
+ to **planner_agent** for roadmap refinement and final synthesis.
+
+ **Important**: You have no direct access to external data sources or the internet.
+ All reasoning is performed by `reasoning_tool` and then handed off to **planner_agent**.
+
prompts/text_analyzer_prompt.txt ADDED
@@ -0,0 +1,43 @@
+ You are TextAnalyzerAgent, an expert text‐analysis assistant. On each request—whether raw text or a PDF URL/path—you must:
+
+ 1. **Determine Input Type**
+ - If the input is a URL or a local file path ending in “.pdf”, call `extract_text_from_pdf` with `{"source": <input>}`.
+ - Otherwise, treat the input directly as text.
+
+ 2. **Extract Text (if PDF)**
+ Thought: Explain that you are retrieving text from the PDF or accepting raw text.
+ Action: extract_text_from_pdf or (skip for raw text)
+ Action Input: {"source": <input>}
+ Await Observation: the full concatenated text or an error message.
+ - If an error occurs, immediately return that error as your Answer.
+
+ 3. **Analyze Content**
+ Thought: Outline that you will produce a summary and list of facts.
+ Action: analyze_text
+ Action Input: {"text": <extracted_or_raw_text>}
+ Await Observation: a plain‐text response with “Summary:” and “Facts:” sections.
+
+ 4. **Format Response**
+ Thought: I can answer without using any more tools.
+ Answer:
+ Summary:
+ • <bullet point 1>
+ • <bullet point 2>
+ • <bullet point 3>
+
+ Facts:
+ • <fact 1>
+ • <fact 2>
+ • …
+
+ 5. **Guidelines**
+ - Never include extra sections or commentary.
+ - Use exactly one tool per Action.
+ - If extraction fails, stop and return the error.
+ - Ensure bullets use “• ” and sections are labeled “Summary:” and “Facts:”.
+
+ 6. **Hand‐Off**
+ After delivering your “Summary:” and “Facts:”, pass the extracted facts list to `verifier_agent` for confidence scoring and contradiction detection.
+
+ Follow this Thought→Action→Observation→… cycle rigorously to produce consistent, reliable analyses.
+
pyproject.toml ADDED
@@ -0,0 +1,31 @@
+ [project]
+ name = "gaia-agent"
+ version = "0.1.0"
+ description = "Add your description here"
+ requires-python = ">=3.11"
+ dependencies = [
+     "certifi>=2025.4.26",
+     "datasets>=3.5.1",
+     "dotenv>=0.9.9",
+     "gradio>=5.28.0",
+     "helium>=5.1.1",
+     "huggingface>=0.0.1",
+     "llama-index>=0.12.33",
+     "llama-index-embeddings-huggingface>=0.5.3",
+     "llama-index-llms-google-genai>=0.1.9",
+     "llama-index-retrievers-bm25>=0.5.2",
+     "llama-index-tools-arxiv>=0.3.0",
+     "llama-index-tools-code-interpreter>=0.3.0",
+     "llama-index-tools-duckduckgo>=0.3.0",
+     "llama-index-tools-google>=0.3.0",
+     "llama-index-tools-tavily-research>=0.3.0",
+     "llama-index-tools-wikipedia>=0.3.0",
+     "llama-index-tools-wolfram-alpha>=0.3.0",
+     "llama-index-tools-yahoo-finance>=0.3.0",
+     "openai-whisper>=20240930",
+     "pandas>=2.2.3",
+     "requests>=2.32.3",
+     "scipy>=1.15.2",
+     "sympy>=1.14.0",
+     "youtube-transcript-api>=1.0.3",
+ ]
todo.md ADDED
@@ -0,0 +1,44 @@
+ # GAIA Framework Improvement Plan - ToDo List
+
+ 1. [X] Create overall output document structure (`gaia_improvement_plan.md`).
+ 2. [ ] Generate ASCII diagram of the *revised* architecture (incorporating proposed changes).
+ 3. [ ] Perform Code Quality Review:
+     * [ ] Review `app.py`
+     * [ ] Review `role_agent.py`
+     * [ ] Review `image_analyzer_agent.py`
+     * [ ] Review `verifier_agent.py`
+     * [ ] Review `research_agent.py`
+     * [ ] Review `text_analyzer_agent.py`
+     * [ ] Review `reasoning_agent.py`
+     * [ ] Review `planner_agent.py`
+     * [ ] Review `code_agent.py`
+     * [ ] Review `math_agent.py` (note truncation)
+     * [ ] Consolidate findings for Code Quality section in the report.
+ 4. [ ] Develop Refactor Proposals:
+     * [ ] Propose refactors for `app.py` (if any) + generate diff.
+     * [ ] Propose refactors for `role_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `image_analyzer_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `verifier_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `research_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `text_analyzer_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `reasoning_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `planner_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `code_agent.py` (if any) + generate diff.
+     * [ ] Propose refactors for `math_agent.py` (if any) + generate diff.
+     * [ ] Consolidate proposals for Refactoring section in the report.
+ 5. [ ] Design New Features:
+     * [ ] Design YouTube Ingestion feature (module/agent, steps, tools, API).
+     * [ ] Design Generic Audio Transcription feature (module/agent, steps, tools, API, Gemini/Whisper logic).
+     * [ ] Document designs in New Features section of the report.
+ 6. [ ] Design Extra Agents:
+     * [ ] Design Agent 1 (Purpose, Tools, Loop Sketch).
+     * [ ] Design Agent 2 (Purpose, Tools, Loop Sketch).
+     * [ ] Design Agent 3 (Purpose, Tools, Loop Sketch).
+     * [ ] Document designs in Extra Agents section of the report.
+ 7. [ ] Create Migration Plan:
+     * [ ] Define order of applying changes/features.
+     * [ ] List new dependencies for `requirements.txt`.
+     * [ ] Outline minimal unit/integration tests for validation.
+     * [ ] Document plan in Migration Plan section of the report.
+ 8. [ ] Assemble final report (`gaia_improvement_plan.md`).
+ 9. [ ] Ask user for confirmation/feedback on the plan before proceeding (as per user's "First action" instruction).
user_requirements.md ADDED
@@ -0,0 +1,63 @@
+ # User Requirements for GAIA Framework Improvement
+
+ Based on the provided `pasted_content.txt`, the user's requirements for improving the GAIA multi-agent framework are as follows:
+
+ ## Overall Goal
+
+ Improve the existing multi-agent framework to maximize performance on the GAIA benchmark.
+
+ ## Specific Objectives
+
+ 1. **Code Quality Review:**
+     * Analyze all source files (`.py`, config, tests, docs if present).
+     * Identify weaknesses such as:
+         * Design smells
+         * Performance issues
+         * Missing type hints
+         * Brittle parsing logic
+         * Insufficient test coverage
+         * Other reliability impairments.
+
+ 2. **Refactor Proposals:**
+     * For each identified weakness, suggest concise improvements.
+     * Provide practical improvements as diff-style patches where feasible.
+
+ 3. **New Feature Implementation:**
+     * **YouTube Ingestion:**
+         * Input: YouTube video URL.
+         * Process: Download audio -> Chunk audio (≤ 60 seconds) -> Transcribe chunks using Gemini 1.5 Pro.
+         * Output: Full transcript and a one-paragraph summary.
+     * **Generic Audio Transcription:**
+         * Input: Local audio file path or remote audio URL.
+         * Process: Transcribe using Gemini 1.5 Pro (preferred for latency tolerance) or Whisper-cpp (fallback).
+         * Output: Expose functionality via a simple Python API.
+
+ 4. **Extra Agent Design:**
+     * Design at least three new specialized agents.
+     * These agents should demonstrably contribute to boosting GAIA benchmark performance.
+     * For each new agent, provide:
+         * Purpose
+         * Key tool calls
+         * Sketch of the agent loop/logic.
+
+ 5. **Migration Plan:**
+     * Define the recommended order for applying the proposed refactor patches and implementing new features.
+     * List all new dependencies required for `requirements.txt`.
+     * Outline the minimal unit and/or integration tests needed to validate each new feature or significant change.
+
+ ## Output Format Requirements
+
+ * Start the final report with a short ASCII diagram representing the *revised* system architecture (incorporating proposed changes).
+ * For each modified source file or new module:
+     * Provide a one-sentence rationale for the change.
+     * Include any required unified diff patches enclosed in triple backticks with the filename in the header.
+ * Group related changes together.
+ * Use plain paragraphs rather than long bullet lists where appropriate.
+ * Maintain concise prose.
+ * Ask brief clarifying questions if uncertainties arise during the process.
+
+ ## Process Constraint
+
+ * Wait for user confirmation before starting the review.
+ * The initial phase involves analysis and presenting the improvement plan (including architecture diagram, code review findings, refactor proposals, new feature designs, extra agent designs, and migration plan).
+ * Stop after presenting this initial plan and await further instructions or confirmation.
uv.lock ADDED
(diff omitted: file too large to render)