Update agents.py
agents.py
CHANGED
@@ -1,4 +1,3 @@
-import os
 import re
 from datetime import datetime
 from typing import Annotated
@@ -28,19 +27,6 @@ from prompts import WEB_SEARCH_PROMPT, YOUTUBE_PROMPT, MULTIMODAL_PROMPT
 # Load environment variables from .env file
 load_dotenv()
 
-# Initialize OpenAI LLM (gpt-4o) for general and web search tasks
-openai_llm = ChatOpenAI(
-    model="gpt-4o",
-    use_responses_api=True,
-    api_key=os.getenv("OPENAI_API_KEY")
-)
-
-# Initialize Google Gemini LLM for YouTube and multimodal tasks
-google_llm = ChatGoogleGenerativeAI(
-    model="gemini-2.5-flash-preview-04-17",
-    google_api_key=os.getenv("GOOGLE_API_KEY"),
-)
-
 class AgentState(MessagesState):
     """
     State class for agent workflows, tracks the message history.
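For context on the `AgentState` base class kept above: LangGraph's `MessagesState` is a `TypedDict` whose `messages` key is annotated with the `add_messages` reducer, so a node returning `{"messages": [...]}` appends to the running history instead of overwriting it. A minimal sketch of what the subclass inherits (based on LangGraph's documented pattern, not code from this commit):

```python
# Rough equivalent of langgraph's MessagesState (sketch only; assumes the
# documented add_messages reducer -- verify against your langgraph version).
from typing import Annotated
from typing_extensions import TypedDict

from langchain_core.messages import AnyMessage
from langgraph.graph.message import add_messages


class AgentState(TypedDict):
    # add_messages merges messages by id and otherwise appends,
    # rather than replacing the list on each node return
    messages: Annotated[list[AnyMessage], add_messages]
```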
@@ -85,133 +71,157 @@ def youtube_transcript(video_url: str, raw: bool = False) -> str:
     except Exception as e:
         return f"An error occurred while fetching the transcript: {e}"
 
-
-tools = [youtube_transcript]
-
-def create_web_search_graph() -> StateGraph:
-    """
-    Create the web search agent graph.
-
-    Returns:
-        StateGraph: The compiled web search agent workflow.
-    """
-    def agent_node(state: AgentState) -> dict:
-        """
-        Node function for handling web search queries.
-
-        Args:
-            state (AgentState): The current agent state.
-
-        Returns:
-            dict: Updated state with the LLM response.
-        """
-        current_date = datetime.now().strftime("%B %d, %Y")
-        # Format the system prompt with the current date
-        system_message = SystemMessage(content=WEB_SEARCH_PROMPT.format(current_date=current_date))
-        # Re-bind tools for each invocation (defensive)
-        web_search_preview = [{"type": "web_search_preview"}]
[… remaining removed lines: the rest of the old module-level graph builders and supervisor setup …]
-# Compile the supervisor agent for use in the application
-supervisor_agent = supervisor_workflow.compile(name="supervisor_agent")
+def build_supervisor_agent(openai_key, google_key):
+    """
+    Build the supervisor agent with the provided API keys.
+
+    Args:
+        openai_key (str): OpenAI API key.
+        google_key (str): Google API key.
+
+    Returns:
+        supervisor_agent: The compiled supervisor agent.
+    """
+    # Initialize OpenAI LLM (gpt-4o) for general and web search tasks
+    openai_llm = ChatOpenAI(
+        model="gpt-4o",
+        use_responses_api=True,
+        api_key=openai_key
+    )
+
+    # Initialize Google Gemini LLM for YouTube and multimodal tasks
+    google_llm = ChatGoogleGenerativeAI(
+        model="gemini-2.5-flash-preview-04-17",
+        google_api_key=google_key,
+    )
+
+    tools = [youtube_transcript]
+
+    def create_web_search_graph() -> StateGraph:
+        """
+        Create the web search agent graph.
+
+        Returns:
+            StateGraph: The compiled web search agent workflow.
+        """
+        web_search_preview = [{"type": "web_search_preview"}]
+        # Bind the web search tool to the OpenAI LLM
+        llm_with_tools = openai_llm.bind_tools(web_search_preview)
+
+        def agent_node(state: AgentState) -> dict:
+            """
+            Node function for handling web search queries.
+
+            Args:
+                state (AgentState): The current agent state.
+
+            Returns:
+                dict: Updated state with the LLM response.
+            """
+            current_date = datetime.now().strftime("%B %d, %Y")
+            # Format the system prompt with the current date
+            system_message = SystemMessage(content=WEB_SEARCH_PROMPT.format(current_date=current_date))
+            # Re-bind tools for each invocation (defensive)
+            web_search_preview = [{"type": "web_search_preview"}]
+            response = llm_with_tools.bind_tools(web_search_preview).invoke(
+                [system_message] + state.get("messages")
+            )
+            return {"messages": state.get("messages") + [response]}
+
+        # Build the workflow graph
+        workflow = StateGraph(AgentState)
+        workflow.add_node("agent", agent_node)
+        workflow.add_edge(START, "agent")
+        workflow.add_edge("agent", END)
+        return workflow.compile(name="web_search_agent")
+
+    def create_youtube_viwer_graph() -> StateGraph:
+        """
+        Create the YouTube viewer agent graph.
+
+        Returns:
+            StateGraph: The compiled YouTube viewer agent workflow.
+        """
+        def agent_node(state: AgentState) -> dict:
+            """
+            Node function for handling YouTube-related queries.
+
+            Args:
+                state (AgentState): The current agent state.
+
+            Returns:
+                dict: Updated state with the LLM response.
+            """
+            current_date = datetime.now().strftime("%B %d, %Y")
+            # Format the system prompt with the current date
+            system_message = SystemMessage(content=YOUTUBE_PROMPT.format(current_date=current_date))
+            # Bind the YouTube transcript tool to the Gemini LLM
+            llm_with_tools = google_llm.bind_tools(tools)
+            response = llm_with_tools.invoke([system_message] + state.get("messages"))
+            return {"messages": state.get("messages") + [response]}
+
+        # Build the workflow graph with tool node and conditional routing
+        workflow = StateGraph(AgentState)
+        workflow.add_node("llm", agent_node)
+        workflow.add_node("tools", ToolNode(tools))
+        workflow.set_entry_point("llm")
+        workflow.add_conditional_edges(
+            "llm",
+            tools_condition,
+            {
+                "tools": "tools",  # If tool is needed, go to tools node
+                "__end__": END,  # Otherwise, end the workflow
+            },
+        )
+        workflow.add_edge("tools", "llm")  # After tool, return to LLM node
+        return workflow.compile(name="youtube_viwer_agent")
+
+    def create_multimodal_agent_graph() -> StateGraph:
+        """
+        Create the multimodal agent graph using Gemini for best multimodal support.
+
+        Returns:
+            StateGraph: The compiled multimodal agent workflow.
+        """
+        def agent_node(state: AgentState) -> dict:
+            """
+            Node function for handling multimodal queries.
+
+            Args:
+                state (AgentState): The current agent state.
+
+            Returns:
+                dict: Updated state with the LLM response.
+            """
+            current_date = datetime.now().strftime("%B %d, %Y")
+            # Compose the system message with the multimodal prompt and current date
+            system_message = SystemMessage(content=MULTIMODAL_PROMPT + f" Today's date: {current_date}.")
+            messages = [system_message] + state.get("messages")
+            # Invoke Gemini LLM for multimodal reasoning
+            response = google_llm.invoke(messages)
+            return {"messages": state.get("messages") + [response]}
+
+        # Build the workflow graph
+        workflow = StateGraph(AgentState)
+        workflow.add_node("agent", agent_node)
+        workflow.add_edge(START, "agent")
+        workflow.add_edge("agent", END)
+        return workflow.compile(name="multimodal_agent")
+
+    # Instantiate the agent graphs
+    multimodal_agent = create_multimodal_agent_graph()
+    web_search_agent = create_web_search_graph()
+    youtube_agent = create_youtube_viwer_graph()
+
+    # Create the supervisor workflow to route queries to the appropriate sub-agent
+    supervisor_workflow = create_supervisor(
+        [web_search_agent, youtube_agent, multimodal_agent],
+        model=openai_llm,
+        prompt=(
+            "You are a supervisor. For each question, call one of your sub-agents and return their answer directly to the user. Do not modify, summarize, or rephrase the answer."
+        )
+    )
+
+    # Compile the supervisor agent for use in the application
+    supervisor_agent = supervisor_workflow.compile(name="supervisor_agent")
+    return supervisor_agent
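With the module-level `supervisor_agent` gone, callers construct the agent explicitly and can supply keys at runtime rather than at import time. A minimal usage sketch (hypothetical caller code, not part of this commit; the env-var names are assumptions carried over from the old version):

```python
# Hypothetical caller of the new factory; not part of this commit.
import os

from agents import build_supervisor_agent

# Keys now arrive as plain arguments, so they can come from env vars,
# user input, or a secrets manager at call time.
supervisor_agent = build_supervisor_agent(
    openai_key=os.getenv("OPENAI_API_KEY"),
    google_key=os.getenv("GOOGLE_API_KEY"),
)

# Compiled LangGraph graphs are invoked with the state dict; here the
# state is the message history tracked by AgentState.
result = supervisor_agent.invoke(
    {"messages": [{"role": "user", "content": "What happened in AI news today?"}]}
)
print(result["messages"][-1].content)
```

Because each call builds fresh LLM clients and sub-graphs inside a closure, two calls with different keys yield independent agents, which suits per-session key entry in a multi-user app.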