Spaces:

vectara
/

finance-assistant

Running on CPU Upgrade

App Files Files Community

ofermend committed on Mar 22

Commit

941e6a0

1 Parent(s): c9767eb

updated

Browse files

Files changed (3) hide show

agent.py +93 -82
requirements.txt +1 -1
st_app.py +7 -0

agent.py CHANGED Viewed

@@ -3,11 +3,14 @@ import pandas as pd
 import requests
 from functools import lru_cache
 from pydantic import Field, BaseModel
 from omegaconf import OmegaConf
 from vectara_agentic.agent import Agent
 from vectara_agentic.tools import ToolsFactory, VectaraToolFactory
 from dotenv import load_dotenv
 load_dotenv(override=True)
@@ -34,7 +37,7 @@ years = range(2015, 2025)
 initial_prompt = "How can I help you today?"
 # Tool to get the income statement for a given company and year using the FMP API
-@lru_cache(maxsize=128)
 def fmp_income_statement(
     ticker: str = Field(description="the ticker symbol of the company.", examples=["AAPL", "GOOG", "AMZN"]),
     year: int = Field(description="the year for which to get the income statement.", examples=[2020, 2021, 2022]),
@@ -49,6 +52,8 @@ def fmp_income_statement(
         A dictionary with the income statement data.
         All data is in USD, but you can convert it to more compact form like K, M, B.
     """
     fmp_api_key = os.environ.get("FMP_API_KEY", None)
     if fmp_api_key is None:
         return "FMP_API_KEY environment variable not set. This tool does not work."
@@ -65,97 +70,99 @@ def fmp_income_statement(
         ]
         values_dict = income_statement_specific_year.to_dict(orient="records")[0]
         return f"Financial results: {', '.join([f'{key}={value}' for key, value in values_dict.items() if key not in ['date', 'cik', 'link', 'finalLink']])}"
     return f"FMP API returned error {response.status_code}. This tool does not work."
-def create_assistant_tools(cfg):
-    def get_company_info() -> list[str]:
-        """
-        Returns a dictionary of companies you can query about. Always check this before using any other tool.
-        The output is a dictionary of valid ticker symbols mapped to company names.
-        You can use this to identify the companies you can query about, and their ticker information.
-        """
-        return tickers
-    def get_valid_years() -> list[str]:
-        """
-        Returns a list of the years for which financial reports are available.
-        Always check this before using any other tool.
-        """
-        return years
-    class QueryTranscriptsArgs(BaseModel):
-        query: str = Field(..., description="The user query, always in the form of a question", examples=["what are the risks reported?", "who are the competitors?"])
-        year: int | str = Field(
-            default=None,
-            description=f"The year this query relates to. An integer between {min(years)} and {max(years)} or a string specifying a condition on the year",
-            examples=[2020, '>2021', '<2023', '>=2021', '<=2023', '[2021, 2023]', '[2021, 2023)']
         )
-        ticker: str = Field(..., description=f"The company ticker this query relates to. Must be a valid ticket symbol from the list {list(tickers.keys())}.")
-    vec_factory = VectaraToolFactory(vectara_api_key=cfg.api_key,
-                                     vectara_corpus_key=cfg.corpus_key)
-    summarizer = 'vectara-experimental-summary-ext-2023-12-11-med-omni'
-    ask_transcripts = vec_factory.create_rag_tool(
-        tool_name = "ask_transcripts",
-        tool_description = """
-        Given a company name and year, responds to a user question about the company, based on analyst call transcripts about the company's financial reports for that year.
-        You can ask this tool any question about the company including risks, opportunities, financial performance, competitors and more.
-        """,
-        tool_args_schema = QueryTranscriptsArgs,
-        reranker = "multilingual_reranker_v1", rerank_k = 100,
-        n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.005,
-        summary_num_results = 10,
-        vectara_summarizer = summarizer,
-        include_citations = True,
-        verbose=False,
-    )
-    class SearchTranscriptsArgs(BaseModel):
-        query: str = Field(..., description="The user query, always in the form of a question", examples=["what are the risks reported?", "who are the competitors?"])
-        top_k: int = Field(..., description="The number of results to return.")
-        year: int | str = Field(
-            default=None,
-            description=f"The year this query relates to. An integer between {min(years)} and {max(years)} or a string specifying a condition on the year",
-            examples=[2020, '>2021', '<2023', '>=2021', '<=2023', '[2021, 2023]', '[2021, 2023)']
         )
-        ticker: str = Field(..., description=f"The company ticker this query relates to. Must be a valid ticket symbol from the list {list(tickers.keys())}.")
-    search_transcripts = vec_factory.create_search_tool(
-        tool_name = "search_transcripts",
-        tool_description = """
-        Given a company name and year, and a user query, retrieves the most relevant text from analyst call transcripts about the company related to the user query.
-        """,
-        tool_args_schema = SearchTranscriptsArgs,
-        reranker = "multilingual_reranker_v1", rerank_k = 100,
-        lambda_val = 0.005,
-        verbose=False
-    )
-    tools_factory = ToolsFactory()
-    return (
-            [tools_factory.create_tool(tool) for tool in
-                [
-                    get_company_info,
-                    get_valid_years,
-                    fmp_income_statement,
-                ]
-            ] +
-            tools_factory.financial_tools() +
-            [ask_transcripts, search_transcripts]
-    )
 def initialize_agent(_cfg, agent_progress_callback=None):
     financial_bot_instructions = """
     - You are a helpful financial assistant, with expertise in financial reporting, in conversation with a user.
-    - Always use the 'income_statement' tool to obtain accurate financial data like revenues, expenses, net income, and other financial metrics
-      for a specific company, for any the year 2020 or later.
-    - Use the 'fmp_income_statement' tool (with the company ticker and year) to obtain financial data for any year before 2020,
-    - Use the 'fmp_income_statement` tool (with the company ticker and year) to obtain financial data for any year on or after 2020, when the 'income_statement'
-      did not return any data useful to respond to the user query.
     - Always check the 'get_company_info' and 'get_valid_years' tools to validate company and year are valid.
-    - Use the ask_transcripts tool to answer most questions about the company's financial performance, risks, opportunities, strategy, competitors, and more.
     - Respond in a compact format by using appropriate units of measure (e.g., K for thousands, M for millions, B for billions).
       Do not report the same number twice (e.g. $100K and 100,000 USD).
     - Do not include URLs unless they are provided in the output of a tool you use.
@@ -165,17 +172,21 @@ def initialize_agent(_cfg, agent_progress_callback=None):
     def query_logging(query: str, response: str):
         print(f"Logging query={query}, response={response}")
     agent = Agent(
-        tools=create_assistant_tools(_cfg),
         topic="Financial data, annual reports and 10-K filings",
         custom_instructions=financial_bot_instructions,
         agent_progress_callback=agent_progress_callback,
         query_logging_callback=query_logging,
     )
     agent.report()
     return agent
 def get_agent_config() -> OmegaConf:
     companies = ", ".join(tickers.values())
     cfg = OmegaConf.create({

 import requests
 from functools import lru_cache
 from pydantic import Field, BaseModel
+from typing import Any, Optional
 from omegaconf import OmegaConf
 from vectara_agentic.agent import Agent
 from vectara_agentic.tools import ToolsFactory, VectaraToolFactory
+from vectara_agentic.agent_config import AgentConfig
+from vectara_agentic.sub_query_workflow import SubQuestionQueryWorkflow
 from dotenv import load_dotenv
 load_dotenv(override=True)
 initial_prompt = "How can I help you today?"
 # Tool to get the income statement for a given company and year using the FMP API
+@lru_cache(maxsize=256)
 def fmp_income_statement(
     ticker: str = Field(description="the ticker symbol of the company.", examples=["AAPL", "GOOG", "AMZN"]),
     year: int = Field(description="the year for which to get the income statement.", examples=[2020, 2021, 2022]),
         A dictionary with the income statement data.
         All data is in USD, but you can convert it to more compact form like K, M, B.
     """
+    if ticker not in tickers or year not in years:
+        return "Invalid ticker or year. Please call this tool with a valid company ticker and year."
     fmp_api_key = os.environ.get("FMP_API_KEY", None)
     if fmp_api_key is None:
         return "FMP_API_KEY environment variable not set. This tool does not work."
         ]
         values_dict = income_statement_specific_year.to_dict(orient="records")[0]
         return f"Financial results: {', '.join([f'{key}={value}' for key, value in values_dict.items() if key not in ['date', 'cik', 'link', 'finalLink']])}"
     return f"FMP API returned error {response.status_code}. This tool does not work."
+def get_company_info() -> list[str]:
+    """
+    Returns a dictionary of companies you can query about. Always check this before using any other tool.
+    The output is a dictionary of valid ticker symbols mapped to company names.
+    You can use this to identify the companies you can query about, and their ticker information.
+    """
+    return tickers
+def get_valid_years() -> list[str]:
+    """
+    Returns a list of the years for which financial reports are available.
+    Always check this before using any other tool.
+    """
+    return years
+class AgentTools:
+    def __init__(self, _cfg, agent_config):
+        self.tools_factory = ToolsFactory()
+        self.agent_config = agent_config
+        self.cfg = _cfg
+        self.vec_factory = VectaraToolFactory(vectara_api_key=_cfg.api_key,
+                                              vectara_corpus_key=_cfg.corpus_key)
+    def get_tools(self):
+        class QueryTranscriptsArgs(BaseModel):
+            query: str = Field(..., description="The user query, always in the form of a question", examples=["what are the risks reported?", "who are the competitors?"])
+            year: int | str = Field(
+                default=None,
+                description=f"The year this query relates to. An integer between {min(years)} and {max(years)} or a string specifying a condition on the year",
+                examples=[2020, '>2021', '<2023', '>=2021', '<=2023', '[2021, 2023]', '[2021, 2023)']
+            )
+            ticker: str = Field(..., description=f"The company ticker this query relates to. Must be a valid ticket symbol from the list {list(tickers.keys())}.")
+        vec_factory = VectaraToolFactory(vectara_api_key=self.cfg.api_key,
+                                        vectara_corpus_key=self.cfg.corpus_key)
+        summarizer = 'vectara-summary-table-md-query-ext-jan-2025-gpt-4o'
+        ask_transcripts = vec_factory.create_rag_tool(
+            tool_name = "ask_transcripts",
+            tool_description = """
+            Given a company name and year, responds to a user question about the company, based on analyst call transcripts about the company's financial reports for that year.
+            You can ask this tool any question about the company including risks, opportunities, financial performance, competitors and more.
+            """,
+            tool_args_schema = QueryTranscriptsArgs,
+            reranker = "multilingual_reranker_v1", rerank_k = 100, rerank_cutoff = 0.1,
+            n_sentences_before = 2, n_sentences_after = 4, lambda_val = 0.005,
+            summary_num_results = 15,
+            vectara_summarizer = summarizer,
+            include_citations = True,
+            verbose=False,
         )
+        class SearchTranscriptsArgs(BaseModel):
+            query: str = Field(..., description="The user query, always in the form of a question", examples=["what are the risks reported?", "who are the competitors?"])
+            top_k: int = Field(..., description="The number of results to return.")
+            year: int | str = Field(
+                default=None,
+                description=f"The year this query relates to. An integer between {min(years)} and {max(years)} or a string specifying a condition on the year",
+                examples=[2020, '>2021', '<2023', '>=2021', '<=2023', '[2021, 2023]', '[2021, 2023)']
+            )
+            ticker: str = Field(..., description=f"The company ticker this query relates to. Must be a valid ticket symbol from the list {list(tickers.keys())}.")
+        search_transcripts = vec_factory.create_search_tool(
+            tool_name = "search_transcripts",
+            tool_description = """
+            Given a company name and year, and a user query, retrieves relevant documents about the company.
+            """,
+            tool_args_schema = SearchTranscriptsArgs,
+            reranker = "multilingual_reranker_v1", rerank_k = 100,
+            lambda_val = 0.005,
+            verbose=False
         )
+        tools_factory = ToolsFactory()
+        return (
+                [tools_factory.create_tool(tool) for tool in
+                    [
+                        get_company_info,
+                        get_valid_years,
+                        fmp_income_statement,
+                    ]
+                ] +
+                [ask_transcripts, search_transcripts]
+        )
 def initialize_agent(_cfg, agent_progress_callback=None):
     financial_bot_instructions = """
     - You are a helpful financial assistant, with expertise in financial reporting, in conversation with a user.
+    - Use the 'fmp_income_statement' tool (with the company ticker and year) to obtain financial data.
     - Always check the 'get_company_info' and 'get_valid_years' tools to validate company and year are valid.
+    - Use the 'ask_transcripts' tool to answer most questions about the company's financial performance, risks, opportunities, strategy, competitors, and more.
     - Respond in a compact format by using appropriate units of measure (e.g., K for thousands, M for millions, B for billions).
       Do not report the same number twice (e.g. $100K and 100,000 USD).
     - Do not include URLs unless they are provided in the output of a tool you use.
     def query_logging(query: str, response: str):
         print(f"Logging query={query}, response={response}")
+    agent_config = AgentConfig()
     agent = Agent(
+        tools=AgentTools(_cfg, agent_config).get_tools(),
         topic="Financial data, annual reports and 10-K filings",
         custom_instructions=financial_bot_instructions,
         agent_progress_callback=agent_progress_callback,
         query_logging_callback=query_logging,
+        verbose=True,
+        #workflow_cls=SubQuestionQueryWorkflow,
     )
     agent.report()
     return agent
 def get_agent_config() -> OmegaConf:
     companies = ", ".join(tickers.values())
     cfg = OmegaConf.create({

requirements.txt CHANGED Viewed

@@ -6,4 +6,4 @@ streamlit_feedback==0.1.3
 uuid==1.30
 langdetect==1.0.9
 langcodes==3.4.0
-vectara-agentic==0.2.1

 uuid==1.30
 langdetect==1.0.9
 langcodes==3.4.0
+vectara-agentic==0.2.5

st_app.py CHANGED Viewed

@@ -19,6 +19,10 @@ def format_log_msg(log_msg: str):
 def agent_progress_callback(status_type: AgentStatusType, msg: str):
     output = f'<span style="color:blue;">{status_type.value}</span>: {msg}'
     st.session_state.log_messages.append(output)
     if 'status' in st.session_state:
         latest_message = ''
@@ -140,6 +144,9 @@ async def launch_bot():
         with st.chat_message("assistant", avatar='🤖'):
             st.session_state.status = st.status('Processing...', expanded=False)
             response = st.session_state.agent.chat(st.session_state.prompt)
             res = escape_dollars_outside_latex(response.response)
             #response = await st.session_state.agent.achat(st.session_state.prompt)

 def agent_progress_callback(status_type: AgentStatusType, msg: str):
     output = f'<span style="color:blue;">{status_type.value}</span>: {msg}'
+    if "log_messages" not in st.session_state:
+        st.session_state.log_messages = [output]
+    else:
+        st.session_state.log_messages.append(output)
     st.session_state.log_messages.append(output)
     if 'status' in st.session_state:
         latest_message = ''
         with st.chat_message("assistant", avatar='🤖'):
             st.session_state.status = st.status('Processing...', expanded=False)
             response = st.session_state.agent.chat(st.session_state.prompt)
+            # from vectara_agentic.sub_query_workflow import SubQuestionQueryWorkflow
+            # response = await st.session_state.agent.run(inputs=SubQuestionQueryWorkflow.InputsModel(query=st.session_state.prompt))
             res = escape_dollars_outside_latex(response.response)
             #response = await st.session_state.agent.achat(st.session_state.prompt)