rasulbrur commited on
Commit
a2c10b6
·
1 Parent(s): d5b663d

Added files initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +1 -0
  2. Dockerfile +41 -0
  3. api/__pycache__/endpoints.cpython-310.pyc +0 -0
  4. api/endpoints.py +151 -0
  5. app.py +157 -0
  6. data/financial data sp500 companies.csv +0 -0
  7. data/financial_data.csv +0 -0
  8. important.txt +11 -0
  9. main.py +147 -0
  10. modules/__pycache__/financial_query.cpython-310.pyc +0 -0
  11. modules/__pycache__/get_balance_sheet.cpython-310.pyc +0 -0
  12. modules/__pycache__/get_cash_flow.cpython-310.pyc +0 -0
  13. modules/__pycache__/get_company_profile.cpython-310.pyc +0 -0
  14. modules/__pycache__/get_financial_ratios.cpython-310.pyc +0 -0
  15. modules/__pycache__/get_income_statement.cpython-310.pyc +0 -0
  16. modules/__pycache__/get_income_tax.cpython-310.pyc +0 -0
  17. modules/__pycache__/get_interest.cpython-310.pyc +0 -0
  18. modules/__pycache__/get_market_cap.cpython-310.pyc +0 -0
  19. modules/__pycache__/get_net_income.cpython-310.pyc +0 -0
  20. modules/__pycache__/get_profit_margin.cpython-310.pyc +0 -0
  21. modules/__pycache__/get_research_info.cpython-310.pyc +0 -0
  22. modules/__pycache__/get_revenue.cpython-310.pyc +0 -0
  23. modules/__pycache__/get_stock_price.cpython-310.pyc +0 -0
  24. modules/get_balance_sheet.py +15 -0
  25. modules/get_cash_flow.py +14 -0
  26. modules/get_company_profile.py +15 -0
  27. modules/get_cost_info.py +11 -0
  28. modules/get_divident_info.py +14 -0
  29. modules/get_earnings_per_share.py +14 -0
  30. modules/get_financial_ratios.py +14 -0
  31. modules/get_historical_stock_price.py +14 -0
  32. modules/get_income_tax.py +11 -0
  33. modules/get_interest.py +11 -0
  34. modules/get_market_cap.py +14 -0
  35. modules/get_net_income.py +14 -0
  36. modules/get_profit_margin.py +14 -0
  37. modules/get_research_info.py +11 -0
  38. modules/get_revenue.py +14 -0
  39. modules/get_stock_price.py +14 -0
  40. rag/__pycache__/embedder.cpython-310.pyc +0 -0
  41. rag/__pycache__/retriever.cpython-310.pyc +0 -0
  42. rag/__pycache__/sql_db.cpython-310.pyc +0 -0
  43. rag/__pycache__/web_search.cpython-310.pyc +0 -0
  44. rag/embedder.py +20 -0
  45. rag/graphrag.py +72 -0
  46. rag/retriever.py +202 -0
  47. rag/sql_db.py +171 -0
  48. rag/web_search.py +12 -0
  49. repo.jpg +0 -0
  50. requirements.txt +69 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
Dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Use the official slim Python base image
FROM python:3.10-slim

# System dependencies: gcc + PortAudio headers for pyaudio,
# wget/unzip/curl for fetching the Vosk model.
RUN apt-get update && \
    apt-get install -y --no-install-recommends wget unzip curl gcc portaudio19-dev && \
    rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy only the dependency manifest first so the pip layer is cached
# unless requirements.txt itself changes (copying all code first busts the cache).
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --upgrade pip && pip install --no-cache-dir -r requirements.txt

# Install spaCy model
# RUN python -m spacy download en_core_web_lg
RUN python -m spacy download en_core_web_sm

# Download and unzip the Vosk model
RUN wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip && \
    unzip vosk-model-small-en-us-0.15.zip && \
    rm vosk-model-small-en-us-0.15.zip

# Copy the application code last so code edits don't invalidate the layers above
COPY . .

# Install Ollama
# RUN curl -fsSL https://ollama.com/install.sh | sh

# Pull the Ollama model
# RUN ollama serve & sleep 5 && ollama pull gemma:2b

# Expose the port FastAPI will run on
EXPOSE 7860

# Start the API
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

# CMD uvicorn app:app --host 0.0.0.0 --port $PORT
api/__pycache__/endpoints.cpython-310.pyc ADDED
Binary file (4.22 kB). View file
 
api/endpoints.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# api/endpoints.py
import httpx
import os

class FMPEndpoints:
    """Async client for the Financial Modeling Prep (FMP) v3 REST API.

    Each public method wraps one FMP endpoint and returns the decoded JSON
    payload. HTTP and transport failures are normalized into plain
    ``Exception``s with readable messages so callers can surface them directly.
    """

    def __init__(self):
        # The key is read from the environment; FMP rejects requests without it.
        self.fmp_api_key = os.getenv("FMP_API_KEY")
        self.base_url = "https://financialmodelingprep.com/api/v3"

    def _base_params(self, **extra):
        """Return query parameters pre-populated with the API key."""
        params = {"apikey": self.fmp_api_key}
        params.update(extra)
        return params

    async def _get(self, path, params, label):
        """GET ``{base_url}/{path}`` with ``params`` and return the JSON body.

        Args:
            path: endpoint path relative to the v3 base URL.
            params: query parameters (already including the API key).
            label: human-readable endpoint name used in error messages.

        Raises:
            Exception: "API error: ..." for HTTP error responses, or
                "Error fetching {label}: ..." for any other failure.
        """
        endpoint = f"{self.base_url}/{path}"
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(endpoint, params=params)
                response.raise_for_status()
                return response.json()
        except httpx.HTTPStatusError as e:
            # Raised from within the handler, so it is NOT re-caught below.
            raise Exception(f"API error: {e.response.status_code} - {e.response.text}")
        except Exception as e:
            raise Exception(f"Error fetching {label}: {e}")

    async def get_income_statement(self, ticker, year=None, period="annual", limit=1):
        """Fetch income statement data for a given ticker."""
        params = self._base_params(period=period, limit=limit)
        if year:
            params["year"] = year
        return await self._get(f"income-statement/{ticker}", params, "income statement")

    async def get_quote_short(self, ticker):
        """Fetch the current stock price (short quote) for a given ticker."""
        return await self._get(f"quote-short/{ticker}", self._base_params(), "quote")

    async def get_ratios(self, ticker, year=None, limit=1):
        """Fetch financial ratios for a given ticker."""
        params = self._base_params(limit=limit)
        if year:
            params["year"] = year
        return await self._get(f"ratios/{ticker}", params, "ratios")

    async def get_profile(self, ticker):
        """Fetch company profile data for a given ticker."""
        return await self._get(f"profile/{ticker}", self._base_params(), "profile")

    async def get_historical_price(self, ticker, date=None):
        """Fetch historical stock price for a given ticker on a specific date."""
        params = self._base_params()
        if date:
            # FMP expects a from/to window; a single day uses the same bound twice.
            params["from"] = date
            params["to"] = date
        return await self._get(f"historical-price-full/{ticker}", params, "historical price")

    async def get_balance_sheet(self, ticker, year=None, period="annual", limit=1):
        """Fetch balance sheet data for a given ticker."""
        params = self._base_params(period=period, limit=limit)
        if year:
            params["year"] = year
        return await self._get(f"balance-sheet-statement/{ticker}", params, "balance sheet")

    async def get_cash_flow(self, ticker, year=None, period="annual", limit=1):
        """Fetch cash flow statement data for a given ticker."""
        params = self._base_params(period=period, limit=limit)
        if year:
            params["year"] = year
        return await self._get(f"cash-flow-statement/{ticker}", params, "cash flow")

    async def get_key_metrics(self, ticker, year=None, limit=1):
        """Fetch key metrics (e.g., EPS) for a given ticker."""
        params = self._base_params(limit=limit)
        if year:
            params["year"] = year
        return await self._get(f"key-metrics/{ticker}", params, "key metrics")
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py
from fastapi import FastAPI, Request, Form
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from main import process_query
from voice.speech_to_text import SpeechToText
import os
import asyncio
import pyaudio
import wave
import logging

# Module-wide logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

# Serve CSS/JS assets from ./static and render pages from ./templates
app.mount("/static", StaticFiles(directory="static"), name="static")

templates = Jinja2Templates(directory="templates")

# Location of the Vosk speech model and of the scratch WAV recording
vosk_model_path = "./vosk-model-small-en-us-0.15"
audio_file_path = "voice/temp_audio.wav"

# Make sure the directory holding the scratch WAV exists before first use
os.makedirs("voice", exist_ok=True)

# Shared speech-to-text engine, loaded once at startup
stt = SpeechToText(model_path=vosk_model_path)

# Mutable recording state shared by the /start_recording and /stop_recording routes
recording = False
audio_frames = []
recording_task = None
def save_audio_to_wav(frames, sample_rate=16000):
    """Write raw 16-bit mono PCM frames to ``audio_file_path`` as a WAV file.

    Args:
        frames: sequence of bytes chunks captured from the microphone.
        sample_rate: sampling rate in Hz (the recorder captures at 16 kHz).

    Raises:
        Exception: re-raised after logging if the file cannot be written.
    """
    try:
        logger.info(f"Saving audio to {audio_file_path} with {len(frames)} frames")
        # Context manager guarantees the file handle is closed even if a
        # setter or write raises (the original leaked the handle on error).
        with wave.open(audio_file_path, 'wb') as wf:
            wf.setnchannels(1)        # mono
            wf.setsampwidth(2)        # 16-bit samples
            wf.setframerate(sample_rate)
            wf.writeframes(b''.join(frames))
        if os.path.exists(audio_file_path):
            logger.info(f"WAV file saved successfully: {os.path.getsize(audio_file_path)} bytes")
        else:
            logger.error("WAV file was not created")
    except Exception as e:
        logger.error(f"Error saving WAV file: {str(e)}")
        raise
58
+
async def record_audio():
    """Background task: pull microphone audio into the global ``audio_frames``.

    Runs until the global ``recording`` flag is cleared by /stop_recording.
    NOTE(review): stream.read() is a blocking call; the small asyncio.sleep
    keeps the event loop responsive, but an executor-based read would be
    cleaner — confirm latency is acceptable.
    """
    global audio_frames
    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000,
                        input=True, frames_per_buffer=1024)
        stream.start_stream()
        logger.info("Recording started...")

        while recording:
            data = stream.read(1024, exception_on_overflow=False)
            audio_frames.append(data)
            await asyncio.sleep(0.01)  # Small sleep to prevent blocking

        logger.info(f"Recording stopped, captured {len(audio_frames)} frames")
    except Exception as e:
        logger.error(f"Error during recording: {str(e)}")
    finally:
        # Close the stream (if it opened) before terminating PortAudio so the
        # device is released even when an exception interrupts the loop —
        # the original only closed it on the success path.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
80
+
@app.get("/", response_class=HTMLResponse)
async def get_index(request: Request):
    """Render the landing page."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
84
+
@app.post("/start_recording", response_class=JSONResponse)
async def start_recording():
    """Begin capturing microphone audio in a background asyncio task."""
    global recording, audio_frames, recording_task
    # Guard clause: refuse to start a second capture task.
    if recording:
        logger.warning("Recording already in progress")
        return {"status": "Already recording"}
    recording = True
    audio_frames = []
    recording_task = asyncio.create_task(record_audio())
    logger.info("Started recording task")
    return {"status": "Recording started"}
96
+
@app.post("/stop_recording", response_class=HTMLResponse)
async def stop_recording(request: Request):
    """Stop the capture task, persist the audio, and render the transcription."""
    global recording, recording_task

    # Guard clause: nothing to stop.
    if not recording:
        logger.warning("No recording in progress")
        return templates.TemplateResponse("index.html", {
            "request": request,
            "error": "No recording in progress."
        })

    recording = False
    if recording_task:
        await recording_task  # let the capture loop drain and exit
        recording_task = None

    # Persist the captured frames as a WAV file.
    try:
        save_audio_to_wav(audio_frames)
    except Exception as e:
        logger.error(f"Failed to save audio: {str(e)}")
        return templates.TemplateResponse("index.html", {
            "request": request,
            "error": f"Failed to save audio: {str(e)}"
        })

    # Run speech-to-text over the saved file.
    try:
        text = stt.transcribe_audio(audio_file_path)
        logger.info(f"Transcription result: '{text}'")
    except Exception as e:
        logger.error(f"Transcription error: {str(e)}")
        return templates.TemplateResponse("index.html", {
            "request": request,
            "error": f"Transcription error: {str(e)}"
        })

    if not text:
        logger.warning("Transcription returned no text")
        return templates.TemplateResponse("index.html", {
            "request": request,
            "error": "Could not understand the audio."
        })
    return templates.TemplateResponse("index.html", {
        "request": request,
        "transcribed_text": text
    })
141
+
@app.post("/query", response_class=HTMLResponse)
async def handle_query(request: Request, query_text: str = Form(...), use_retriever: str = Form("no")):
    """Run the full query pipeline and render the results page."""
    wants_retriever = use_retriever.lower() in ["yes", "y"]
    result = await process_query(vosk_model_path, query_text=query_text, use_retriever=wants_retriever)

    # Template context mirrors the pipeline's output dict field by field.
    context = {
        "request": request,
        "User_Query": query_text,
        "Intent": result["intent"],
        "Entities": result["entities"],
        "API_Response": result["base_response"],
        "RAG_Response": result["retriever_response"],
        "Web_Search_Response": result["web_search_response"],
        "Final_Response": result["final_response"],
        "Error": result["error"],
    }
    return templates.TemplateResponse("index.html", context)
data/financial data sp500 companies.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/financial_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
important.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1. The full project is inside the codebase folder. To run the project,
2
+ Create conda env using “conda create -n env_name python=3.10”
3
+ Activate the env using “conda activate env_name”
4
+ Install requirements.txt using, “pip install -r requirements.txt”
5
+ Install the bash scripts using “source env_variable.sh” and “source setup.sh”
6
+ Run "uvicorn app:app --reload"
7
+
8
+ 2. The "Documentation of AI Finance Accountant Agent" has a full overview of the project, explaining key features, required tools, data sources, etc. I attached some testing examples at the end of the documentation file.
9
+
10
+ 3. The “demo_video.mkv” file is a demo of how my api agent is working. Carefully watch the video.
11
+
main.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# main.py
import asyncio
import importlib
from voice.speech_to_text import SpeechToText
from voice.intent_classifier import IntentClassifier
from api.endpoints import FMPEndpoints
from rag.retriever import Retriever
from rag.sql_db import SQL_Key_Pair
from rag.web_search import duckduckgo_web_search

# Maps a classified intent to the (module path, class name) that handles it.
# Hoisted to module scope: it is static, so there is no need to rebuild it per call.
# NOTE(review): "get_dividend_info" points at modules.get_dividend_info, but the
# repository ships modules/get_divident_info.py (typo) — confirm the filename,
# otherwise this intent always raises ImportError.
INTENT_TO_MODULE = {
    "get_net_income": ("modules.get_net_income", "GetNetIncome"),
    "get_revenue": ("modules.get_revenue", "GetRevenue"),
    "get_stock_price": ("modules.get_stock_price", "GetStockPrice"),
    "get_profit_margin": ("modules.get_profit_margin", "GetProfitMargin"),
    "get_company_profile": ("modules.get_company_profile", "GetCompanyProfile"),
    "get_market_cap": ("modules.get_market_cap", "GetMarketCap"),
    "get_historical_stock_price": ("modules.get_historical_stock_price", "GetHistoricalStockPrice"),
    "get_dividend_info": ("modules.get_dividend_info", "GetDividendInfo"),
    "get_balance_sheet": ("modules.get_balance_sheet", "GetBalanceSheet"),
    "get_cash_flow": ("modules.get_cash_flow", "GetCashFlow"),
    "get_financial_ratios": ("modules.get_financial_ratios", "GetFinancialRatios"),
    "get_earnings_per_share": ("modules.get_earnings_per_share", "GetEarningsPerShare"),
    "get_interest": ("modules.get_interest", "GetInterest"),
    "get_income_tax": ("modules.get_income_tax", "GetIncomeTax"),
    "get_cost_info": ("modules.get_cost_info", "GetCostInfo"),
    "get_research_info": ("modules.get_research_info", "GetResearchInfo"),
}

async def process_query(vosk_model_path, audio_data=None, query_text=None, use_retriever=False):
    """Run the full voice/text query pipeline: STT -> intent -> API -> RAG -> web.

    Args:
        vosk_model_path: path to the Vosk model used for speech-to-text.
        audio_data: optional audio input; transcribed when given.
        query_text: optional text query, used when no audio is supplied.
        use_retriever: when True, also augment a successful API answer with
            data retrieved from the local CSV database.

    Returns:
        dict with the user query, classified intent, extracted entities, the
        responses from each stage (API, retriever, web search), the chosen
        final response, and an ``error`` field describing any failure.
    """
    # Step 1: Initialize components
    stt = SpeechToText(model_path=vosk_model_path)
    classifier = IntentClassifier()
    endpoints = FMPEndpoints()
    # RAG tools backed by the local CSV snapshot
    retriever = Retriever(file_path="./data/financial_data.csv")
    sql_db = SQL_Key_Pair(file_path="./data/financial_data.csv")

    # Output format returned to the UI
    output = {
        "User asked": "",
        "intent": "",
        "entities": "",
        "base_response": "",
        "retriever_response": "",
        "web_search_response": "",
        "final_response": "",
        "error": ""
    }

    try:
        # Step 2: Process input (audio wins when both are supplied)
        if audio_data:
            text = stt.transcribe_audio(audio_data)
            if not text:
                output["error"] = "Could not understand the audio."
                return output
        elif query_text:
            text = query_text
        else:
            output["error"] = "No audio or text query provided."
            return output

        output["User asked"] = text

        # Step 3: Classify intent (zero-shot) and extract entities
        intent = classifier.classify_with_llm(text)
        output["intent"] = intent if intent else "Could not classify intent."

        entities = classifier.extract_entities(text)
        output["entities"] = str(entities)

        if not intent:
            output["error"] = "Could not classify intent."
            return output

        # Identify module for API calling
        module_info = INTENT_TO_MODULE.get(intent)
        if not module_info:
            output["error"] = f"Unsupported intent: {intent}"
            return output

        module_path, class_name = module_info
        try:
            module = importlib.import_module(module_path)
            class_instance = getattr(module, class_name)()
            # .get() avoids a KeyError crash when the classifier omits a field.
            ticker = entities.get("ticker")

            # Step 4: Get the base response from the intent-specific module
            try:
                base_response = await class_instance.get_data(
                    ticker=ticker,
                    year=entities.get("year"),
                    date=entities.get("date"),
                )
            except Exception as e:
                base_response = f"Error fetching base response: {e}"

            # Step 5: choose the final answer: API -> CSV retriever -> web search
            final_response = None
            if base_response and "Error" not in str(base_response) and "None" not in str(base_response):
                # Base response succeeded
                final_response = base_response
                output["base_response"] = f"{final_response}"

                # Optionally enrich the answer with local CSV data
                if use_retriever:
                    retriever_response = sql_db.query_db(entities.get("ticker"), entities.get("metric"))
                    final_response = f"{final_response} Additional Info found in the CSV: {retriever_response}"
                    output["retriever_response"] = retriever_response
            else:
                # Base response failed: fall back to the CSV retriever
                output["base_response"] = f"{base_response} Using retriever to query CSV file..."
                retriever_response = sql_db.query_db(entities.get("ticker"), entities.get("metric"))
                output["retriever_response"] = retriever_response

                if "No relevant data found" in retriever_response:
                    # Both API and RAG failed: last resort is a web search
                    search_results = duckduckgo_web_search(text)
                    if search_results:
                        output["web_search_response"] = search_results[0]['snippet']
                        final_response = search_results[0]['snippet']
                    else:
                        output["web_search_response"] = "No relevant data found on the web."
                        final_response = "No relevant data found on the web."
                else:
                    final_response = retriever_response

            output["final_response"] = final_response
        except ImportError as e:
            output["error"] = f"Module import error: {e}"
        except AttributeError as e:
            output["error"] = f"Class not found in module: {e}"
        except Exception as e:
            output["error"] = f"Error processing intent {intent}: {e}"

    except Exception as e:
        output["error"] = f"Unexpected error: {e}"

    # Return output to the User Interface
    return output
modules/__pycache__/financial_query.cpython-310.pyc ADDED
Binary file (1.15 kB). View file
 
modules/__pycache__/get_balance_sheet.cpython-310.pyc ADDED
Binary file (1 kB). View file
 
modules/__pycache__/get_cash_flow.cpython-310.pyc ADDED
Binary file (924 Bytes). View file
 
modules/__pycache__/get_company_profile.cpython-310.pyc ADDED
Binary file (903 Bytes). View file
 
modules/__pycache__/get_financial_ratios.cpython-310.pyc ADDED
Binary file (917 Bytes). View file
 
modules/__pycache__/get_income_statement.cpython-310.pyc ADDED
Binary file (1.37 kB). View file
 
modules/__pycache__/get_income_tax.cpython-310.pyc ADDED
Binary file (586 Bytes). View file
 
modules/__pycache__/get_interest.cpython-310.pyc ADDED
Binary file (582 Bytes). View file
 
modules/__pycache__/get_market_cap.cpython-310.pyc ADDED
Binary file (847 Bytes). View file
 
modules/__pycache__/get_net_income.cpython-310.pyc ADDED
Binary file (904 Bytes). View file
 
modules/__pycache__/get_profit_margin.cpython-310.pyc ADDED
Binary file (910 Bytes). View file
 
modules/__pycache__/get_research_info.cpython-310.pyc ADDED
Binary file (595 Bytes). View file
 
modules/__pycache__/get_revenue.cpython-310.pyc ADDED
Binary file (900 Bytes). View file
 
modules/__pycache__/get_stock_price.cpython-310.pyc ADDED
Binary file (844 Bytes). View file
 
modules/get_balance_sheet.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_balance_sheet.py
from api.endpoints import FMPEndpoints

class GetBalanceSheet:
    """Answers balance-sheet queries (total assets/liabilities) via the FMP API."""

    async def get_data(self, ticker, year=None, date=None):
        # `date` is accepted for interface parity with the other modules; unused here.
        endpoints = FMPEndpoints()
        try:
            data = await endpoints.get_balance_sheet(ticker, year=year)
            if not data:
                return f"Error: No balance sheet data available for {ticker}."
            latest = data[0]
            # `or 0` also guards against explicit nulls in the payload,
            # which would otherwise break the division below.
            assets = latest.get("totalAssets") or 0
            liabilities = latest.get("totalLiabilities") or 0
            return f"{ticker}'s assets for {year or 'the latest year'} are ${assets / 1_000_000_000:.2f} billion, and liabilities are ${liabilities / 1_000_000_000:.2f} billion."
        except Exception as e:
            return f"Error fetching balance sheet: {e}"
modules/get_cash_flow.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_cash_flow.py
from api.endpoints import FMPEndpoints

class GetCashFlow:
    """Answers operating-cash-flow queries via the FMP /cash-flow-statement endpoint."""

    async def get_data(self, ticker, year=None, date=None):
        endpoints = FMPEndpoints()
        try:
            data = await endpoints.get_cash_flow(ticker, year=year)
            if not data:
                return f"Error: No cash flow data available for {ticker}."
            # FMP's cash-flow-statement payload names this field
            # "netCashProvidedByOperatingActivities"; the old key
            # "cashFlowFromOperatingActivities" (which always yielded 0)
            # is kept as a fallback. `or 0` guards explicit nulls.
            value = data[0].get(
                "netCashProvidedByOperatingActivities",
                data[0].get("cashFlowFromOperatingActivities", 0),
            ) or 0
            return f"{ticker}'s cash from operations for {year or 'the latest year'} is ${value / 1_000_000_000:.2f} billion."
        except Exception as e:
            return f"Error fetching cash flow: {e}"
modules/get_company_profile.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_company_profile.py
from api.endpoints import FMPEndpoints

class GetCompanyProfile:
    """Answers company-profile queries (CEO, sector) via the FMP /profile endpoint."""

    async def get_data(self, ticker, year=None, date=None):
        # year/date accepted for interface parity with the other modules; unused here.
        endpoints = FMPEndpoints()
        try:
            data = await endpoints.get_profile(ticker)
            if not data:
                return f"Error: No company profile data available for {ticker}."
            profile = data[0]
            # `or "N/A"` also covers explicit nulls, which .get() would pass through.
            ceo = profile.get("ceo") or "N/A"
            sector = profile.get("sector") or "N/A"
            return f"{ticker}'s CEO is {ceo} and it operates in the {sector} sector."
        except Exception as e:
            return f"Error fetching company profile: {e}"
modules/get_cost_info.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
# modules/cost_info.py
from api.endpoints import FMPEndpoints

class GetCostInfo:
    """Placeholder handler for cost-of-goods / operating-cost queries."""

    async def get_data(self, ticker):
        endpoints = FMPEndpoints()
        try:
            # Not implemented yet: returning None signals the caller to fall
            # back to the CSV retriever / web search.
            return None
        except Exception:
            return None
modules/get_divident_info.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_dividend_info.py
# NOTE(review): this file is shipped as modules/get_divident_info.py (typo),
# while main.py imports "modules.get_dividend_info" — confirm/rename on disk.
from api.endpoints import FMPEndpoints

class GetDividendInfo:
    """Answers dividend queries using the payout ratio from FMP /ratios."""

    async def get_data(self, ticker, year=None, date=None):
        endpoints = FMPEndpoints()
        try:
            data = await endpoints.get_ratios(ticker, year=year)
            if not data:
                return f"Error: No dividend info available for {ticker}."
            # `or 0` guards against an explicit null payoutRatio, which would
            # otherwise raise TypeError on the multiplication.
            value = (data[0].get("payoutRatio") or 0) * 100
            return f"{ticker}'s dividend payout ratio for {year or 'the latest year'} is {value:.2f}%."
        except Exception as e:
            return f"Error fetching dividend info: {e}"
modules/get_earnings_per_share.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_earnings_per_share.py
from api.endpoints import FMPEndpoints

class GetEarningsPerShare:
    """Answers EPS queries via the FMP /key-metrics endpoint."""

    async def get_data(self, ticker, year=None, date=None):
        endpoints = FMPEndpoints()
        try:
            data = await endpoints.get_key_metrics(ticker, year=year)
            if not data:
                return f"Error: No earnings per share data available for {ticker}."
            # key-metrics reports per-share earnings as "netIncomePerShare";
            # "eps" is tried first for backward compatibility with the old code.
            # `or 0` guards explicit nulls, which would break the :.2f format.
            value = data[0].get("eps", data[0].get("netIncomePerShare", 0)) or 0
            return f"{ticker}'s earnings per share for {year or 'the latest year'} is ${value:.2f}."
        except Exception as e:
            return f"Error fetching earnings per share: {e}"
modules/get_financial_ratios.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_financial_ratios.py
from api.endpoints import FMPEndpoints

class GetFinancialRatios:
    """Answers financial-ratio queries (current ratio) via the FMP /ratios endpoint."""

    async def get_data(self, ticker, year=None, date=None):
        endpoints = FMPEndpoints()
        try:
            data = await endpoints.get_ratios(ticker, year=year)
            if not data:
                return f"Error: No financial ratios data available for {ticker}."
            # `or 0` guards against an explicit null currentRatio,
            # which would otherwise break the :.2f format below.
            current_ratio = data[0].get("currentRatio") or 0
            return f"{ticker}'s current ratio for {year or 'the latest year'} is {current_ratio:.2f}."
        except Exception as e:
            return f"Error fetching financial ratios: {e}"
modules/get_historical_stock_price.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_historical_stock_price.py
from api.endpoints import FMPEndpoints

class GetHistoricalStockPrice:
    """Answers "price on <date>" queries via FMP historical-price-full data."""

    async def get_data(self, ticker, year=None, date=None):
        endpoints = FMPEndpoints()
        try:
            data = await endpoints.get_historical_price(ticker, date=date)
            # The endpoint may return an empty payload when the date is out of
            # range, so check both the payload and its "historical" key.
            if not data or not data.get("historical"):
                return f"Error: No historical stock price data available for {ticker} on {date}."
            # `or 0` guards an explicit null close value.
            value = data["historical"][0].get("close") or 0
            return f"{ticker}'s stock price on {date} was ${value:.2f}."
        except Exception as e:
            return f"Error fetching historical stock price: {e}"
modules/get_income_tax.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_income_tax.py
from api.endpoints import FMPEndpoints

class GetIncomeTax:
    """Placeholder handler for income-tax queries."""

    async def get_data(self, ticker):
        endpoints = FMPEndpoints()
        try:
            # Not implemented yet: returning None signals the caller to fall
            # back to the CSV retriever / web search.
            return None
        except Exception:
            return None
modules/get_interest.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_interest.py
from api.endpoints import FMPEndpoints

class GetInterest:
    """Placeholder handler for interest expense/income queries."""

    async def get_data(self, ticker):
        endpoints = FMPEndpoints()
        try:
            # Not implemented yet: returning None signals the caller to fall
            # back to the CSV retriever / web search.
            return None
        except Exception:
            return None
modules/get_market_cap.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_market_cap.py
from api.endpoints import FMPEndpoints

class GetMarketCap:
    """Answers market-cap queries via the FMP /profile endpoint."""

    async def get_data(self, ticker, year=None, date=None):
        # year/date accepted for interface parity with the other modules; unused here.
        endpoints = FMPEndpoints()
        try:
            data = await endpoints.get_profile(ticker)
            if not data:
                return f"Error: No market cap data available for {ticker}."
            # `or 0` guards an explicit null mktCap, which would break the division.
            value = (data[0].get("mktCap") or 0) / 1_000_000_000
            return f"{ticker}'s market cap is ${value:.2f} billion."
        except Exception as e:
            return f"Error fetching market cap: {e}"
modules/get_net_income.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_net_income.py
from api.endpoints import FMPEndpoints

class GetNetIncome:
    """Answers net-income queries via the FMP /income-statement endpoint."""

    async def get_data(self, ticker, year=None, date=None):
        endpoints = FMPEndpoints()
        try:
            data = await endpoints.get_income_statement(ticker, year=year)
            if not data:
                return f"Error: No net income data available for {ticker}."
            # `or 0` guards an explicit null netIncome, which would break the division.
            value = data[0].get("netIncome") or 0
            return f"{ticker}'s net income for {year or 'the latest year'} is ${value / 1_000_000_000:.2f} billion."
        except Exception as e:
            return f"Error fetching net income: {e}"
modules/get_profit_margin.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# modules/get_profit_margin.py
from api.endpoints import FMPEndpoints

class GetProfitMargin:
    """Answers profit-margin queries via the FMP /ratios endpoint."""

    async def get_data(self, ticker, year=None, date=None):
        endpoints = FMPEndpoints()
        try:
            data = await endpoints.get_ratios(ticker, year=year)
            if not data:
                return f"Error: No profit margin data available for {ticker}."
            # `or 0` guards an explicit null netProfitMargin, which would
            # otherwise raise TypeError on the multiplication.
            value = (data[0].get("netProfitMargin") or 0) * 100
            return f"{ticker}'s profit margin for {year or 'the latest year'} is {value:.2f}%."
        except Exception as e:
            return f"Error fetching profit margin: {e}"
modules/get_research_info.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# modules/get_research_info.py
from api.endpoints import FMPEndpoints


class GetResearchInfo:
    """Placeholder module for research & development (R&D) information."""

    async def get_data(self, ticker):
        """Return R&D info for *ticker*.

        Not implemented yet: always returns None, matching the sibling
        modules' interface so the dispatcher can call it uniformly.
        """
        client = FMPEndpoints()  # instantiated for parity with sibling modules
        try:
            # R&D retrieval logic has not been written yet.
            return None
        except Exception:
            return None
modules/get_revenue.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# modules/get_revenue.py
from api.endpoints import FMPEndpoints


class GetRevenue:
    """Fetch a company's revenue from the FMP income-statement endpoint."""

    async def get_data(self, ticker, year=None, date=None):
        """Return a human-readable revenue string for *ticker*.

        `date` is accepted for interface parity with the sibling metric
        modules but is not used here.
        """
        client = FMPEndpoints()
        try:
            statements = await client.get_income_statement(ticker, year=year)
            if not statements:
                return f"Error: No revenue data available for {ticker}."
            # revenue is reported in dollars; convert to billions for display.
            revenue = statements[0].get("revenue", 0)
            return f"{ticker}'s revenue for {year or 'the latest year'} is ${revenue / 1_000_000_000:.2f} billion."
        except Exception as exc:
            return f"Error fetching revenue: {exc}"
modules/get_stock_price.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# modules/get_stock_price.py
from api.endpoints import FMPEndpoints


class GetStockPrice:
    """Fetch a company's latest stock price from the FMP short-quote endpoint."""

    async def get_data(self, ticker, year=None, date=None):
        """Return a human-readable current-price string for *ticker*.

        `year` and `date` are accepted for interface parity with the sibling
        metric modules but are not used by the real-time quote endpoint.
        """
        client = FMPEndpoints()
        try:
            quote = await client.get_quote_short(ticker)
            if not quote:
                return f"Error: No stock price data available for {ticker}."
            price = quote[0].get("price", 0)
            return f"{ticker}'s current stock price is ${price:.2f}."
        except Exception as exc:
            return f"Error fetching stock price: {exc}"
rag/__pycache__/embedder.cpython-310.pyc ADDED
Binary file (968 Bytes). View file
 
rag/__pycache__/retriever.cpython-310.pyc ADDED
Binary file (4.61 kB). View file
 
rag/__pycache__/sql_db.cpython-310.pyc ADDED
Binary file (5.51 kB). View file
 
rag/__pycache__/web_search.cpython-310.pyc ADDED
Binary file (588 Bytes). View file
 
rag/embedder.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# rag/embedder.py
from sentence_transformers import SentenceTransformer
import numpy as np


class Embedder:
    """Thin wrapper around a SentenceTransformer model for text embedding."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):  # alt: "all-mpnet-base-v2"
        self.model = SentenceTransformer(model_name)

    def embed(self, texts):
        """Embed one string or a list of strings into vectors.

        Args:
            texts: A single string or a list of strings.

        Returns:
            numpy.ndarray: One embedding row per input text.
        """
        batch = [texts] if isinstance(texts, str) else texts
        return self.model.encode(batch, convert_to_numpy=True)
rag/graphrag.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from neo4j import GraphDatabase
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage,
    Settings,
)
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.neo4jvector import Neo4jVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


class GraphRAGRetriever:
    """RAG retriever that stores document embeddings in a Neo4j vector store
    and answers queries with a local Ollama-served LLM via LlamaIndex.
    """

    def __init__(self, neo4j_url, neo4j_username, neo4j_password):
        # Set up the embedding model (MiniLM sentence embeddings, 384-dim).
        self.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

        # Set up the Ollama LLM; request_timeout=None disables the client timeout.
        self.llm = Ollama(model="gemma:2b", request_timeout=None)

        # Register both on the global LlamaIndex Settings so index and query
        # engines pick them up implicitly.
        Settings.llm = self.llm
        Settings.embed_model = self.embed_model

        # Set up the Neo4j driver
        self.driver = GraphDatabase.driver(neo4j_url, auth=(neo4j_username, neo4j_password))

        # Set up the Neo4j vector store
        self.vector_store = Neo4jVectorStore(
            url=neo4j_url,
            username=neo4j_username,
            password=neo4j_password,
            embedding_dimension=384,  # Matches MiniLM model
            driver=self.driver
        )

    def ingest_documents(self, directory_path):
        """Load every file under *directory_path*, index it, and persist.

        NOTE(review): the vector store is passed directly to
        ``from_documents(vector_store=...)``; LlamaIndex conventionally
        expects it wrapped in a ``StorageContext`` — confirm embeddings
        actually land in Neo4j rather than the default in-memory store.
        """
        # Load documents from the specified directory
        documents = SimpleDirectoryReader(directory_path).load_data()

        # Create the vector index
        index = VectorStoreIndex.from_documents(
            documents,
            vector_store=self.vector_store,
        )

        # Persist the index to disk
        index.storage_context.persist()

    def query(self, question):
        """Answer *question* against the previously persisted index.

        Returns:
            str: The LLM-generated response text.
        """
        # Load the index from storage; the "./storage" dir must match where
        # ingest_documents() persisted it — TODO confirm the default persist
        # dir used above is the same path.
        storage_context = StorageContext.from_defaults(persist_dir="./storage")
        index = load_index_from_storage(storage_context)

        # Create a query engine and execute the query
        query_engine = index.as_query_engine()
        response = query_engine.query(question)

        return str(response)


if __name__ == "__main__":
    # Manual smoke test; requires a reachable Neo4j and NEO4J_PASSWORD set.
    retriever = GraphRAGRetriever(
        neo4j_url="bolt://localhost:7687/",
        neo4j_username="neo4j",
        neo4j_password=os.getenv("NEO4J_PASSWORD")
    )
    retriever.ingest_documents("/home/bapary/Music/AI Finance Agent/rag/data")
    answer = retriever.query("What is the revenue of Company Microsoft in 2021?")
    print(answer)
rag/retriever.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
import faiss
import numpy as np
from .embedder import Embedder
from fuzzywuzzy import fuzz
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate

class Retriever:
    """FAISS-backed retriever over a tabular financial dataset.

    Rows are indexed by their "Ticker" column; a query is matched on three
    axes (ticker embedding similarity, column-name embedding similarity,
    fuzzy year-string match) and the best-scoring cell is handed to the
    module-level ``answer_question`` helper for LLM phrasing.
    """

    def __init__(self, file_path):
        # Embedder wraps a MiniLM sentence-transformer model.
        self.embedder = Embedder(model_name="all-MiniLM-L6-v2")
        self.index = None        # FAISS index over ticker embeddings
        self.documents = []      # ticker strings, row-aligned with self.data
        self.data = None         # the loaded DataFrame
        self.embeddings = None   # ticker embedding matrix
        self.load_file(file_path)
        self.build_index()

    def load_file(self, file_path):
        """Load a CSV/Excel file into ``self.data`` and extract tickers.

        On any failure (bad format, missing "Ticker" column, read error)
        the retriever degrades to an empty dataset instead of raising.
        """
        try:
            if file_path.endswith('.csv'):
                self.data = pd.read_csv(file_path)
            elif file_path.endswith('.xlsx') or file_path.endswith('.xls'):
                self.data = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Use .csv, .xlsx, or .xls")
            # Requires a "Ticker" column; a KeyError here is caught below.
            self.documents = self.data["Ticker"].astype(str).tolist()
        except Exception as e:
            print(f"Error loading file: {e}")
            self.documents = []
            self.data = pd.DataFrame()

    def build_index(self):
        """Embed every ticker and build a flat L2 FAISS index over them."""
        if not self.documents:
            return
        self.embeddings = self.embedder.embed(self.documents)
        dim = self.embeddings.shape[1]
        self.index = faiss.IndexFlatL2(dim)
        self.index.add(self.embeddings)

    def retrieve(self, query, entities, k=3, threshold=0.7):
        """Find the best (ticker, metric, year) cell for *entities* and
        answer *query* with it.

        Args:
            query (str): The user's natural-language question.
            entities (dict): Must contain 'ticker', 'metric' and 'year'.
            k (int): Number of ticker candidates pulled from FAISS.
            threshold (float): Minimum ticker/metric similarity to keep.

        Returns:
            list | str: ``[]`` when nothing can be searched, an LLM answer
            string on success, or "No relevant data found.".
            NOTE(review): the mixed list/str return type forces callers to
            type-check the result.
        """

        query_prompt = f"{entities['ticker']} {entities['metric']} {entities['year']}"
        # print(query_prompt)
        if not self.index or not self.documents or self.data.empty:
            return []

        # The synthetic prompt must split into exactly ticker/metric/year;
        # a multi-word metric would break this assumption.
        query_parts = query_prompt.split()
        if len(query_parts) != 3:
            print("Query must follow 'ticker metric year' pattern")
            return []

        query_ticker, query_metric, query_year = query_parts

        # Ticker similarity: k nearest neighbours in the FAISS index.
        query_ticker_embedding = self.embedder.embed([query_ticker])
        distances, indices = self.index.search(query_ticker_embedding, k)
        ticker_matches = []
        for i, idx in enumerate(indices[0]):
            if idx < len(self.documents):
                ticker = self.data.iloc[idx]["Ticker"]
                # Maps squared-L2 distance into a similarity score, assuming
                # distances fall in [0, 2] — TODO confirm the embeddings are
                # normalized; MiniLM's encode() does not normalize by default.
                similarity_score = 1 - distances[0][i] / 2
                ticker_matches.append((ticker, similarity_score, idx))

        # Metric similarity: cosine similarity between the query metric and
        # every non-key column name.
        metric_embeddings = self.embedder.embed(self.data.columns.tolist())
        query_metric_embedding = self.embedder.embed([query_metric])[0]
        metric_scores = []
        for col, col_embedding in zip(self.data.columns, metric_embeddings):
            if col.lower() in ["ticker", "year"]:
                continue
            cos_sim = np.dot(query_metric_embedding, col_embedding) / (
                np.linalg.norm(query_metric_embedding) * np.linalg.norm(col_embedding)
            )
            metric_scores.append((col, cos_sim))

        # Year similarity: fuzzy string ratio against each distinct year.
        if "Year" not in self.data.columns:
            print("No 'Year' column found in data")
            return []
        year_scores = []
        for year in self.data["Year"].astype(str).unique():
            similarity = fuzz.ratio(query_year, year) / 100.0
            year_scores.append((year, similarity))

        # Combine matches: every surviving (ticker, metric, year) triple is
        # looked up in the DataFrame and scored by the mean of its parts.
        retrieved_data = []
        seen = set()
        for ticker, ticker_score, idx in ticker_matches:
            if ticker_score < threshold:
                continue
            for metric, metric_score in metric_scores:
                if metric_score < threshold:
                    continue
                for year, year_score in year_scores:
                    if year_score < 0.5:  # looser cutoff for years than tickers/metrics
                        continue
                    combined_score = (ticker_score + metric_score + year_score) / 3
                    match = self.data[
                        (self.data["Ticker"].str.lower() == ticker.lower()) &
                        (self.data["Year"].astype(str) == year) &
                        (self.data[metric].notnull())
                    ]
                    if not match.empty:
                        value = match[metric].iloc[0]
                        key = (ticker, metric, year)
                        if key not in seen:
                            seen.add(key)
                            retrieved_data.append({
                                "ticker": ticker,
                                "metric": metric,
                                "value": value,
                                "year": year,
                                "combined_score": combined_score
                            })

        if retrieved_data:
            # print(retrieved_data)
            # Only the single best match is phrased by the module-level
            # answer_question helper defined later in this file.
            retrieved_data.sort(key=lambda x: x["combined_score"], reverse=True)
            best_match = retrieved_data[0]
            answer = answer_question(query, best_match)
            return answer

        return "No relevant data found."
125
+
126
def answer_question(question, retrieved_data):
    """
    Use a lightweight LLM to generate a natural-language answer on CPU.

    Args:
        question (str): The question to answer.
        retrieved_data (dict): A single best match with keys 'ticker',
            'metric', 'value', 'year' (as produced by Retriever.retrieve).

    Returns:
        str: Natural-language answer, or a fallback/error message.
    """
    try:
        # Nothing retrieved — no point invoking the model.
        if not retrieved_data:
            return "No relevant data found."

        # Lightweight CPU-only model; num_gpu=0 explicitly disables GPU.
        llm = Ollama(model="gemma:2b", num_gpu=0)

        # Minimal prompt to keep CPU inference fast.
        prompt_template = PromptTemplate(
            input_variables=["question", "ticker", "metric", "value", "year"],
            template=(
                "Question: {question}\n"
                "Data: Ticker={ticker}, Metric={metric}, Value={value}, Year={year}\n"
                "Answer concisely, formatting the value with commas."
            )
        )

        # BUG FIX: previously the entire match dict was interpolated as the
        # value; pass only the numeric value itself.
        prompt = prompt_template.format(
            question=question,
            ticker=retrieved_data['ticker'],
            metric=retrieved_data['metric'],
            value=retrieved_data['value'],
            year=retrieved_data['year']
        )

        # Generate response
        response = llm.invoke(prompt)
        return response.strip()

    except Exception as e:
        print(f"Error generating answer: {e}")
        return "Unable to generate answer."
173
+
174
+ # def main(file_path, query, question):
175
+ # """
176
+ # Main function to process a query, retrieve results, and answer a question.
177
+
178
+ # Args:
179
+ # file_path (str): Path to the CSV or Excel file
180
+ # query (str): Query string in 'ticker metric year' format
181
+ # question (str): Natural-language question to answer
182
+
183
+ # Returns:
184
+ # tuple: (retrieved data, answer)
185
+ # """
186
+ # try:
187
+ # retriever = Retriever(file_path)
188
+ # results = retriever.retrieve(query)
189
+ # answer = answer_question(question, results)
190
+ # return results, answer
191
+ # except Exception as e:
192
+ # print(f"Error processing query: {e}")
193
+ # return [], "Unable to process query."
194
+
195
+ # if __name__ == "__main__":
196
+ # file_path = "./financial_data.csv"
197
+ # query = "AAPL InterestExpense 2024"
198
+ # question = "What is the InterestExpense of AAPL 2024?"
199
+ # results, answer = main(file_path, query, question)
200
+ # for result in results:
201
+ # print(f"Ticker: {result['ticker']}, Metric: {result['metric']}, Value: {result['value']}, Year: {result['year']}")
202
+ # print(f"Answer: {answer}")
rag/sql_db.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# rag/sql_db.py
import os
import pandas as pd
import faiss
import numpy as np
import sqlite3
from .embedder import Embedder
from datetime import datetime

class SQL_Key_Pair:
    """Loads a financial CSV/Excel file into a SQLite table and answers
    simple (ticker, metric) lookups; also keeps a FAISS index over tickers.
    """

    def __init__(self, file_path="financial data sp500 companies.csv", model_name="all-MiniLM-L6-v2", db_path="financial_data.db"):
        self.embedder = Embedder(model_name)
        self.index = None        # FAISS index over ticker embeddings
        self.documents = []      # ticker strings from the source file
        self.data = None         # the loaded DataFrame
        self.embeddings = None   # ticker embedding matrix
        self.db_conn = sqlite3.connect(db_path)
        self.create_db_table()
        self.load_data(file_path)

    def create_db_table(self):
        """
        Create the custom_financials table in the database if it doesn't exist.
        """
        cursor = self.db_conn.cursor()
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS custom_financials (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source_file TEXT,
                firm TEXT,
                ticker TEXT,
                date TEXT,
                metric TEXT,
                value REAL,
                last_updated TEXT
            )
        """)
        self.db_conn.commit()

    def load_data(self, file_path):
        """
        Load financial data from a CSV or Excel file and store it in the database.

        NOTE(review): every column of every row — including identifier
        columns like Ticker/firm/date — is inserted as a "metric" row, and
        non-numeric values are silently coerced to 0.0; repeated loads will
        also duplicate rows since there is no uniqueness constraint.
        """
        try:
            if file_path.endswith('.csv'):
                df = pd.read_csv(file_path)
            elif file_path.endswith('.xlsx'):
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Use .csv or .xlsx.")

            self.data = df
            # Requires a "Ticker" column; a KeyError here is caught below.
            self.documents = self.data["Ticker"].astype(str).tolist()

            cursor = self.db_conn.cursor()
            for _, row in self.data.iterrows():
                firm = row.get("firm", "")
                ticker = row.get("Ticker", "")
                date = row.get("date", "")
                for column in self.data.columns:
                    if pd.notna(row[column]):
                        try:
                            value = float(row[column])
                        except (ValueError, TypeError):
                            value = 0.0  # non-numeric cells degrade to 0.0
                        cursor.execute("""
                            INSERT INTO custom_financials (source_file, firm, ticker, date, metric, value, last_updated)
                            VALUES (?, ?, ?, ?, ?, ?, ?)
                        """, (os.path.basename(file_path), firm, ticker, str(date), column, value, datetime.now().isoformat()))
            self.db_conn.commit()
            print(f"Loaded {len(self.data)} rows from {file_path} into custom_financials.")
            self.build_index()  # Rebuild FAISS index after loading
        except Exception as e:
            print(f"Error loading data: {e}")
            self.documents = []
            self.data = pd.DataFrame()

    def build_index(self):
        """
        Build a FAISS index from the embedded descriptions.
        """
        if not self.documents:
            return
        self.embeddings = self.embedder.embed(self.documents)
        dim = self.embeddings.shape[1]
        self.index = faiss.IndexFlatL2(dim)
        self.index.add(self.embeddings)

    def keyword_match_search(self, entities):
        """
        Perform strict keyword match based search from CSV.

        Args:
            entities (dict): Expects 'ticker' and 'metric' keys.

        Returns:
            str: A formatted sentence, or a "No relevant data found." /
            "No data loaded." message.
        """
        if self.data is None or self.data.empty:
            return "No data loaded."

        ticker = entities.get("ticker", "")
        metric = entities.get("metric", "")

        if not ticker or not metric:
            return "No relevant data found."

        # Case-insensitive comparison on both axes.
        ticker = ticker.lower()
        metric = metric.lower()

        retrieved_text = ""
        for _, row in self.data.iterrows():
            if str(row.get("Ticker", "")).lower() == ticker:
                for col in self.data.columns:
                    if col.lower() == metric:
                        if pd.isna(row[col]) or row[col] == "":
                            continue
                        # Assumes the cell is numeric dollars — TODO confirm;
                        # a string cell would raise TypeError here.
                        value_in_billions = row[col] / 1_000_000_000
                        retrieved_text = f"Retrieved {metric} for {ticker} is : ${value_in_billions:.2f} billion."
                        break
                break  # only the first matching ticker row is considered

        if not retrieved_text:
            return "No relevant data found."

        return retrieved_text


    def query_csv(self, query, k=3):
        """
        Query the CSV data with a user query.

        NOTE(review): ``self.retrieve`` is not defined on this class, so this
        method raises AttributeError at runtime — it looks copied from
        rag/retriever.py's Retriever; confirm intent before using.
        """
        retrieved_data = self.retrieve(query, k=k)
        if not retrieved_data:
            return "No relevant data found."

        responses = []
        for entry in retrieved_data:
            try:
                value = float(entry["value"])
                value_in_billions = value / 1_000_000_000
                response = f"{entry['ticker']}'s {entry['metric']} for {entry['year']} was ${value_in_billions:.2f} billion."
            except:
                # Fall back to the raw value when it is not numeric.
                response = f"{entry['ticker']}'s {entry['metric']} for {entry['year']} was {entry['value']}."
            responses.append(response)

        return "\n".join(responses)


    def entity_based_query(self, entities):
        # Thin alias over the strict keyword search.
        return self.keyword_match_search(entities)

    def query_db(self, ticker, metric):
        """
        Query the custom_financials table based on ticker and metric, ignoring date and year.
        """
        try:
            cursor = self.db_conn.cursor()
            # Exact (case-sensitive) match on both columns; first row wins.
            query = """
                SELECT value FROM custom_financials
                WHERE ticker = ? AND metric = ?
                LIMIT 1
            """
            params = [ticker, metric]
            cursor.execute(query, params)
            result = cursor.fetchone()
            if result:
                value = result[0]
                value_in_billions = value / 1_000_000_000
                return f"{metric} for {ticker}: ${value_in_billions:.2f} billion."
            return f"No {metric} data found for {ticker}."
        except Exception as e:
            print(f"Error querying database: {e}")
            return f"Error querying database: {str(e)}"

    def __del__(self):
        # NOTE(review): if __init__ failed before db_conn was assigned this
        # raises AttributeError during interpreter teardown — confirm.
        self.db_conn.close()
rag/web_search.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from duckduckgo_search import DDGS


def duckduckgo_web_search(query, max_results=1):
    """Run a DuckDuckGo text search and return simplified result dicts.

    Args:
        query (str): Search query.
        max_results (int): Maximum number of results to return.

    Returns:
        list[dict]: Each with 'title', 'href' and 'snippet' keys.
    """
    hits = []
    with DDGS() as ddgs:
        for item in ddgs.text(query, region='wt-wt', safesearch='Off', max_results=max_results):
            hits.append({
                "title": item["title"],
                "href": item["href"],
                "snippet": item["body"],
            })
    return hits
repo.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.11.16
2
+ banks==2.1.1
3
+ blis==1.2.1
4
+ catalogue==2.0.10
5
+ certifi==2025.1.31
6
+ click==8.1.8
7
+ cloudpathlib==0.21.0
8
+ colorama==0.4.6
9
+ confection==0.1.5
10
+ cymem==2.0.11
11
+ dirtyjson==1.0.8
12
+ distro==1.9.0
13
+ duckduckgo-search
14
+ fastapi
15
+ faiss-cpu==1.10.0
16
+ filetype==1.2.0
17
+ fuzzywuzzy==0.18.0
18
+ griffe==1.7.2
19
+ httpx-sse==0.4.0
20
+ iniconfig==2.1.0
21
+ marisa-trie==1.2.1
22
+ ml-dtypes==0.5.1
23
+ murmurhash==1.0.12
24
+ neo4j==5.28.1
25
+ nest-asyncio==1.6.0
26
+ nltk==3.9.1
27
+ numpy==1.26.4
28
+ packaging==23.2
29
+ platformdirs==4.3.7
30
+ pluggy==1.5.0
31
+ primp==0.14.0
32
+ pyaudio==0.2.14
33
+ pydantic==2.11.2
34
+ pydantic-core==2.33.1
35
+ pydantic-settings==2.8.1
36
+ pypdf==4.3.1
37
+ pytest==8.3.5
38
+ pyyaml==6.0.2
39
+ requests==2.32.3
40
+ rich==14.0.0
41
+ scikit-learn==1.6.1
42
+ sentence-transformers==2.6.1
43
+ shellingham==1.5.4
44
+ six==1.17.0
45
+ smart-open==7.1.0
46
+ spacy==3.8.5
47
+ spacy-legacy==3.0.12
48
+ spacy-loggers==1.0.5
49
+ spacy-lookups-data==1.0.5
50
+ sqlalchemy==2.0.40
51
+ srsly==2.5.1
52
+ srt==3.5.3
53
+ striprtf==0.0.26
54
+ tensorboard==2.19.0
55
+ tensorflow==2.19.0
56
+ tf-keras==2.19.0
57
+ thinc==8.3.4
58
+ threadpoolctl==3.6.0
59
+ tomli==2.2.1
60
+ tqdm==4.67.1
61
+ typer==0.15.2
62
+ typing-inspection==0.4.0
63
+ uvicorn==0.34.0
64
+ vosk==0.3.45
65
+ wasabi==1.1.3
66
+ weasel==0.4.1
67
+ yarl==1.19.0
68
+ langchain
69
+ langchain_community