Spaces:
Running
Running
Added Rag
Browse files
app.py
CHANGED
@@ -1,14 +1,25 @@
|
|
1 |
import streamlit as st
|
2 |
-
import
|
3 |
-
import
|
4 |
import traceback
|
|
|
|
|
|
|
|
|
5 |
from sentence_transformers import SentenceTransformer
|
6 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
from langchain_groq import ChatGroq
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
-
ALPHA_VANTAGE_API_KEY = st.secrets["ALPHA_VANTAGE_API_KEY"]
|
10 |
-
GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
|
11 |
-
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
|
12 |
|
13 |
try:
|
14 |
llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
|
@@ -17,19 +28,9 @@ except Exception as e:
|
|
17 |
st.error("β Failed to initialize Groq LLM.")
|
18 |
traceback.print_exc()
|
19 |
|
20 |
-
|
21 |
-
try:
|
22 |
-
doc = pymupdf.open(stream=uploaded_file.read(), filetype="pdf")
|
23 |
-
full_text = "".join(page.get_text() for page in doc)
|
24 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=max_length, chunk_overlap=200)
|
25 |
-
chunks = text_splitter.split_text(full_text)
|
26 |
-
|
27 |
-
return chunks
|
28 |
-
except Exception as e:
|
29 |
-
st.error("β Failed to extract text from PDF.")
|
30 |
-
traceback.print_exc()
|
31 |
-
return ["Error extracting text."]
|
32 |
|
|
|
33 |
def fetch_financial_data(company_ticker):
|
34 |
if not company_ticker:
|
35 |
return "No ticker symbol provided. Please enter a valid company ticker."
|
@@ -37,13 +38,13 @@ def fetch_financial_data(company_ticker):
|
|
37 |
try:
|
38 |
overview_url = f"https://www.alphavantage.co/query?function=OVERVIEW&symbol={company_ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
|
39 |
overview_response = requests.get(overview_url)
|
40 |
-
|
41 |
if overview_response.status_code == 200:
|
42 |
overview_data = overview_response.json()
|
43 |
market_cap = overview_data.get("MarketCapitalization", "N/A")
|
44 |
else:
|
45 |
-
st.error(f"β Failed to fetch company overview. Status Code: {overview_response.status_code}")
|
46 |
return "Error fetching company overview."
|
|
|
47 |
income_url = f"https://www.alphavantage.co/query?function=INCOME_STATEMENT&symbol={company_ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
|
48 |
income_response = requests.get(income_url)
|
49 |
|
@@ -52,49 +53,114 @@ def fetch_financial_data(company_ticker):
|
|
52 |
annual_reports = income_data.get("annualReports", [])
|
53 |
revenue = annual_reports[0].get("totalRevenue", "N/A") if annual_reports else "N/A"
|
54 |
else:
|
55 |
-
st.error(f"β Failed to fetch income statement. Status Code: {income_response.status_code}")
|
56 |
return "Error fetching income statement."
|
57 |
|
58 |
return f"Market Cap: ${market_cap}\nTotal Revenue: ${revenue}"
|
59 |
|
60 |
except Exception as e:
|
61 |
-
st.error("β Exception in fetching financial data.")
|
62 |
traceback.print_exc()
|
63 |
return "Error fetching financial data."
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
def generate_response(user_query, company_ticker, mode, uploaded_file):
|
66 |
try:
|
67 |
-
if mode == "PDF Upload Mode":
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
72 |
financial_info = fetch_financial_data(company_ticker)
|
73 |
prompt = f"Analyze the financial status of {company_ticker} based on:\n{financial_info}\n\nUser Query: {user_query}"
|
74 |
else:
|
75 |
return "Invalid mode selected."
|
76 |
-
|
77 |
response = llm.invoke(prompt)
|
78 |
return response.content
|
79 |
except Exception as e:
|
80 |
-
st.error("β Failed to generate AI response.")
|
81 |
traceback.print_exc()
|
82 |
return "Error generating response."
|
83 |
|
84 |
-
st.title("π AI-Powered Financial Insights Chatbot")
|
85 |
-
st.write("Upload financial reports or fetch live financial data ")
|
86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
mode = st.radio("
|
91 |
-
uploaded_file = st.file_uploader("Upload PDF (Only for PDF Mode)", type=["pdf"])
|
92 |
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
st.error("β Please upload a PDF file.")
|
|
|
|
|
96 |
else:
|
97 |
-
with st.spinner("Processing
|
98 |
response = generate_response(user_query, company_ticker, mode, uploaded_file)
|
99 |
-
st.
|
|
|
100 |
st.write(response)
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
import pdfplumber
|
3 |
+
import re
|
4 |
import traceback
|
5 |
+
import faiss
|
6 |
+
import numpy as np
|
7 |
+
import requests
|
8 |
+
from rank_bm25 import BM25Okapi
|
9 |
from sentence_transformers import SentenceTransformer
|
10 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
11 |
from langchain_groq import ChatGroq
|
12 |
+
import torch
|
13 |
+
import os
|
14 |
+
|
15 |
+
st.set_page_config(page_title="Financial Insights Chatbot", page_icon="π", layout="wide")
|
16 |
+
|
17 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
18 |
+
|
19 |
+
|
20 |
+
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
21 |
+
ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
|
22 |
|
|
|
|
|
|
|
23 |
|
24 |
try:
|
25 |
llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
|
|
|
28 |
st.error("β Failed to initialize Groq LLM.")
|
29 |
traceback.print_exc()
|
30 |
|
31 |
+
embedding_model = SentenceTransformer("baconnier/Finance2_embedding_small_en-V1.5", device=device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
34 |
def fetch_financial_data(company_ticker):
|
35 |
if not company_ticker:
|
36 |
return "No ticker symbol provided. Please enter a valid company ticker."
|
|
|
38 |
try:
|
39 |
overview_url = f"https://www.alphavantage.co/query?function=OVERVIEW&symbol={company_ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
|
40 |
overview_response = requests.get(overview_url)
|
41 |
+
|
42 |
if overview_response.status_code == 200:
|
43 |
overview_data = overview_response.json()
|
44 |
market_cap = overview_data.get("MarketCapitalization", "N/A")
|
45 |
else:
|
|
|
46 |
return "Error fetching company overview."
|
47 |
+
|
48 |
income_url = f"https://www.alphavantage.co/query?function=INCOME_STATEMENT&symbol={company_ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
|
49 |
income_response = requests.get(income_url)
|
50 |
|
|
|
53 |
annual_reports = income_data.get("annualReports", [])
|
54 |
revenue = annual_reports[0].get("totalRevenue", "N/A") if annual_reports else "N/A"
|
55 |
else:
|
|
|
56 |
return "Error fetching income statement."
|
57 |
|
58 |
return f"Market Cap: ${market_cap}\nTotal Revenue: ${revenue}"
|
59 |
|
60 |
except Exception as e:
|
|
|
61 |
traceback.print_exc()
|
62 |
return "Error fetching financial data."
|
63 |
|
64 |
+
|
65 |
+
def extract_and_embed_text(pdf_file):
    """Extract text from a PDF, chunk it, and build dense and sparse indexes.

    Each page's text is split with the module-level ``text_splitter``. The
    resulting chunks are embedded with the module-level ``embedding_model``
    (on whichever device that model was constructed with), added to a FAISS
    HNSW index, and also indexed with BM25 over whitespace tokens.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts for the document
            (the caller passes the Streamlit upload object).

    Returns:
        A tuple ``(docs, embeddings, index, bm25)``: the list of text chunks,
        their normalised embedding matrix, the FAISS index built from it, and
        the ``BM25Okapi`` model. If the PDF yields no text, or any step
        raises, ``([], [], None, None)`` is returned instead.
    """
    try:
        docs = []
        with pdfplumber.open(pdf_file) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                # extract_text() gives nothing for pages without a text layer.
                if text:
                    docs.extend(text_splitter.split_text(text))

        # An image-only PDF is not a failure: return the empty result directly
        # instead of letting the indexing calls below raise on empty input and
        # print a misleading traceback.
        if not docs:
            return [], [], None, None

        tokenized_texts = [chunk.split() for chunk in docs]

        embeddings = embedding_model.encode(
            docs,
            batch_size=64,
            convert_to_numpy=True,
            normalize_embeddings=True,
        )

        # 32 is the HNSW graph connectivity (neighbours per node). Because the
        # vectors are normalised, nearest-by-distance ordering agrees with
        # cosine-similarity ordering.
        index = faiss.IndexHNSWFlat(embeddings.shape[1], 32)
        index.add(embeddings)

        bm25 = BM25Okapi(tokenized_texts)

        return docs, embeddings, index, bm25
    except Exception:
        # Top-level boundary for the PDF pipeline: log server-side and hand
        # the caller the same "nothing extracted" shape it already checks for.
        traceback.print_exc()
        return [], [], None, None
|
90 |
+
|
91 |
+
def retrieve_relevant_docs(user_query, docs, index, bm25, top_k=3, candidates=8):
    """Return the chunks most relevant to a query using hybrid retrieval.

    Candidates come from two rankers: nearest-neighbour search in the FAISS
    index (dense) and BM25 keyword scoring (sparse). The two ranked lists are
    merged with reciprocal-rank fusion so that a chunk ranked highly by
    either ranker, and especially by both, comes first.

    Args:
        user_query: The user's question.
        docs: The text chunks, in the same order they were added to ``index``
            and ``bm25``.
        index: FAISS index over the chunk embeddings.
        bm25: ``BM25Okapi`` model over the whitespace-tokenised chunks.
        top_k: Number of chunks to return (default 3, as before).
        candidates: Number of candidates requested from each ranker
            (default 8, as before). Expected to be positive.

    Returns:
        Up to ``top_k`` chunks, best first. An empty list when there is
        nothing to search.
    """
    if not docs or index is None or bm25 is None:
        return []

    query_embedding = embedding_model.encode(
        user_query, convert_to_numpy=True, normalize_embeddings=True
    )
    _, faiss_indices = index.search(np.array([query_embedding]), candidates)
    # FAISS fills unused result slots with -1 when it finds fewer than
    # `candidates` neighbours; left in, -1 would silently select docs[-1].
    dense_ranking = [int(i) for i in faiss_indices[0] if 0 <= i < len(docs)]

    bm25_scores = bm25.get_scores(user_query.split())
    # A score of exactly 0 means the chunk earned no weight from any query
    # term, so it should not be counted as a keyword hit.
    sparse_ranking = [
        int(i)
        for i in np.argsort(bm25_scores)[::-1][:candidates]
        if bm25_scores[i] != 0
    ]

    # Reciprocal-rank fusion: score = sum over rankers of 1 / (k + rank).
    # k = 60 is the conventional damping constant.
    fused_scores = {}
    for ranking in (dense_ranking, sparse_ranking):
        for rank, doc_id in enumerate(ranking):
            fused_scores[doc_id] = fused_scores.get(doc_id, 0.0) + 1.0 / (60 + rank)

    # Ties are broken by document position so the result is deterministic.
    best_ids = sorted(fused_scores, key=lambda doc_id: (-fused_scores[doc_id], doc_id))

    return [docs[i] for i in best_ids[:top_k]]
|
100 |
+
|
101 |
def generate_response(user_query, company_ticker, mode, uploaded_file):
    """Build a prompt for the selected analysis mode and return the LLM answer.

    Args:
        user_query: The question typed by the user; may be empty.
        company_ticker: Ticker symbol handed to ``fetch_financial_data`` in
            live-data mode. Unused in PDF mode.
        mode: The mode label chosen in the UI. Any label ending in
            ``"PDF Upload Mode"`` or ``"Live Data Mode"`` is accepted, so the
            decorative emoji prefix does not take part in the comparison.
        uploaded_file: The PDF handed to ``extract_and_embed_text`` in PDF
            mode. Unused in live-data mode.

    Returns:
        The text content of the LLM response, or a short error string when
        the mode is not recognised, the PDF yields no text, or any step
        raises.
    """
    try:
        # Non-string modes (e.g. None) are treated as "no mode selected".
        mode_label = mode if isinstance(mode, str) else ""

        if mode_label.endswith("PDF Upload Mode"):
            docs, _embeddings, index, bm25 = extract_and_embed_text(uploaded_file)
            if not docs:
                return "β Error extracting text from PDF."

            retrieved_docs = retrieve_relevant_docs(user_query, docs, index, bm25)
            context = "\n\n".join(retrieved_docs)
            prompt = f"Summarize the key financial insights from this document:\n\n{context}"
            # The question drove retrieval, so it must also reach the model;
            # otherwise the answer is a generic summary of the retrieved
            # chunks. With no question, keep the plain summary prompt.
            if user_query and user_query.strip():
                prompt += f"\n\nUser Query: {user_query}"

        elif mode_label.endswith("Live Data Mode"):
            financial_info = fetch_financial_data(company_ticker)
            prompt = f"Analyze the financial status of {company_ticker} based on:\n{financial_info}\n\nUser Query: {user_query}"
        else:
            return "Invalid mode selected."

        response = llm.invoke(prompt)
        return response.content
    except Exception:
        # UI boundary: log the details server-side, show a generic message.
        traceback.print_exc()
        return "Error generating response."
|
123 |
|
|
|
|
|
124 |
|
125 |
+
# ---------------------------------------------------------------------------
# Page layout. Streamlit re-runs this script top to bottom on every
# interaction, so the widgets below both render the page and supply the
# current input values.
# ---------------------------------------------------------------------------
st.markdown(
    "<h1 style='text-align: center; color: #4CAF50;'>π AI-Powered Financial Insights Chatbot</h1>",
    unsafe_allow_html=True,
)
st.markdown(
    "<h5 style='text-align: center; color: #666;'>Analyze financial reports or fetch live financial data effortlessly!</h5>",
    unsafe_allow_html=True,
)

col1, col2 = st.columns(2)

with col1:
    st.markdown("### π’ **Choose Your Analysis Mode**")
    # The heading above is the visible caption. The widget still gets a real
    # label (hidden) because Streamlit advises against empty labels for
    # accessibility reasons.
    mode = st.radio(
        "Analysis mode",
        ["π PDF Upload Mode", "π Live Data Mode"],
        horizontal=True,
        label_visibility="collapsed",
    )

with col2:
    st.markdown("### π **Enter Your Query**")
    user_query = st.text_input("π¬ What financial insights are you looking for?")

st.markdown("---")

# Show only the input that belongs to the chosen mode; the other one is set
# to None so that generate_response always receives both arguments.
if mode == "π PDF Upload Mode":
    st.markdown("### π Upload Your Financial Report")
    uploaded_file = st.file_uploader("πΌ Upload PDF (Only for PDF Mode)", type=["pdf"])
    company_ticker = None
else:
    st.markdown("### π Live Market Data")
    company_ticker = st.text_input("π’ Enter Company Ticker Symbol", placeholder="e.g., AAPL, MSFT")
    uploaded_file = None

if st.button("π Analyze Now"):
    # Validate the mode-specific input before starting the slow pipeline.
    if mode == "π PDF Upload Mode" and not uploaded_file:
        st.error("β Please upload a PDF file.")
    elif mode == "π Live Data Mode" and not company_ticker:
        st.error("β Please enter a valid company ticker symbol.")
    else:
        with st.spinner("π Your Query is Processing, this can take upto 5 - 7 minutesβ³"):
            response = generate_response(user_query, company_ticker, mode, uploaded_file)
        st.markdown("---")
        st.markdown("<h3 style='color: #4CAF50;'>π‘ AI Response</h3>", unsafe_allow_html=True)
        st.write(response)

st.markdown("---")
|