Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
-
import
|
3 |
import re
|
4 |
import traceback
|
5 |
import faiss
|
@@ -16,11 +16,9 @@ st.set_page_config(page_title="Financial Insights Chatbot", page_icon="π", la
|
|
16 |
|
17 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
18 |
|
19 |
-
|
20 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
21 |
ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
|
22 |
|
23 |
-
|
24 |
try:
|
25 |
llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
|
26 |
st.success("✅ LLM initialized successfully. Using llama3-70b-8192")
|
@@ -31,6 +29,7 @@ except Exception as e:
|
|
31 |
embedding_model = SentenceTransformer("baconnier/Finance2_embedding_small_en-V1.5", device=device)
|
32 |
|
33 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
|
|
34 |
def fetch_financial_data(company_ticker):
|
35 |
if not company_ticker:
|
36 |
return "No ticker symbol provided. Please enter a valid company ticker."
|
@@ -61,19 +60,17 @@ def fetch_financial_data(company_ticker):
|
|
61 |
traceback.print_exc()
|
62 |
return "Error fetching financial data."
|
63 |
|
64 |
-
|
65 |
def extract_and_embed_text(pdf_file):
|
66 |
-
"""Processes PDFs and generates embeddings with GPU acceleration."""
|
67 |
try:
|
68 |
docs, tokenized_texts = [], []
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
tokenized_texts.append(chunk.split())
|
77 |
|
78 |
embeddings = embedding_model.encode(docs, batch_size=64, convert_to_numpy=True, normalize_embeddings=True)
|
79 |
|
@@ -121,7 +118,6 @@ def generate_response(user_query, company_ticker, mode, uploaded_file):
|
|
121 |
traceback.print_exc()
|
122 |
return "Error generating response."
|
123 |
|
124 |
-
|
125 |
st.markdown(
|
126 |
"<h1 style='text-align: center; color: #4CAF50;'>π AI-Powered Financial Insights Chatbot</h1>",
|
127 |
unsafe_allow_html=True
|
@@ -141,7 +137,6 @@ with col2:
|
|
141 |
st.markdown("### π **Enter Your Query**")
|
142 |
user_query = st.text_input("π¬ What financial insights are you looking for?")
|
143 |
|
144 |
-
st.markdown("---")
|
145 |
if mode == "π PDF Upload Mode":
|
146 |
st.markdown("### π Upload Your Financial Report")
|
147 |
uploaded_file = st.file_uploader("πΌ Upload PDF (Only for PDF Mode)", type=["pdf"])
|
@@ -157,7 +152,7 @@ if st.button("π Analyze Now"):
|
|
157 |
elif mode == "π Live Data Mode" and not company_ticker:
|
158 |
st.error("β Please enter a valid company ticker symbol.")
|
159 |
else:
|
160 |
-
with st.spinner("π Your Query is Processing, this can take
|
161 |
response = generate_response(user_query, company_ticker, mode, uploaded_file)
|
162 |
st.markdown("---")
|
163 |
st.markdown("<h3 style='color: #4CAF50;'>π‘ AI Response</h3>", unsafe_allow_html=True)
|
|
|
1 |
import streamlit as st
|
2 |
+
import pymupdf # Using pymupdf directly
|
3 |
import re
|
4 |
import traceback
|
5 |
import faiss
|
|
|
16 |
|
17 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
18 |
|
|
|
19 |
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
20 |
ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
|
21 |
|
|
|
22 |
try:
|
23 |
llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
|
24 |
st.success("✅ LLM initialized successfully. Using llama3-70b-8192")
|
|
|
29 |
embedding_model = SentenceTransformer("baconnier/Finance2_embedding_small_en-V1.5", device=device)
|
30 |
|
31 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
32 |
+
|
33 |
def fetch_financial_data(company_ticker):
|
34 |
if not company_ticker:
|
35 |
return "No ticker symbol provided. Please enter a valid company ticker."
|
|
|
60 |
traceback.print_exc()
|
61 |
return "Error fetching financial data."
|
62 |
|
|
|
63 |
def extract_and_embed_text(pdf_file):
|
64 |
+
"""Processes PDFs and generates embeddings with GPU acceleration using pymupdf."""
|
65 |
try:
|
66 |
docs, tokenized_texts = [], []
|
67 |
+
|
68 |
+
with pymupdf.open(stream=pdf_file.read(), filetype="pdf") as doc:
|
69 |
+
full_text = "\n".join(page.get_text("text") for page in doc)
|
70 |
+
chunks = text_splitter.split_text(full_text)
|
71 |
+
for chunk in chunks:
|
72 |
+
docs.append(chunk)
|
73 |
+
tokenized_texts.append(chunk.split())
|
|
|
74 |
|
75 |
embeddings = embedding_model.encode(docs, batch_size=64, convert_to_numpy=True, normalize_embeddings=True)
|
76 |
|
|
|
118 |
traceback.print_exc()
|
119 |
return "Error generating response."
|
120 |
|
|
|
121 |
st.markdown(
|
122 |
"<h1 style='text-align: center; color: #4CAF50;'>π AI-Powered Financial Insights Chatbot</h1>",
|
123 |
unsafe_allow_html=True
|
|
|
137 |
st.markdown("### π **Enter Your Query**")
|
138 |
user_query = st.text_input("π¬ What financial insights are you looking for?")
|
139 |
|
|
|
140 |
if mode == "π PDF Upload Mode":
|
141 |
st.markdown("### π Upload Your Financial Report")
|
142 |
uploaded_file = st.file_uploader("πΌ Upload PDF (Only for PDF Mode)", type=["pdf"])
|
|
|
152 |
elif mode == "π Live Data Mode" and not company_ticker:
|
153 |
st.error("β Please enter a valid company ticker symbol.")
|
154 |
else:
|
155 |
+
with st.spinner("π Your Query is Processing, this can take up to 5 - 7 minutes ⏳"):
|
156 |
response = generate_response(user_query, company_ticker, mode, uploaded_file)
|
157 |
st.markdown("---")
|
158 |
st.markdown("<h3 style='color: #4CAF50;'>π‘ AI Response</h3>", unsafe_allow_html=True)
|