svsaurav95 committed on
Commit
e904acc
·
verified ·
1 Parent(s): 29c8f52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -16
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- import pdfplumber
3
  import re
4
  import traceback
5
  import faiss
@@ -16,11 +16,9 @@ st.set_page_config(page_title="Financial Insights Chatbot", page_icon="πŸ“Š", la
16
 
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
 
19
-
20
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
21
  ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
22
 
23
-
24
  try:
25
  llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
26
  st.success("βœ… LLM initialized successfully. Using llama3-70b-8192")
@@ -31,6 +29,7 @@ except Exception as e:
31
  embedding_model = SentenceTransformer("baconnier/Finance2_embedding_small_en-V1.5", device=device)
32
 
33
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 
34
  def fetch_financial_data(company_ticker):
35
  if not company_ticker:
36
  return "No ticker symbol provided. Please enter a valid company ticker."
@@ -61,19 +60,17 @@ def fetch_financial_data(company_ticker):
61
  traceback.print_exc()
62
  return "Error fetching financial data."
63
 
64
-
65
  def extract_and_embed_text(pdf_file):
66
- """Processes PDFs and generates embeddings with GPU acceleration."""
67
  try:
68
  docs, tokenized_texts = [], []
69
- with pdfplumber.open(pdf_file) as pdf:
70
- for page in pdf.pages:
71
- text = page.extract_text()
72
- if text:
73
- chunks = text_splitter.split_text(text)
74
- for chunk in chunks:
75
- docs.append(chunk)
76
- tokenized_texts.append(chunk.split())
77
 
78
  embeddings = embedding_model.encode(docs, batch_size=64, convert_to_numpy=True, normalize_embeddings=True)
79
 
@@ -121,7 +118,6 @@ def generate_response(user_query, company_ticker, mode, uploaded_file):
121
  traceback.print_exc()
122
  return "Error generating response."
123
 
124
-
125
  st.markdown(
126
  "<h1 style='text-align: center; color: #4CAF50;'>πŸ“Š AI-Powered Financial Insights Chatbot</h1>",
127
  unsafe_allow_html=True
@@ -141,7 +137,6 @@ with col2:
141
  st.markdown("### πŸ”Ž **Enter Your Query**")
142
  user_query = st.text_input("πŸ’¬ What financial insights are you looking for?")
143
 
144
- st.markdown("---")
145
  if mode == "πŸ“„ PDF Upload Mode":
146
  st.markdown("### πŸ“‚ Upload Your Financial Report")
147
  uploaded_file = st.file_uploader("πŸ”Ό Upload PDF (Only for PDF Mode)", type=["pdf"])
@@ -157,7 +152,7 @@ if st.button("πŸš€ Analyze Now"):
157
  elif mode == "🌍 Live Data Mode" and not company_ticker:
158
  st.error("❌ Please enter a valid company ticker symbol.")
159
  else:
160
- with st.spinner("πŸ” Your Query is Processing, this can take upto 5 - 7 minutes⏳"):
161
  response = generate_response(user_query, company_ticker, mode, uploaded_file)
162
  st.markdown("---")
163
  st.markdown("<h3 style='color: #4CAF50;'>πŸ’‘ AI Response</h3>", unsafe_allow_html=True)
 
1
  import streamlit as st
2
+ import pymupdf # Using pymupdf directly
3
  import re
4
  import traceback
5
  import faiss
 
16
 
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
 
 
19
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
20
  ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
21
 
 
22
  try:
23
  llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
24
  st.success("βœ… LLM initialized successfully. Using llama3-70b-8192")
 
29
  embedding_model = SentenceTransformer("baconnier/Finance2_embedding_small_en-V1.5", device=device)
30
 
31
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
32
+
33
  def fetch_financial_data(company_ticker):
34
  if not company_ticker:
35
  return "No ticker symbol provided. Please enter a valid company ticker."
 
60
  traceback.print_exc()
61
  return "Error fetching financial data."
62
 
 
63
  def extract_and_embed_text(pdf_file):
64
+ """Processes PDFs and generates embeddings with GPU acceleration using pymupdf."""
65
  try:
66
  docs, tokenized_texts = [], []
67
+
68
+ with pymupdf.open(stream=pdf_file.read(), filetype="pdf") as doc:
69
+ full_text = "\n".join(page.get_text("text") for page in doc)
70
+ chunks = text_splitter.split_text(full_text)
71
+ for chunk in chunks:
72
+ docs.append(chunk)
73
+ tokenized_texts.append(chunk.split())
 
74
 
75
  embeddings = embedding_model.encode(docs, batch_size=64, convert_to_numpy=True, normalize_embeddings=True)
76
 
 
118
  traceback.print_exc()
119
  return "Error generating response."
120
 
 
121
  st.markdown(
122
  "<h1 style='text-align: center; color: #4CAF50;'>πŸ“Š AI-Powered Financial Insights Chatbot</h1>",
123
  unsafe_allow_html=True
 
137
  st.markdown("### πŸ”Ž **Enter Your Query**")
138
  user_query = st.text_input("πŸ’¬ What financial insights are you looking for?")
139
 
 
140
  if mode == "πŸ“„ PDF Upload Mode":
141
  st.markdown("### πŸ“‚ Upload Your Financial Report")
142
  uploaded_file = st.file_uploader("πŸ”Ό Upload PDF (Only for PDF Mode)", type=["pdf"])
 
152
  elif mode == "🌍 Live Data Mode" and not company_ticker:
153
  st.error("❌ Please enter a valid company ticker symbol.")
154
  else:
155
+ with st.spinner("πŸ” Your Query is Processing, this can take up to 5 - 7 minutes ⏳"):
156
  response = generate_response(user_query, company_ticker, mode, uploaded_file)
157
  st.markdown("---")
158
  st.markdown("<h3 style='color: #4CAF50;'>πŸ’‘ AI Response</h3>", unsafe_allow_html=True)