svsaurav95 committed on
Commit
e904acc
·
verified ·
1 Parent(s): 29c8f52

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -16
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- import pdfplumber
3
  import re
4
  import traceback
5
  import faiss
@@ -16,11 +16,9 @@ st.set_page_config(page_title="Financial Insights Chatbot", page_icon="πŸ“Š", la
16
 
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
 
19
-
20
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
21
  ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
22
 
23
-
24
  try:
25
  llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
26
  st.success("βœ… LLM initialized successfully. Using llama3-70b-8192")
@@ -31,6 +29,7 @@ except Exception as e:
31
  embedding_model = SentenceTransformer("baconnier/Finance2_embedding_small_en-V1.5", device=device)
32
 
33
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 
34
  def fetch_financial_data(company_ticker):
35
  if not company_ticker:
36
  return "No ticker symbol provided. Please enter a valid company ticker."
@@ -61,19 +60,17 @@ def fetch_financial_data(company_ticker):
61
  traceback.print_exc()
62
  return "Error fetching financial data."
63
 
64
-
65
  def extract_and_embed_text(pdf_file):
66
- """Processes PDFs and generates embeddings with GPU acceleration."""
67
  try:
68
  docs, tokenized_texts = [], []
69
- with pdfplumber.open(pdf_file) as pdf:
70
- for page in pdf.pages:
71
- text = page.extract_text()
72
- if text:
73
- chunks = text_splitter.split_text(text)
74
- for chunk in chunks:
75
- docs.append(chunk)
76
- tokenized_texts.append(chunk.split())
77
 
78
  embeddings = embedding_model.encode(docs, batch_size=64, convert_to_numpy=True, normalize_embeddings=True)
79
 
@@ -121,7 +118,6 @@ def generate_response(user_query, company_ticker, mode, uploaded_file):
121
  traceback.print_exc()
122
  return "Error generating response."
123
 
124
-
125
  st.markdown(
126
  "<h1 style='text-align: center; color: #4CAF50;'>πŸ“Š AI-Powered Financial Insights Chatbot</h1>",
127
  unsafe_allow_html=True
@@ -141,7 +137,6 @@ with col2:
141
  st.markdown("### πŸ”Ž **Enter Your Query**")
142
  user_query = st.text_input("πŸ’¬ What financial insights are you looking for?")
143
 
144
- st.markdown("---")
145
  if mode == "πŸ“„ PDF Upload Mode":
146
  st.markdown("### πŸ“‚ Upload Your Financial Report")
147
  uploaded_file = st.file_uploader("πŸ”Ό Upload PDF (Only for PDF Mode)", type=["pdf"])
@@ -157,7 +152,7 @@ if st.button("πŸš€ Analyze Now"):
157
  elif mode == "🌍 Live Data Mode" and not company_ticker:
158
  st.error("❌ Please enter a valid company ticker symbol.")
159
  else:
160
- with st.spinner("πŸ” Your Query is Processing, this can take upto 5 - 7 minutes⏳"):
161
  response = generate_response(user_query, company_ticker, mode, uploaded_file)
162
  st.markdown("---")
163
  st.markdown("<h3 style='color: #4CAF50;'>πŸ’‘ AI Response</h3>", unsafe_allow_html=True)
 
1
  import streamlit as st
2
+ import pymupdf # Using pymupdf directly
3
  import re
4
  import traceback
5
  import faiss
 
16
 
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
 
 
19
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
20
  ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
21
 
 
22
  try:
23
  llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
24
  st.success("βœ… LLM initialized successfully. Using llama3-70b-8192")
 
29
  embedding_model = SentenceTransformer("baconnier/Finance2_embedding_small_en-V1.5", device=device)
30
 
31
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
32
+
33
  def fetch_financial_data(company_ticker):
34
  if not company_ticker:
35
  return "No ticker symbol provided. Please enter a valid company ticker."
 
60
  traceback.print_exc()
61
  return "Error fetching financial data."
62
 
 
63
  def extract_and_embed_text(pdf_file):
64
+ """Processes PDFs and generates embeddings with GPU acceleration using pymupdf."""
65
  try:
66
  docs, tokenized_texts = [], []
67
+
68
+ with pymupdf.open(stream=pdf_file.read(), filetype="pdf") as doc:
69
+ full_text = "\n".join(page.get_text("text") for page in doc)
70
+ chunks = text_splitter.split_text(full_text)
71
+ for chunk in chunks:
72
+ docs.append(chunk)
73
+ tokenized_texts.append(chunk.split())
 
74
 
75
  embeddings = embedding_model.encode(docs, batch_size=64, convert_to_numpy=True, normalize_embeddings=True)
76
 
 
118
  traceback.print_exc()
119
  return "Error generating response."
120
 
 
121
  st.markdown(
122
  "<h1 style='text-align: center; color: #4CAF50;'>πŸ“Š AI-Powered Financial Insights Chatbot</h1>",
123
  unsafe_allow_html=True
 
137
  st.markdown("### πŸ”Ž **Enter Your Query**")
138
  user_query = st.text_input("πŸ’¬ What financial insights are you looking for?")
139
 
 
140
  if mode == "πŸ“„ PDF Upload Mode":
141
  st.markdown("### πŸ“‚ Upload Your Financial Report")
142
  uploaded_file = st.file_uploader("πŸ”Ό Upload PDF (Only for PDF Mode)", type=["pdf"])
 
152
  elif mode == "🌍 Live Data Mode" and not company_ticker:
153
  st.error("❌ Please enter a valid company ticker symbol.")
154
  else:
155
+ with st.spinner("πŸ” Your Query is Processing, this can take up to 5 - 7 minutes ⏳"):
156
  response = generate_response(user_query, company_ticker, mode, uploaded_file)
157
  st.markdown("---")
158
  st.markdown("<h3 style='color: #4CAF50;'>πŸ’‘ AI Response</h3>", unsafe_allow_html=True)