svsaurav95 committed on
Commit
157ff13
·
verified ·
1 Parent(s): 04dff86
Files changed (1) hide show
  1. app.py +104 -38
app.py CHANGED
@@ -1,14 +1,25 @@
1
  import streamlit as st
2
- import requests
3
- import pymupdf
4
  import traceback
 
 
 
 
5
  from sentence_transformers import SentenceTransformer
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain_groq import ChatGroq
 
 
 
 
 
 
 
 
 
 
8
 
9
- ALPHA_VANTAGE_API_KEY = st.secrets["ALPHA_VANTAGE_API_KEY"]
10
- GROQ_API_KEY = st.secrets["GROQ_API_KEY"]
11
- embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
12
 
13
  try:
14
  llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
@@ -17,19 +28,9 @@ except Exception as e:
17
  st.error("❌ Failed to initialize Groq LLM.")
18
  traceback.print_exc()
19
 
20
- def extract_text_from_pdf(uploaded_file, max_length=5000):
21
- try:
22
- doc = pymupdf.open(stream=uploaded_file.read(), filetype="pdf")
23
- full_text = "".join(page.get_text() for page in doc)
24
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=max_length, chunk_overlap=200)
25
- chunks = text_splitter.split_text(full_text)
26
-
27
- return chunks
28
- except Exception as e:
29
- st.error("❌ Failed to extract text from PDF.")
30
- traceback.print_exc()
31
- return ["Error extracting text."]
32
 
 
33
  def fetch_financial_data(company_ticker):
34
  if not company_ticker:
35
  return "No ticker symbol provided. Please enter a valid company ticker."
@@ -37,13 +38,13 @@ def fetch_financial_data(company_ticker):
37
  try:
38
  overview_url = f"https://www.alphavantage.co/query?function=OVERVIEW&symbol={company_ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
39
  overview_response = requests.get(overview_url)
40
-
41
  if overview_response.status_code == 200:
42
  overview_data = overview_response.json()
43
  market_cap = overview_data.get("MarketCapitalization", "N/A")
44
  else:
45
- st.error(f"❌ Failed to fetch company overview. Status Code: {overview_response.status_code}")
46
  return "Error fetching company overview."
 
47
  income_url = f"https://www.alphavantage.co/query?function=INCOME_STATEMENT&symbol={company_ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
48
  income_response = requests.get(income_url)
49
 
@@ -52,49 +53,114 @@ def fetch_financial_data(company_ticker):
52
  annual_reports = income_data.get("annualReports", [])
53
  revenue = annual_reports[0].get("totalRevenue", "N/A") if annual_reports else "N/A"
54
  else:
55
- st.error(f"❌ Failed to fetch income statement. Status Code: {income_response.status_code}")
56
  return "Error fetching income statement."
57
 
58
  return f"Market Cap: ${market_cap}\nTotal Revenue: ${revenue}"
59
 
60
  except Exception as e:
61
- st.error("❌ Exception in fetching financial data.")
62
  traceback.print_exc()
63
  return "Error fetching financial data."
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def generate_response(user_query, company_ticker, mode, uploaded_file):
66
  try:
67
- if mode == "PDF Upload Mode":
68
- chunks = extract_text_from_pdf(uploaded_file)
69
- chunked_summary = "\n\n".join(chunks[:3])
70
- prompt = f"Summarize the key financial insights from this document:\n\n{chunked_summary}"
71
- elif mode == "Live Data Mode":
 
 
 
 
 
72
  financial_info = fetch_financial_data(company_ticker)
73
  prompt = f"Analyze the financial status of {company_ticker} based on:\n{financial_info}\n\nUser Query: {user_query}"
74
  else:
75
  return "Invalid mode selected."
76
-
77
  response = llm.invoke(prompt)
78
  return response.content
79
  except Exception as e:
80
- st.error("❌ Failed to generate AI response.")
81
  traceback.print_exc()
82
  return "Error generating response."
83
 
84
- st.title("πŸ“Š AI-Powered Financial Insights Chatbot")
85
- st.write("Upload financial reports or fetch live financial data ")
86
 
 
 
 
 
 
 
 
 
 
 
87
 
88
- user_query = st.text_input("Enter your query:")
89
- company_ticker = st.text_input("Enter company ticker symbol")
90
- mode = st.radio("Select Mode:", ["PDF Upload Mode", "Live Data Mode"])
91
- uploaded_file = st.file_uploader("Upload PDF (Only for PDF Mode)", type=["pdf"])
92
 
93
- if st.button("Analyze"):
94
- if mode == "PDF Upload Mode" and not uploaded_file:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  st.error("❌ Please upload a PDF file.")
 
 
96
  else:
97
- with st.spinner("Processing... ⏳"):
98
  response = generate_response(user_query, company_ticker, mode, uploaded_file)
99
- st.subheader("πŸ’‘ AI Response")
 
100
  st.write(response)
 
 
 
1
  import streamlit as st
2
+ import pdfplumber
3
+ import re
4
  import traceback
5
+ import faiss
6
+ import numpy as np
7
+ import requests
8
+ from rank_bm25 import BM25Okapi
9
  from sentence_transformers import SentenceTransformer
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain_groq import ChatGroq
12
+ import torch
13
+ import os
14
+
15
# Configure the browser tab title/icon and use the wide page layout.
st.set_page_config(page_title="Financial Insights Chatbot", page_icon="πŸ“Š", layout="wide")

# Device string handed to the embedding model: GPU when torch reports CUDA, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


# API credentials come from environment variables; os.getenv yields None when a variable is unset.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")
22
 
 
 
 
23
 
24
  try:
25
  llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
 
28
  st.error("❌ Failed to initialize Groq LLM.")
29
  traceback.print_exc()
30
 
31
@st.cache_resource
def _load_embedding_model(model_name, target_device):
    """Instantiate the sentence-embedding model once and reuse it across reruns.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached module-level ``SentenceTransformer(...)`` call would reload the
    model weights each time. ``st.cache_resource`` keeps a single instance per
    ``(model_name, target_device)`` pair for the lifetime of the server process.

    Args:
        model_name: Hugging Face model identifier to load.
        target_device: Device string passed to ``SentenceTransformer`` ("cuda" or "cpu").

    Returns:
        The loaded ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(model_name, device=target_device)


# Finance-tuned embedding model shared by PDF indexing and query encoding.
embedding_model = _load_embedding_model("baconnier/Finance2_embedding_small_en-V1.5", device)

# Splits page text into ~1000-character chunks with a 100-character overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
34
  def fetch_financial_data(company_ticker):
35
  if not company_ticker:
36
  return "No ticker symbol provided. Please enter a valid company ticker."
 
38
  try:
39
  overview_url = f"https://www.alphavantage.co/query?function=OVERVIEW&symbol={company_ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
40
  overview_response = requests.get(overview_url)
41
+
42
  if overview_response.status_code == 200:
43
  overview_data = overview_response.json()
44
  market_cap = overview_data.get("MarketCapitalization", "N/A")
45
  else:
 
46
  return "Error fetching company overview."
47
+
48
  income_url = f"https://www.alphavantage.co/query?function=INCOME_STATEMENT&symbol={company_ticker}&apikey={ALPHA_VANTAGE_API_KEY}"
49
  income_response = requests.get(income_url)
50
 
 
53
  annual_reports = income_data.get("annualReports", [])
54
  revenue = annual_reports[0].get("totalRevenue", "N/A") if annual_reports else "N/A"
55
  else:
 
56
  return "Error fetching income statement."
57
 
58
  return f"Market Cap: ${market_cap}\nTotal Revenue: ${revenue}"
59
 
60
  except Exception as e:
 
61
  traceback.print_exc()
62
  return "Error fetching financial data."
63
 
64
+
65
def extract_and_embed_text(pdf_file):
    """Extract text from a PDF, chunk it, and build dense and keyword indexes.

    Every page's text is split with the module-level ``text_splitter``. The
    resulting chunks are embedded with ``embedding_model`` (L2-normalised),
    stored in a FAISS HNSW index, and whitespace-tokenised for BM25.

    Args:
        pdf_file: Path or binary file-like object accepted by ``pdfplumber.open``.

    Returns:
        A tuple ``(docs, embeddings, index, bm25)`` on success, or
        ``([], [], None, None)`` when the PDF yields no text or processing fails.
    """
    try:
        docs = []
        with pdfplumber.open(pdf_file) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                if text:
                    docs.extend(text_splitter.split_text(text))

        if not docs:
            # Nothing extractable (e.g. image-only or empty PDF): report the
            # empty result directly instead of letting the indexing code below
            # fail on an empty matrix and dump a misleading traceback.
            return [], [], None, None

        tokenized_texts = [chunk.split() for chunk in docs]

        embeddings = embedding_model.encode(
            docs, batch_size=64, convert_to_numpy=True, normalize_embeddings=True
        )
        # FAISS only accepts C-contiguous float32 matrices.
        embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

        index = faiss.IndexHNSWFlat(embeddings.shape[1], 32)
        index.add(embeddings)

        bm25 = BM25Okapi(tokenized_texts)

        return docs, embeddings, index, bm25
    except Exception:
        # Best-effort: any parsing/embedding failure is logged and reported to
        # the caller as "no documents".
        traceback.print_exc()
        return [], [], None, None
90
+
91
def retrieve_relevant_docs(user_query, docs, index, bm25, top_k=3, candidates=8):
    """Return the chunks most relevant to a query using hybrid retrieval.

    Dense candidates come from a nearest-neighbour search over the FAISS index
    (embeddings are normalised, so nearest-neighbour order matches cosine
    order); keyword candidates come from BM25. The two ranked lists are
    interleaved rank by rank and de-duplicated, so the best hit of each
    retriever is always kept.

    Args:
        user_query: Free-text query.
        docs: Chunk strings, in the same order they were added to ``index`` and ``bm25``.
        index: FAISS index built over the chunk embeddings.
        bm25: ``BM25Okapi`` instance built over the whitespace-tokenised chunks.
        top_k: Maximum number of chunks to return.
        candidates: Number of candidates requested from each retriever.

    Returns:
        Up to ``top_k`` chunk strings, most relevant first.
    """
    if not docs:
        return []

    n_candidates = min(candidates, len(docs))

    query_embedding = embedding_model.encode(
        user_query, convert_to_numpy=True, normalize_embeddings=True
    )
    query_matrix = np.asarray([query_embedding], dtype=np.float32)
    _, faiss_indices = index.search(query_matrix, n_candidates)
    # FAISS pads the result with -1 when it finds fewer than k neighbours;
    # used as a list index, -1 would silently return the last chunk.
    dense_ranked = [int(i) for i in faiss_indices[0] if 0 <= i < len(docs)]

    bm25_scores = bm25.get_scores(user_query.split())
    # Only keep chunks that actually share a term with the query.
    sparse_ranked = [
        int(i)
        for i in np.argsort(bm25_scores)[::-1][:n_candidates]
        if bm25_scores[i] > 0
    ]

    # Interleave the two rankings (dense first at each rank), preserving order.
    # A set union would discard the ranking and pick chunks by index value.
    merged, seen = [], set()
    for rank in range(max(len(dense_ranked), len(sparse_ranked))):
        for ranked in (dense_ranked, sparse_ranked):
            if rank < len(ranked) and ranked[rank] not in seen:
                seen.add(ranked[rank])
                merged.append(ranked[rank])

    return [docs[i] for i in merged[:top_k]]
100
+
101
def generate_response(user_query, company_ticker, mode, uploaded_file):
    """Build a prompt for the selected mode and return the LLM's answer.

    In PDF mode the uploaded file is indexed, the chunks most relevant to
    ``user_query`` are retrieved, and the LLM is asked to summarise them with
    the user's question attached. In Live Data mode the prompt is built from
    ``fetch_financial_data(company_ticker)`` plus the query.

    Args:
        user_query: Question typed by the user (may be empty).
        company_ticker: Ticker symbol; used only in Live Data mode.
        mode: One of the two mode labels offered by the UI radio button.
        uploaded_file: Uploaded PDF; used only in PDF mode.

    Returns:
        The model's text answer, or a short error string if the mode is
        unknown, the PDF yields no text, or any step raises.
    """
    try:
        if mode == "πŸ“„ PDF Upload Mode":
            docs, _embeddings, index, bm25 = extract_and_embed_text(uploaded_file)
            if not docs:
                return "❌ Error extracting text from PDF."

            retrieved_docs = retrieve_relevant_docs(user_query, docs, index, bm25)
            context = "\n\n".join(retrieved_docs)
            prompt = f"Summarize the key financial insights from this document:\n\n{context}"
            # The query drives retrieval, so the model must see it too;
            # otherwise the answer ignores what the user actually asked.
            if user_query and user_query.strip():
                prompt += f"\n\nUser Query: {user_query}"

        elif mode == "🌍 Live Data Mode":
            financial_info = fetch_financial_data(company_ticker)
            prompt = f"Analyze the financial status of {company_ticker} based on:\n{financial_info}\n\nUser Query: {user_query}"
        else:
            return "Invalid mode selected."

        response = llm.invoke(prompt)
        return response.content
    except Exception:
        # Includes the case where the module-level LLM failed to initialise.
        traceback.print_exc()
        return "Error generating response."
123
 
 
 
124
 
125
# ---- Page header -----------------------------------------------------------
st.markdown(
    "<h1 style='text-align: center; color: #4CAF50;'>πŸ“Š AI-Powered Financial Insights Chatbot</h1>",
    unsafe_allow_html=True,
)
st.markdown(
    "<h5 style='text-align: center; color: #666;'>Analyze financial reports or fetch live financial data effortlessly!</h5>",
    unsafe_allow_html=True,
)

# ---- Mode selection and query input, side by side --------------------------
col1, col2 = st.columns(2)

with col1:
    st.markdown("### 🏒 **Choose Your Analysis Mode**")
    # The markdown heading above acts as the visible label. Streamlit
    # discourages empty widget labels, so give the radio a real label and
    # collapse it. The option strings are compared verbatim in
    # generate_response and must stay in sync with it.
    mode = st.radio(
        "Analysis mode",
        ["πŸ“„ PDF Upload Mode", "🌍 Live Data Mode"],
        horizontal=True,
        label_visibility="collapsed",
    )

with col2:
    st.markdown("### πŸ”Ž **Enter Your Query**")
    user_query = st.text_input("πŸ’¬ What financial insights are you looking for?")

# ---- Mode-specific input: only the relevant widget is shown ----------------
st.markdown("---")
if mode == "πŸ“„ PDF Upload Mode":
    st.markdown("### πŸ“‚ Upload Your Financial Report")
    uploaded_file = st.file_uploader("πŸ”Ό Upload PDF (Only for PDF Mode)", type=["pdf"])
    company_ticker = None
else:
    st.markdown("### 🌍 Live Market Data")
    company_ticker = st.text_input("🏒 Enter Company Ticker Symbol", placeholder="e.g., AAPL, MSFT")
    # Strip surrounding whitespace so a blank-looking entry fails validation
    # below instead of being sent to the data API as a symbol.
    company_ticker = company_ticker.strip() if company_ticker else company_ticker
    uploaded_file = None

# ---- Run the analysis ------------------------------------------------------
if st.button("πŸš€ Analyze Now"):
    if mode == "πŸ“„ PDF Upload Mode" and not uploaded_file:
        st.error("❌ Please upload a PDF file.")
    elif mode == "🌍 Live Data Mode" and not company_ticker:
        st.error("❌ Please enter a valid company ticker symbol.")
    else:
        with st.spinner("πŸ” Your Query is Processing, this can take up to 5 - 7 minutes⏳"):
            response = generate_response(user_query, company_ticker, mode, uploaded_file)
        st.markdown("---")
        st.markdown("<h3 style='color: #4CAF50;'>πŸ’‘ AI Response</h3>", unsafe_allow_html=True)
        st.write(response)

st.markdown("---")