redfernstech committed on
Commit
bae8b12
·
verified ·
1 Parent(s): 9643dce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -8
app.py CHANGED
@@ -236,6 +236,8 @@ from fastapi.templating import Jinja2Templates
236
  from simple_salesforce import Salesforce, SalesforceLogin
237
  from langchain_groq import ChatGroq
238
  from langchain_core.prompts import ChatPromptTemplate
 
 
239
 
240
  # Configure logging
241
  logging.basicConfig(level=logging.INFO)
@@ -262,7 +264,7 @@ app.mount("/static", StaticFiles(directory="static"), name="static")
262
  templates = Jinja2Templates(directory="static")
263
 
264
  # Validate environment variables
265
- required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain"]
266
  for var in required_env_vars:
267
  if not os.getenv(var):
268
  logger.error(f"Environment variable {var} is not set")
@@ -282,10 +284,13 @@ except Exception as e:
282
  logger.error(f"Failed to initialize Groq model: {e}")
283
  raise HTTPException(status_code=500, detail="Failed to initialize Groq model")
284
 
 
 
 
285
  # Salesforce credentials
286
  username = os.getenv("username")
287
  password = os.getenv("password")
288
- security_token = os.getenv("security_token")
289
  domain = os.getenv("domain") # e.g., 'test' for sandbox
290
 
291
  # Initialize Salesforce connection
@@ -303,27 +308,71 @@ chat_history = []
303
  current_chat_history = []
304
  MAX_HISTORY_SIZE = 100 # Limit chat history size
305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  def handle_query(query):
307
  # Prepare context from chat history
308
- context_str = ""
309
  for past_query, response in reversed(current_chat_history[-10:]): # Limit context to last 10 exchanges
310
  if past_query.strip():
311
- context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
312
 
313
- # Construct the prompt
 
 
 
 
 
 
 
 
 
 
 
314
  prompt_template = ChatPromptTemplate.from_messages([
315
  ("system", """
316
  You are the Clara Redfernstech chatbot. Provide accurate, professional answers in 10-15 words.
 
 
 
 
317
 
318
- Context:
319
- {context}
320
 
321
  Question:
322
  {query}
323
  """),
324
  ])
325
- prompt = prompt_template.format(context=context_str, query=query)
326
 
 
327
  try:
328
  response = llm.invoke(prompt)
329
  response_text = response.content.strip()
 
from simple_salesforce import Salesforce, SalesforceLogin
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, Settings, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
241
 
242
  # Configure logging
243
  logging.basicConfig(level=logging.INFO)
 
264
  templates = Jinja2Templates(directory="static")
265
 
266
  # Validate environment variables
267
+ required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain", "HF_TOKEN"]
268
  for var in required_env_vars:
269
  if not os.getenv(var):
270
  logger.error(f"Environment variable {var} is not set")
 
284
  logger.error(f"Failed to initialize Groq model: {e}")
285
  raise HTTPException(status_code=500, detail="Failed to initialize Groq model")
286
 
287
+ # Configure LlamaIndex settings
288
+ Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
289
+
290
# Salesforce credentials
# NOTE: the lowercase env-var names intentionally match the keys checked in
# required_env_vars earlier in the file ("username", "password", "security_token", "domain").
username = os.getenv("username")
password = os.getenv("password")
# Bug fix: the key previously read 'security_token zasi' (stray suffix), which can
# never match a real environment variable and always returned None, while the
# startup validation checked the correct name "security_token".
security_token = os.getenv("security_token")
domain = os.getenv("domain")  # e.g., 'test' for sandbox
295
 
296
  # Initialize Salesforce connection
 
308
  current_chat_history = []
309
  MAX_HISTORY_SIZE = 100 # Limit chat history size
310
 
311
# Locations used by document ingestion: source PDFs and the persisted vector store.
PDF_DIRECTORY = "data"
PERSIST_DIR = "db"

# Create both directories up front (idempotent) so later reads/writes never hit
# a missing-path error.
for _dir in (PDF_DIRECTORY, PERSIST_DIR):
    os.makedirs(_dir, exist_ok=True)
318
+
319
def data_ingestion_from_directory():
    """Load all documents from PDF_DIRECTORY, build a vector index over them,
    and persist the index to PERSIST_DIR.

    Raises:
        HTTPException: 500 with the underlying error message if any step fails.
    """
    try:
        documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
        # Bug fix: the freshly created StorageContext was previously never passed
        # to from_documents (dead local), so the index silently used its own
        # implicit default context instead. Pass it explicitly.
        storage_context = StorageContext.from_defaults()
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
        # Persist the index so later queries can reload it via load_index_from_storage.
        index.storage_context.persist(persist_dir=PERSIST_DIR)
        logger.info("Data ingestion completed successfully")
    except Exception as e:
        logger.error(f"Error during data ingestion: {e}")
        # NOTE(review): this runs at startup, outside any request context —
        # raising HTTPException here is unusual; confirm a plain RuntimeError
        # wouldn't be more appropriate.
        raise HTTPException(status_code=500, detail=f"Data ingestion failed: {str(e)}")
329
+
330
def initialize():
    """Run one-time startup tasks (currently: ingest PDFs into the vector store).

    Raises:
        HTTPException: 500 if ingestion fails for any reason.
    """
    try:
        # Build and persist the vector index before the app starts serving requests.
        data_ingestion_from_directory()
    except Exception as exc:
        e = exc
        logger.error(f"Initialization failed: {e}")
        raise HTTPException(status_code=500, detail="Initialization failed")


# Kick off initialization at import time.
initialize()
338
+
339
  def handle_query(query):
340
  # Prepare context from chat history
341
+ chat_context = ""
342
  for past_query, response in reversed(current_chat_history[-10:]): # Limit context to last 10 exchanges
343
  if past_query.strip():
344
+ chat_context += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
345
 
346
+ # Load vector index and retrieve relevant documents
347
+ try:
348
+ storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
349
+ index = load_index_from_storage(storage_context)
350
+ query_engine = index.as_query_engine(similarity_top_k=2) # Retrieve top 2 relevant chunks
351
+ retrieved = query_engine.query(query)
352
+ doc_context = retrieved.response if hasattr(retrieved, 'response') else "No relevant documents found."
353
+ except Exception as e:
354
+ logger.error(f"Error retrieving documents: {e}")
355
+ doc_context = "Failed to retrieve documents."
356
+
357
+ # Construct the prompt with both chat and document context
358
  prompt_template = ChatPromptTemplate.from_messages([
359
  ("system", """
360
  You are the Clara Redfernstech chatbot. Provide accurate, professional answers in 10-15 words.
361
+ Use the document context and chat history to inform your response.
362
+
363
+ Document Context:
364
+ {doc_context}
365
 
366
+ Chat History:
367
+ {chat_context}
368
 
369
  Question:
370
  {query}
371
  """),
372
  ])
373
+ prompt = prompt_template.format(doc_context=doc_context, chat_context=chat_context, query=query)
374
 
375
+ # Query Groq model
376
  try:
377
  response = llm.invoke(prompt)
378
  response_text = response.content.strip()