redfernstech committed on
Commit
195dc2b
·
verified ·
1 Parent(s): 3c91df4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -115
app.py CHANGED
@@ -236,22 +236,18 @@ from fastapi.templating import Jinja2Templates
236
  from simple_salesforce import Salesforce, SalesforceLogin
237
  from langchain_groq import ChatGroq
238
  from langchain_core.prompts import ChatPromptTemplate
239
- from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, Settings
240
- from llama_index.core import load_index_from_storage # Added missing import
241
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
242
 
243
  # Configure logging
244
  logging.basicConfig(level=logging.INFO)
245
  logger = logging.getLogger(__name__)
246
 
247
- # Define Pydantic model for incoming request body
248
  class MessageRequest(BaseModel):
249
  message: str
250
 
251
- # Initialize FastAPI app
252
  app = FastAPI()
253
 
254
- # Allow CORS requests (restrict in production)
255
  app.add_middleware(
256
  CORSMiddleware,
257
  allow_origins=["*"],
@@ -260,61 +256,40 @@ app.add_middleware(
260
  allow_headers=["*"],
261
  )
262
 
263
- # Mount static files
264
  app.mount("/static", StaticFiles(directory="static"), name="static")
265
  templates = Jinja2Templates(directory="static")
266
 
267
- # Validate environment variables
268
  required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain", "HF_TOKEN"]
269
  for var in required_env_vars:
270
  if not os.getenv(var):
271
- logger.error(f"Environment variable {var} is not set")
272
  raise ValueError(f"Environment variable {var} is not set")
273
 
274
- # Initialize Groq model
275
  GROQ_API_KEY = os.getenv("CHATGROQ_API_KEY")
276
- GROQ_MODEL = "llama3-8b-8192"
277
- try:
278
- llm = ChatGroq(
279
- model_name=GROQ_MODEL,
280
- api_key=GROQ_API_KEY,
281
- temperature=0.1,
282
- max_tokens=50
283
- )
284
- except Exception as e:
285
- logger.error(f"Failed to initialize Groq model: {e}")
286
- raise HTTPException(status_code=500, detail="Failed to initialize Groq model")
287
-
288
- # Configure LlamaIndex settings
289
  Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
290
 
291
- # Salesforce credentials
292
- username = os.getenv("username")
293
- password = os.getenv("password")
294
- security_token = os.getenv("security_token")
295
- domain = os.getenv("domain") # e.g., 'test' for sandbox
296
-
297
- # Initialize Salesforce connection (allow failure)
298
  sf = None
299
  try:
300
  session_id, sf_instance = SalesforceLogin(
301
- username=username, password=password, security_token=security_token, domain=domain
 
 
 
302
  )
303
  sf = Salesforce(instance=sf_instance, session_id=session_id)
304
- logger.info("Salesforce connection established")
305
  except Exception as e:
306
- logger.error(f"Failed to connect to Salesforce: {e}. Continuing without Salesforce integration.")
307
 
308
- # Chat history
309
  chat_history = []
310
  current_chat_history = []
311
- MAX_HISTORY_SIZE = 100 # Limit chat history size
312
 
313
- # Directories for data ingestion
314
  PDF_DIRECTORY = "data"
315
  PERSIST_DIR = "db"
316
-
317
- # Ensure directories exist
318
  os.makedirs(PDF_DIRECTORY, exist_ok=True)
319
  os.makedirs(PERSIST_DIR, exist_ok=True)
320
 
@@ -327,70 +302,68 @@ def data_ingestion_from_directory():
327
  storage_context = StorageContext.from_defaults()
328
  index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
329
  index.storage_context.persist(persist_dir=PERSIST_DIR)
330
- logger.info("Data ingestion completed successfully")
331
  except Exception as e:
332
- logger.error(f"Error during data ingestion: {e}")
333
- raise HTTPException(status_code=500, detail=f"Data ingestion failed: {str(e)}")
334
 
335
  def initialize():
336
  try:
337
- data_ingestion_from_directory() # Process PDF ingestion at startup
338
  except Exception as e:
339
- logger.error(f"Initialization failed: {e}")
340
- raise HTTPException(status_code=500, detail="Initialization failed")
341
 
342
- initialize() # Run initialization tasks
343
 
344
- def handle_query(query):
345
- # Prepare context from chat history
346
  chat_context = ""
347
- for past_query, response in reversed(current_chat_history[-10:]): # Limit to last 10 exchanges
348
- if past_query.strip():
349
- chat_context += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
350
 
351
- # Load vector index and retrieve relevant documents
352
  try:
353
  storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
354
  index = load_index_from_storage(storage_context)
355
  query_engine = index.as_query_engine(similarity_top_k=2)
356
  retrieved = query_engine.query(query)
357
- doc_context = retrieved.response if hasattr(retrieved, 'response') else "No relevant documents found."
358
- logger.info(f"Retrieved documents for query '{query}': {doc_context[:100]}...")
359
  except Exception as e:
360
- logger.error(f"Error retrieving documents: {e}")
361
- doc_context = "Failed to retrieve documents."
362
 
363
- # Construct the prompt with Redferns Tech focus
364
  prompt_template = ChatPromptTemplate.from_messages([
365
  ("system", """
366
- You are Clara Redfernstech, a chatbot for Redferns Tech, a leader in data science, machine learning, and AI solutions.
367
- Provide accurate, professional answers in 10-15 words based on the provided document context and chat history.
368
- Focus on Redferns Tech's expertise in data science and AI.
369
 
370
- Document Context:
371
- {doc_context}
372
 
373
- Chat History:
374
- {chat_context}
375
 
376
- Question:
377
- {query}
378
- """),
379
  ])
380
  prompt = prompt_template.format(doc_context=doc_context, chat_context=chat_context, query=query)
381
 
382
- # Query Groq model
383
  try:
384
  response = llm.invoke(prompt)
385
  response_text = response.content.strip()
 
 
386
  except Exception as e:
387
- logger.error(f"Error querying Groq API: {e}")
388
- response_text = "Sorry, I couldn't find an answer."
389
 
390
- # Update chat history
391
  if len(current_chat_history) >= MAX_HISTORY_SIZE:
392
  current_chat_history.pop(0)
393
  current_chat_history.append((query, response_text))
 
394
  return response_text
395
 
396
  @app.get("/ch/{id}", response_class=HTMLResponse)
@@ -400,62 +373,52 @@ async def load_chat(request: Request, id: str):
400
  @app.post("/hist/")
401
  async def save_chat_history(history: dict):
402
  if not sf:
403
- logger.error("Salesforce integration is disabled")
404
- return {"error": "Salesforce integration is unavailable"}, 503
405
 
406
  user_id = history.get('userId')
407
  if not user_id:
408
- logger.error("userId is missing in history request")
409
- return {"error": "userId is required"}, 400
410
 
411
- hist = ''.join([f"'{entry['sender']}: {entry['message']}'\n" for entry in history['history']])
412
- hist = "You are a Redfernstech summarize model. Identify user interests from this conversation: " + hist
413
 
414
  try:
415
- sf.Lead.update(user_id, {'Description': hist})
416
- logger.info(f"Chat history updated for user {user_id}")
417
  except Exception as e:
418
- logger.error(f"Failed to update lead: {e}")
419
- return {"error": f"Failed to update lead: {str(e)}"}, 500
420
-
421
- return {"summary": hist, "message": "Chat history saved"}
422
 
423
  @app.post("/webhook")
424
  async def receive_form_data(request: Request):
425
  if not sf:
426
- logger.error("Salesforce integration is disabled")
427
- return {"error": "Salesforce integration is unavailable"}, 503
428
 
429
  try:
430
  form_data = await request.json()
431
  except json.JSONDecodeError:
432
- logger.error("Invalid JSON in webhook request")
433
- return {"error": "Invalid JSON"}, 400
434
-
435
- first_name, last_name = split_name(form_data.get('name', ''))
436
- data = {
437
- 'FirstName': first_name,
438
- 'LastName': last_name,
439
- 'Description': 'hii',
440
- 'Company': form_data.get('company', ''),
441
- 'Phone': form_data.get('phone', '').strip(),
442
- 'Email': form_data.get('email', ''),
443
  }
444
 
445
  try:
446
- result = sf.Lead.create(data)
447
- unique_id = result['id']
448
- logger.info(f"Lead created with ID {unique_id}")
449
- return JSONResponse({"id": unique_id})
450
  except Exception as e:
451
- logger.error(f"Failed to create lead: {e}")
452
- return {"error": f"Failed to create lead: {str(e)}"}, 500
453
 
454
  @app.post("/chat/")
455
  async def chat(request: MessageRequest):
456
  message = request.message
457
  response = handle_query(message)
458
- message_data = {
459
  "sender": "User",
460
  "message": message,
461
  "response": response,
@@ -463,30 +426,24 @@ async def chat(request: MessageRequest):
463
  }
464
  if len(chat_history) >= MAX_HISTORY_SIZE:
465
  chat_history.pop(0)
466
- chat_history.append(message_data)
467
- logger.info(f"Chat message processed: {message}")
468
  return {"response": response}
469
 
470
  @app.get("/health")
471
  async def health_check():
472
  try:
473
  storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
474
- index = load_index_from_storage(storage_context)
475
- logger.info("Vector index loaded successfully")
476
- return {"status": "healthy", "pdf_ingestion": "successful"}
477
  except Exception as e:
478
- logger.error(f"Health check failed: {e}")
479
  return {"status": "unhealthy", "error": str(e)}
480
 
481
  @app.get("/")
482
  def read_root():
483
- return {"message": "Welcome to the Redferns Tech Chatbot API"}
484
 
485
  def split_name(full_name):
486
- words = full_name.strip().split()
487
- if len(words) == 1:
488
- return '', words[0]
489
- elif len(words) == 2:
490
- return words[0], words[1]
491
- else:
492
- return words[0], ' '.join(words[1:])
 
236
  from simple_salesforce import Salesforce, SalesforceLogin
237
  from langchain_groq import ChatGroq
238
  from langchain_core.prompts import ChatPromptTemplate
239
+ from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, Settings, load_index_from_storage
 
240
  from llama_index.embeddings.huggingface import HuggingFaceEmbedding
241
 
242
  # Configure logging
243
  logging.basicConfig(level=logging.INFO)
244
  logger = logging.getLogger(__name__)
245
 
 
class MessageRequest(BaseModel):
    # Request body accepted by the POST /chat/ endpoint.
    # `message` carries the user's chat text that is passed to handle_query().
    message: str
248
 
 
249
  app = FastAPI()
250
 
 
251
  app.add_middleware(
252
  CORSMiddleware,
253
  allow_origins=["*"],
 
256
  allow_headers=["*"],
257
  )
258
 
 
259
  app.mount("/static", StaticFiles(directory="static"), name="static")
260
  templates = Jinja2Templates(directory="static")
261
 
 
262
  required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain", "HF_TOKEN"]
263
  for var in required_env_vars:
264
  if not os.getenv(var):
265
+ logger.error(f"Missing environment variable: {var}")
266
  raise ValueError(f"Environment variable {var} is not set")
267
 
268
+ # LLM & Embedding Setup
269
  GROQ_API_KEY = os.getenv("CHATGROQ_API_KEY")
270
+ llm = ChatGroq(model_name="llama3-8b-8192", api_key=GROQ_API_KEY, temperature=0.1, max_tokens=50)
 
 
 
 
 
 
 
 
 
 
 
 
271
  Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
272
 
273
+ # Salesforce setup
 
 
 
 
 
 
274
  sf = None
275
  try:
276
  session_id, sf_instance = SalesforceLogin(
277
+ username=os.getenv("username"),
278
+ password=os.getenv("password"),
279
+ security_token=os.getenv("security_token"),
280
+ domain=os.getenv("domain")
281
  )
282
  sf = Salesforce(instance=sf_instance, session_id=session_id)
283
+ logger.info("Salesforce connected.")
284
  except Exception as e:
285
+ logger.warning(f"Salesforce connection failed: {e}")
286
 
 
287
  chat_history = []
288
  current_chat_history = []
289
+ MAX_HISTORY_SIZE = 100
290
 
 
291
  PDF_DIRECTORY = "data"
292
  PERSIST_DIR = "db"
 
 
293
  os.makedirs(PDF_DIRECTORY, exist_ok=True)
294
  os.makedirs(PERSIST_DIR, exist_ok=True)
295
 
 
302
  storage_context = StorageContext.from_defaults()
303
  index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
304
  index.storage_context.persist(persist_dir=PERSIST_DIR)
305
+ logger.info("Data ingestion and embedding complete.")
306
  except Exception as e:
307
+ logger.error(f"Data ingestion failed: {e}")
308
+ raise HTTPException(status_code=500, detail="Data ingestion failed")
309
 
def initialize():
    """Run startup tasks: ingest the source documents into the vector index.

    Raises:
        HTTPException: if ingestion fails. The underlying error is logged and
            chained as the cause so the root failure stays in the traceback.
    """
    try:
        data_ingestion_from_directory()
    except Exception as e:
        logger.error(f"Initialization error: {e}")
        raise HTTPException(status_code=500, detail="Startup initialization failed") from e
316
 
317
+ initialize()
318
 
def handle_query(query: str) -> str:
    """Answer a user query from retrieved document context and recent chat history.

    The persisted index under ``PERSIST_DIR`` is loaded and queried, the result
    is combined with up to the last 10 exchanges into a prompt, and the prompt
    is sent to the Groq model. The exchange is appended to
    ``current_chat_history`` (capped at ``MAX_HISTORY_SIZE``).

    Args:
        query: The user's question.

    Returns:
        The model's answer, or the fixed apology string when retrieval output
        is unusable, the model call fails, or the answer is shorter than
        3 characters.
    """
    # Up to the last 10 exchanges, iterated newest-first (order preserved from
    # the previous implementation).
    recent_exchanges = reversed(current_chat_history[-10:])
    chat_context = "".join(
        f"User: {past_query}\nBot: {response}\n"
        for past_query, response in recent_exchanges
    )

    # Load index
    try:
        storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
        index = load_index_from_storage(storage_context)
        query_engine = index.as_query_engine(similarity_top_k=2)
        retrieved = query_engine.query(query)
        # The attribute can exist but be None or empty; fall back in that case
        # too so the prompt never contains the literal text "None".
        doc_context = getattr(retrieved, 'response', None) or "No relevant documents found."
    except Exception as e:
        logger.error(f"Retrieval error: {e}")
        doc_context = "No relevant documents found."

    # Prompt template
    prompt_template = ChatPromptTemplate.from_messages([
        ("system", """
            You are a helpful and professional company chatbot.
            Answer user queries based on the provided document context and chat history.
            If you are unsure about the answer, politely respond with "I'm sorry, I don't know that yet."

            Document Context:
            {doc_context}

            Chat History:
            {chat_context}

            Question:
            {query}
            """)
    ])
    prompt = prompt_template.format(doc_context=doc_context, chat_context=chat_context, query=query)

    try:
        response = llm.invoke(prompt)
        response_text = response.content.strip()
        # A reply under 3 characters is treated as a non-answer. (Such a short
        # string can never contain "I'm sorry", so no extra check is needed.)
        if len(response_text) < 3:
            response_text = "I'm sorry, I don't know that yet."
    except Exception as e:
        logger.error(f"Groq API Error: {e}")
        response_text = "I'm sorry, I don't know that yet."

    # Drop the oldest exchange once the cap is reached.
    if len(current_chat_history) >= MAX_HISTORY_SIZE:
        current_chat_history.pop(0)
    current_chat_history.append((query, response_text))

    return response_text
368
 
369
  @app.get("/ch/{id}", response_class=HTMLResponse)
 
@app.post("/hist/")
async def save_chat_history(history: dict):
    """Store a chat transcript in the Description field of a Salesforce Lead.

    Args:
        history: Request body. Reads ``userId`` (the Lead id to update) and
            ``history``, a list of entries each carrying ``sender`` and
            ``message``.

    Returns:
        ``{"summary", "message"}`` on success; otherwise a JSON error with
        status 503 (Salesforce unavailable), 400 (missing ``userId`` or
        malformed entries) or 500 (Salesforce update failed).
    """
    if not sf:
        return JSONResponse({"error": "Salesforce not connected"}, status_code=503)

    user_id = history.get('userId')
    if not user_id:
        return JSONResponse({"error": "userId missing"}, status_code=400)

    # `history` may be absent or explicitly null; treat both as an empty transcript.
    entries = history.get("history") or []
    try:
        hist = '\n'.join(f"{entry['sender']}: {entry['message']}" for entry in entries)
    except (KeyError, TypeError):
        # Entries lacking sender/message, or a non-list payload, are a client
        # error rather than a server crash.
        return JSONResponse(
            {"error": "history must be a list of entries with sender and message"},
            status_code=400,
        )
    summary = "This is the chat summary: " + hist

    try:
        sf.Lead.update(user_id, {'Description': summary})
        return {"summary": summary, "message": "Chat history saved"}
    except Exception as e:
        logger.error(f"Failed to update lead: {e}")
        return JSONResponse({"error": str(e)}, status_code=500)
 
 
 
390
 
@app.post("/webhook")
async def receive_form_data(request: Request):
    """Create a Salesforce Lead from a JSON form submission.

    Reads ``name``, ``company``, ``phone`` and ``email`` from the JSON body.

    Returns:
        ``{"id": <lead id>}`` on success; otherwise a JSON error with status
        503 (Salesforce unavailable), 400 (invalid body or missing name) or
        500 (Salesforce rejected the lead).
    """
    if not sf:
        return JSONResponse({"error": "Salesforce not connected"}, status_code=503)

    try:
        form_data = await request.json()
    except json.JSONDecodeError:
        return JSONResponse({"error": "Invalid JSON"}, status_code=400)

    # Valid JSON is not necessarily an object (it may be a list, string, ...);
    # the field lookups below require a dict.
    if not isinstance(form_data, dict):
        return JSONResponse({"error": "Invalid JSON"}, status_code=400)

    full_name = str(form_data.get("name") or "").strip()
    if not full_name:
        # A lead cannot be built without a name; reject it here instead of
        # passing an empty string further down.
        return JSONResponse({"error": "name missing"}, status_code=400)

    first_name, last_name = split_name(full_name)
    lead_data = {
        "FirstName": first_name,
        "LastName": last_name,
        "Company": form_data.get("company", ""),
        "Phone": form_data.get("phone", ""),
        "Email": form_data.get("email", ""),
        "Description": "Lead from website form",
    }

    try:
        result = sf.Lead.create(lead_data)
        return {"id": result.get("id")}
    except Exception as e:
        logger.error(f"Failed to create lead: {e}")
        return JSONResponse({"error": str(e)}, status_code=500)
 
416
 
417
  @app.post("/chat/")
418
  async def chat(request: MessageRequest):
419
  message = request.message
420
  response = handle_query(message)
421
+ chat_entry = {
422
  "sender": "User",
423
  "message": message,
424
  "response": response,
 
426
  }
427
  if len(chat_history) >= MAX_HISTORY_SIZE:
428
  chat_history.pop(0)
429
+ chat_history.append(chat_entry)
 
430
  return {"response": response}
431
 
@app.get("/health")
async def health_check():
    """Report whether the persisted vector index can be loaded.

    Returns:
        ``{"status": "healthy"}`` when the index under ``PERSIST_DIR`` loads,
        otherwise ``{"status": "unhealthy", "error": <message>}``.
    """
    try:
        storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
        load_index_from_storage(storage_context)
    except Exception as e:
        # Log the failure so it is visible server-side, not only to whoever
        # happened to call the endpoint.
        logger.error(f"Health check failed: {e}")
        return {"status": "unhealthy", "error": str(e)}
    return {"status": "healthy"}
440
 
@app.get("/")
def read_root():
    """Serve the API root with a static welcome payload."""
    welcome = {"message": "Welcome to the company chatbot API"}
    return welcome
444
 
def split_name(full_name):
    """Split a full name into a ``(first_name, last_name)`` tuple.

    A single word is returned as the last name with an empty first name.
    With two or more words, the first word is the first name and the
    remaining words, joined by single spaces, form the last name.

    Empty, whitespace-only or ``None`` input returns ``('', '')`` instead of
    raising, so callers can always unpack two values.
    """
    parts = (full_name or "").strip().split()
    if not parts:
        return '', ''
    if len(parts) == 1:
        return '', parts[0]
    return parts[0], ' '.join(parts[1:])