redfernstech committed on
Commit
6af1e9b
·
verified ·
1 Parent(s): 4cfe99e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -17
app.py CHANGED
@@ -232,6 +232,7 @@
232
 
233
  import os
234
  import time
 
235
  from fastapi import FastAPI, Request
236
  from fastapi.responses import HTMLResponse
237
  from fastapi.staticfiles import StaticFiles
@@ -246,6 +247,10 @@ from fastapi.templating import Jinja2Templates
246
  from simple_salesforce import Salesforce, SalesforceLogin
247
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
248
 
 
 
 
 
249
  # Define Pydantic model for incoming request body
250
  class MessageRequest(BaseModel):
251
  message: str
@@ -294,7 +299,7 @@ Settings.llm = HuggingFaceLLM(
294
  tokenizer_name="google/flan-t5-small",
295
  context_window=512, # flan-t5-small has a max context window of 512 tokens
296
  max_new_tokens=256,
297
- generate_kwargs={"temperature": 0.1, "do_sample": True},
298
  model=AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small"),
299
  tokenizer=tokenizer,
300
  device_map="auto" # Automatically use GPU if available, else CPU
@@ -313,15 +318,23 @@ chat_history = []
313
  current_chat_history = []
314
 
315
  def data_ingestion_from_directory():
316
- documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
317
- storage_context = StorageContext.from_defaults()
318
- index = VectorStoreIndex.from_documents(documents)
319
- index.storage_context.persist(persist_dir=PERSIST_DIR)
 
 
 
 
 
 
 
 
320
 
321
  def initialize():
322
  start_time = time.time()
323
  data_ingestion_from_directory() # Process PDF ingestion at startup
324
- print(f"Data ingestion time: {time.time() - start_time} seconds")
325
 
326
  def split_name(full_name):
327
  # Split the name by spaces
@@ -343,10 +356,10 @@ def split_name(full_name):
343
  initialize() # Run initialization tasks
344
 
345
  def handle_query(query):
346
- # Custom prompt template for flan-t5-small (no chat template)
347
  text_qa_template = PromptTemplate(
348
  """
349
- You are Clara, a Redfernstech chatbot. Provide accurate, concise answers (10-15 words) based on company data.
350
  Context: {context_str}
351
  Question: {query_str}
352
  Answer:
@@ -360,15 +373,22 @@ def handle_query(query):
360
  if past_query.strip():
361
  context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
362
 
 
 
 
363
  query_engine = index.as_query_engine(text_qa_template=text_qa_template)
364
  answer = query_engine.query(query)
365
-
366
- if hasattr(answer, "response"):
367
- response = answer.response
368
- elif isinstance(answer, dict) and "response" in answer:
369
- response = answer["response"]
 
 
370
  else:
371
  response = "Sorry, I couldn't find an answer."
 
 
372
  current_chat_history.append((query, response))
373
  return response
374
 
@@ -380,7 +400,7 @@ async def load_chat(request: Request, id: str):
380
  async def save_chat_history(history: dict):
381
  # Check if 'userId' is present in the incoming dictionary
382
  user_id = history.get("userId")
383
- print(user_id)
384
 
385
  # Ensure user_id is defined before proceeding
386
  if user_id is None:
@@ -389,7 +409,7 @@ async def save_chat_history(history: dict):
389
  # Construct the chat history string
390
  hist = "".join([f"'{entry['sender']}: {entry['message']}'\n" for entry in history["history"]])
391
  hist = "You are a Redfernstech summarize model. Your aim is to use this conversation to identify user interests solely based on that conversation: " + hist
392
- print(hist)
393
 
394
  # Get the summarized result
395
  result = hist
@@ -397,6 +417,7 @@ async def save_chat_history(history: dict):
397
  try:
398
  sf.Lead.update(user_id, {"Description": result})
399
  except Exception as e:
 
400
  return {"error": f"Failed to update lead: {str(e)}"}, 500
401
 
402
  return {"summary": result, "message": "Chat history saved"}
@@ -418,8 +439,7 @@ async def receive_form_data(request: Request):
418
  # Generate a unique ID (for tracking user)
419
  unique_id = a["id"]
420
 
421
- # Here you can do something with form_data like saving it to a database
422
- print("Received form data:", form_data)
423
 
424
  # Send back the unique id to the frontend
425
  return JSONResponse({"id": unique_id})
@@ -427,6 +447,7 @@ async def receive_form_data(request: Request):
427
  @app.post("/chat/")
428
  async def chat(request: MessageRequest):
429
  message = request.message # Access the message from the request body
 
430
  response = handle_query(message) # Process the message
431
  message_data = {
432
  "sender": "User",
@@ -435,6 +456,7 @@ async def chat(request: MessageRequest):
435
  "timestamp": datetime.datetime.now().isoformat()
436
  }
437
  chat_history.append(message_data)
 
438
  return {"response": response}
439
 
440
  @app.get("/")
 
232
 
233
  import os
234
  import time
235
+ import logging
236
  from fastapi import FastAPI, Request
237
  from fastapi.responses import HTMLResponse
238
  from fastapi.staticfiles import StaticFiles
 
247
  from simple_salesforce import Salesforce, SalesforceLogin
248
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
249
 
250
+ # Set up logging
251
+ logging.basicConfig(level=logging.INFO)
252
+ logger = logging.getLogger(__name__)
253
+
254
  # Define Pydantic model for incoming request body
255
  class MessageRequest(BaseModel):
256
  message: str
 
299
  tokenizer_name="google/flan-t5-small",
300
  context_window=512, # flan-t5-small has a max context window of 512 tokens
301
  max_new_tokens=256,
302
+ generate_kwargs={"temperature": 0.3, "do_sample": True}, # Increased temperature for better responses
303
  model=AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small"),
304
  tokenizer=tokenizer,
305
  device_map="auto" # Automatically use GPU if available, else CPU
 
318
  current_chat_history = []
319
 
320
  def data_ingestion_from_directory():
321
+ try:
322
+ documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
323
+ logger.info(f"Loaded {len(documents)} documents from {PDF_DIRECTORY}")
324
+ if not documents:
325
+ logger.warning(f"No documents found in {PDF_DIRECTORY}. Ensure PDF files are present.")
326
+ storage_context = StorageContext.from_defaults()
327
+ index = VectorStoreIndex.from_documents(documents)
328
+ index.storage_context.persist(persist_dir=PERSIST_DIR)
329
+ logger.info(f"Index persisted to {PERSIST_DIR}")
330
+ except Exception as e:
331
+ logger.error(f"Error during data ingestion: {str(e)}")
332
+ raise
333
 
334
  def initialize():
335
  start_time = time.time()
336
  data_ingestion_from_directory() # Process PDF ingestion at startup
337
+ logger.info(f"Data ingestion time: {time.time() - start_time} seconds")
338
 
339
  def split_name(full_name):
340
  # Split the name by spaces
 
356
  initialize() # Run initialization tasks
357
 
358
  def handle_query(query):
359
+ # Custom prompt template for flan-t5-small
360
  text_qa_template = PromptTemplate(
361
  """
362
+ You are Clara, a Redfernstech chatbot. Answer the question in 10-15 words based on the provided context.
363
  Context: {context_str}
364
  Question: {query_str}
365
  Answer:
 
373
  if past_query.strip():
374
  context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
375
 
376
+ logger.info(f"Query: {query}")
377
+ logger.info(f"Context: {context_str}")
378
+
379
  query_engine = index.as_query_engine(text_qa_template=text_qa_template)
380
  answer = query_engine.query(query)
381
+
382
+ logger.info(f"Raw query engine output: {answer}")
383
+
384
+ if hasattr(answer, "response") and answer.response:
385
+ response = answer.response.strip()
386
+ elif isinstance(answer, dict) and "response" in answer and answer["response"]:
387
+ response = answer["response"].strip()
388
  else:
389
  response = "Sorry, I couldn't find an answer."
390
+
391
+ logger.info(f"Processed response: {response}")
392
  current_chat_history.append((query, response))
393
  return response
394
 
 
400
  async def save_chat_history(history: dict):
401
  # Check if 'userId' is present in the incoming dictionary
402
  user_id = history.get("userId")
403
+ logger.info(f"Received userId: {user_id}")
404
 
405
  # Ensure user_id is defined before proceeding
406
  if user_id is None:
 
409
  # Construct the chat history string
410
  hist = "".join([f"'{entry['sender']}: {entry['message']}'\n" for entry in history["history"]])
411
  hist = "You are a Redfernstech summarize model. Your aim is to use this conversation to identify user interests solely based on that conversation: " + hist
412
+ logger.info(f"Chat history: {hist}")
413
 
414
  # Get the summarized result
415
  result = hist
 
417
  try:
418
  sf.Lead.update(user_id, {"Description": result})
419
  except Exception as e:
420
+ logger.error(f"Failed to update lead: {str(e)}")
421
  return {"error": f"Failed to update lead: {str(e)}"}, 500
422
 
423
  return {"summary": result, "message": "Chat history saved"}
 
439
  # Generate a unique ID (for tracking user)
440
  unique_id = a["id"]
441
 
442
+ logger.info(f"Received form data: {form_data}")
 
443
 
444
  # Send back the unique id to the frontend
445
  return JSONResponse({"id": unique_id})
 
447
  @app.post("/chat/")
448
  async def chat(request: MessageRequest):
449
  message = request.message # Access the message from the request body
450
+ logger.info(f"Received chat message: {message}")
451
  response = handle_query(message) # Process the message
452
  message_data = {
453
  "sender": "User",
 
456
  "timestamp": datetime.datetime.now().isoformat()
457
  }
458
  chat_history.append(message_data)
459
+ logger.info(f"Chat response: {response}")
460
  return {"response": response}
461
 
462
  @app.get("/")