Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -236,22 +236,18 @@ from fastapi.templating import Jinja2Templates
|
|
236 |
from simple_salesforce import Salesforce, SalesforceLogin
|
237 |
from langchain_groq import ChatGroq
|
238 |
from langchain_core.prompts import ChatPromptTemplate
|
239 |
-
from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, Settings
|
240 |
-
from llama_index.core import load_index_from_storage # Added missing import
|
241 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
242 |
|
243 |
# Configure logging
|
244 |
logging.basicConfig(level=logging.INFO)
|
245 |
logger = logging.getLogger(__name__)
|
246 |
|
247 |
-
# Define Pydantic model for incoming request body
|
248 |
class MessageRequest(BaseModel):
|
249 |
message: str
|
250 |
|
251 |
-
# Initialize FastAPI app
|
252 |
app = FastAPI()
|
253 |
|
254 |
-
# Allow CORS requests (restrict in production)
|
255 |
app.add_middleware(
|
256 |
CORSMiddleware,
|
257 |
allow_origins=["*"],
|
@@ -260,61 +256,40 @@ app.add_middleware(
|
|
260 |
allow_headers=["*"],
|
261 |
)
|
262 |
|
263 |
-
# Mount static files
|
264 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
265 |
templates = Jinja2Templates(directory="static")
|
266 |
|
267 |
-
# Validate environment variables
|
268 |
required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain", "HF_TOKEN"]
|
269 |
for var in required_env_vars:
|
270 |
if not os.getenv(var):
|
271 |
-
logger.error(f"
|
272 |
raise ValueError(f"Environment variable {var} is not set")
|
273 |
|
274 |
-
#
|
275 |
GROQ_API_KEY = os.getenv("CHATGROQ_API_KEY")
|
276 |
-
|
277 |
-
try:
|
278 |
-
llm = ChatGroq(
|
279 |
-
model_name=GROQ_MODEL,
|
280 |
-
api_key=GROQ_API_KEY,
|
281 |
-
temperature=0.1,
|
282 |
-
max_tokens=50
|
283 |
-
)
|
284 |
-
except Exception as e:
|
285 |
-
logger.error(f"Failed to initialize Groq model: {e}")
|
286 |
-
raise HTTPException(status_code=500, detail="Failed to initialize Groq model")
|
287 |
-
|
288 |
-
# Configure LlamaIndex settings
|
289 |
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
290 |
|
291 |
-
# Salesforce
|
292 |
-
username = os.getenv("username")
|
293 |
-
password = os.getenv("password")
|
294 |
-
security_token = os.getenv("security_token")
|
295 |
-
domain = os.getenv("domain") # e.g., 'test' for sandbox
|
296 |
-
|
297 |
-
# Initialize Salesforce connection (allow failure)
|
298 |
sf = None
|
299 |
try:
|
300 |
session_id, sf_instance = SalesforceLogin(
|
301 |
-
username=username,
|
|
|
|
|
|
|
302 |
)
|
303 |
sf = Salesforce(instance=sf_instance, session_id=session_id)
|
304 |
-
logger.info("Salesforce
|
305 |
except Exception as e:
|
306 |
-
logger.
|
307 |
|
308 |
-
# Chat history
|
309 |
chat_history = []
|
310 |
current_chat_history = []
|
311 |
-
MAX_HISTORY_SIZE = 100
|
312 |
|
313 |
-
# Directories for data ingestion
|
314 |
PDF_DIRECTORY = "data"
|
315 |
PERSIST_DIR = "db"
|
316 |
-
|
317 |
-
# Ensure directories exist
|
318 |
os.makedirs(PDF_DIRECTORY, exist_ok=True)
|
319 |
os.makedirs(PERSIST_DIR, exist_ok=True)
|
320 |
|
@@ -327,70 +302,68 @@ def data_ingestion_from_directory():
|
|
327 |
storage_context = StorageContext.from_defaults()
|
328 |
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
|
329 |
index.storage_context.persist(persist_dir=PERSIST_DIR)
|
330 |
-
logger.info("Data ingestion
|
331 |
except Exception as e:
|
332 |
-
logger.error(f"
|
333 |
-
raise HTTPException(status_code=500, detail=
|
334 |
|
335 |
def initialize():
|
336 |
try:
|
337 |
-
data_ingestion_from_directory()
|
338 |
except Exception as e:
|
339 |
-
logger.error(f"Initialization
|
340 |
-
raise HTTPException(status_code=500, detail="
|
341 |
|
342 |
-
initialize()
|
343 |
|
344 |
-
def handle_query(query):
|
345 |
-
# Prepare context from chat history
|
346 |
chat_context = ""
|
347 |
-
for past_query, response in reversed(current_chat_history[-10:]):
|
348 |
-
|
349 |
-
chat_context += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
|
350 |
|
351 |
-
# Load
|
352 |
try:
|
353 |
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
|
354 |
index = load_index_from_storage(storage_context)
|
355 |
query_engine = index.as_query_engine(similarity_top_k=2)
|
356 |
retrieved = query_engine.query(query)
|
357 |
-
doc_context =
|
358 |
-
logger.info(f"Retrieved documents for query '{query}': {doc_context[:100]}...")
|
359 |
except Exception as e:
|
360 |
-
logger.error(f"
|
361 |
-
doc_context = "
|
362 |
|
363 |
-
#
|
364 |
prompt_template = ChatPromptTemplate.from_messages([
|
365 |
("system", """
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
|
370 |
-
|
371 |
-
|
372 |
|
373 |
-
|
374 |
-
|
375 |
|
376 |
-
|
377 |
-
|
378 |
-
""")
|
379 |
])
|
380 |
prompt = prompt_template.format(doc_context=doc_context, chat_context=chat_context, query=query)
|
381 |
|
382 |
-
# Query Groq model
|
383 |
try:
|
384 |
response = llm.invoke(prompt)
|
385 |
response_text = response.content.strip()
|
|
|
|
|
386 |
except Exception as e:
|
387 |
-
logger.error(f"
|
388 |
-
response_text = "
|
389 |
|
390 |
-
# Update chat history
|
391 |
if len(current_chat_history) >= MAX_HISTORY_SIZE:
|
392 |
current_chat_history.pop(0)
|
393 |
current_chat_history.append((query, response_text))
|
|
|
394 |
return response_text
|
395 |
|
396 |
@app.get("/ch/{id}", response_class=HTMLResponse)
|
@@ -400,62 +373,52 @@ async def load_chat(request: Request, id: str):
|
|
400 |
@app.post("/hist/")
|
401 |
async def save_chat_history(history: dict):
|
402 |
if not sf:
|
403 |
-
|
404 |
-
return {"error": "Salesforce integration is unavailable"}, 503
|
405 |
|
406 |
user_id = history.get('userId')
|
407 |
if not user_id:
|
408 |
-
|
409 |
-
return {"error": "userId is required"}, 400
|
410 |
|
411 |
-
hist = ''.join([f"
|
412 |
-
|
413 |
|
414 |
try:
|
415 |
-
sf.Lead.update(user_id, {'Description':
|
416 |
-
|
417 |
except Exception as e:
|
418 |
-
|
419 |
-
return {"error": f"Failed to update lead: {str(e)}"}, 500
|
420 |
-
|
421 |
-
return {"summary": hist, "message": "Chat history saved"}
|
422 |
|
423 |
@app.post("/webhook")
|
424 |
async def receive_form_data(request: Request):
|
425 |
if not sf:
|
426 |
-
|
427 |
-
return {"error": "Salesforce integration is unavailable"}, 503
|
428 |
|
429 |
try:
|
430 |
form_data = await request.json()
|
431 |
except json.JSONDecodeError:
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
'Email': form_data.get('email', ''),
|
443 |
}
|
444 |
|
445 |
try:
|
446 |
-
result = sf.Lead.create(
|
447 |
-
|
448 |
-
logger.info(f"Lead created with ID {unique_id}")
|
449 |
-
return JSONResponse({"id": unique_id})
|
450 |
except Exception as e:
|
451 |
-
|
452 |
-
return {"error": f"Failed to create lead: {str(e)}"}, 500
|
453 |
|
454 |
@app.post("/chat/")
|
455 |
async def chat(request: MessageRequest):
|
456 |
message = request.message
|
457 |
response = handle_query(message)
|
458 |
-
|
459 |
"sender": "User",
|
460 |
"message": message,
|
461 |
"response": response,
|
@@ -463,30 +426,24 @@ async def chat(request: MessageRequest):
|
|
463 |
}
|
464 |
if len(chat_history) >= MAX_HISTORY_SIZE:
|
465 |
chat_history.pop(0)
|
466 |
-
chat_history.append(
|
467 |
-
logger.info(f"Chat message processed: {message}")
|
468 |
return {"response": response}
|
469 |
|
470 |
@app.get("/health")
|
471 |
async def health_check():
|
472 |
try:
|
473 |
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
|
474 |
-
|
475 |
-
|
476 |
-
return {"status": "healthy", "pdf_ingestion": "successful"}
|
477 |
except Exception as e:
|
478 |
-
logger.error(f"Health check failed: {e}")
|
479 |
return {"status": "unhealthy", "error": str(e)}
|
480 |
|
481 |
@app.get("/")
|
482 |
def read_root():
|
483 |
-
return {"message": "Welcome to the
|
484 |
|
485 |
def split_name(full_name):
|
486 |
-
|
487 |
-
if len(
|
488 |
-
return '',
|
489 |
-
|
490 |
-
return words[0], words[1]
|
491 |
-
else:
|
492 |
-
return words[0], ' '.join(words[1:])
|
|
|
236 |
from simple_salesforce import Salesforce, SalesforceLogin
|
237 |
from langchain_groq import ChatGroq
|
238 |
from langchain_core.prompts import ChatPromptTemplate
|
239 |
+
from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, Settings, load_index_from_storage
|
|
|
240 |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
241 |
|
242 |
# Configure logging
|
243 |
logging.basicConfig(level=logging.INFO)
|
244 |
logger = logging.getLogger(__name__)
|
245 |
|
|
|
246 |
class MessageRequest(BaseModel):
|
247 |
message: str
|
248 |
|
|
|
249 |
app = FastAPI()
|
250 |
|
|
|
251 |
app.add_middleware(
|
252 |
CORSMiddleware,
|
253 |
allow_origins=["*"],
|
|
|
256 |
allow_headers=["*"],
|
257 |
)
|
258 |
|
|
|
259 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
260 |
templates = Jinja2Templates(directory="static")
|
261 |
|
|
|
262 |
required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain", "HF_TOKEN"]
|
263 |
for var in required_env_vars:
|
264 |
if not os.getenv(var):
|
265 |
+
logger.error(f"Missing environment variable: {var}")
|
266 |
raise ValueError(f"Environment variable {var} is not set")
|
267 |
|
268 |
+
# LLM & Embedding Setup
|
269 |
GROQ_API_KEY = os.getenv("CHATGROQ_API_KEY")
|
270 |
+
llm = ChatGroq(model_name="llama3-8b-8192", api_key=GROQ_API_KEY, temperature=0.1, max_tokens=50)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
|
272 |
|
273 |
+
# Salesforce setup
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
sf = None
|
275 |
try:
|
276 |
session_id, sf_instance = SalesforceLogin(
|
277 |
+
username=os.getenv("username"),
|
278 |
+
password=os.getenv("password"),
|
279 |
+
security_token=os.getenv("security_token"),
|
280 |
+
domain=os.getenv("domain")
|
281 |
)
|
282 |
sf = Salesforce(instance=sf_instance, session_id=session_id)
|
283 |
+
logger.info("Salesforce connected.")
|
284 |
except Exception as e:
|
285 |
+
logger.warning(f"Salesforce connection failed: {e}")
|
286 |
|
|
|
287 |
chat_history = []
|
288 |
current_chat_history = []
|
289 |
+
MAX_HISTORY_SIZE = 100
|
290 |
|
|
|
291 |
PDF_DIRECTORY = "data"
|
292 |
PERSIST_DIR = "db"
|
|
|
|
|
293 |
os.makedirs(PDF_DIRECTORY, exist_ok=True)
|
294 |
os.makedirs(PERSIST_DIR, exist_ok=True)
|
295 |
|
|
|
302 |
storage_context = StorageContext.from_defaults()
|
303 |
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
|
304 |
index.storage_context.persist(persist_dir=PERSIST_DIR)
|
305 |
+
logger.info("Data ingestion and embedding complete.")
|
306 |
except Exception as e:
|
307 |
+
logger.error(f"Data ingestion failed: {e}")
|
308 |
+
raise HTTPException(status_code=500, detail="Data ingestion failed")
|
309 |
|
310 |
def initialize():
|
311 |
try:
|
312 |
+
data_ingestion_from_directory()
|
313 |
except Exception as e:
|
314 |
+
logger.error(f"Initialization error: {e}")
|
315 |
+
raise HTTPException(status_code=500, detail="Startup initialization failed")
|
316 |
|
317 |
+
initialize()
|
318 |
|
319 |
+
def handle_query(query: str) -> str:
|
|
|
320 |
chat_context = ""
|
321 |
+
for past_query, response in reversed(current_chat_history[-10:]):
|
322 |
+
chat_context += f"User: {past_query}\nBot: {response}\n"
|
|
|
323 |
|
324 |
+
# Load index
|
325 |
try:
|
326 |
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
|
327 |
index = load_index_from_storage(storage_context)
|
328 |
query_engine = index.as_query_engine(similarity_top_k=2)
|
329 |
retrieved = query_engine.query(query)
|
330 |
+
doc_context = getattr(retrieved, 'response', "No relevant documents found.")
|
|
|
331 |
except Exception as e:
|
332 |
+
logger.error(f"Retrieval error: {e}")
|
333 |
+
doc_context = "No relevant documents found."
|
334 |
|
335 |
+
# Prompt template
|
336 |
prompt_template = ChatPromptTemplate.from_messages([
|
337 |
("system", """
|
338 |
+
You are a helpful and professional company chatbot.
|
339 |
+
Answer user queries based on the provided document context and chat history.
|
340 |
+
If you are unsure about the answer, politely respond with "I'm sorry, I don't know that yet."
|
341 |
|
342 |
+
Document Context:
|
343 |
+
{doc_context}
|
344 |
|
345 |
+
Chat History:
|
346 |
+
{chat_context}
|
347 |
|
348 |
+
Question:
|
349 |
+
{query}
|
350 |
+
""")
|
351 |
])
|
352 |
prompt = prompt_template.format(doc_context=doc_context, chat_context=chat_context, query=query)
|
353 |
|
|
|
354 |
try:
|
355 |
response = llm.invoke(prompt)
|
356 |
response_text = response.content.strip()
|
357 |
+
if "I'm sorry" not in response_text and len(response_text.strip()) < 3:
|
358 |
+
response_text = "I'm sorry, I don't know that yet."
|
359 |
except Exception as e:
|
360 |
+
logger.error(f"Groq API Error: {e}")
|
361 |
+
response_text = "I'm sorry, I don't know that yet."
|
362 |
|
|
|
363 |
if len(current_chat_history) >= MAX_HISTORY_SIZE:
|
364 |
current_chat_history.pop(0)
|
365 |
current_chat_history.append((query, response_text))
|
366 |
+
|
367 |
return response_text
|
368 |
|
369 |
@app.get("/ch/{id}", response_class=HTMLResponse)
|
|
|
373 |
@app.post("/hist/")
|
374 |
async def save_chat_history(history: dict):
|
375 |
if not sf:
|
376 |
+
return JSONResponse({"error": "Salesforce not connected"}, status_code=503)
|
|
|
377 |
|
378 |
user_id = history.get('userId')
|
379 |
if not user_id:
|
380 |
+
return JSONResponse({"error": "userId missing"}, status_code=400)
|
|
|
381 |
|
382 |
+
hist = '\n'.join([f"{entry['sender']}: {entry['message']}" for entry in history.get("history", [])])
|
383 |
+
summary = "This is the chat summary: " + hist
|
384 |
|
385 |
try:
|
386 |
+
sf.Lead.update(user_id, {'Description': summary})
|
387 |
+
return {"summary": summary, "message": "Chat history saved"}
|
388 |
except Exception as e:
|
389 |
+
return JSONResponse({"error": str(e)}, status_code=500)
|
|
|
|
|
|
|
390 |
|
391 |
@app.post("/webhook")
|
392 |
async def receive_form_data(request: Request):
|
393 |
if not sf:
|
394 |
+
return JSONResponse({"error": "Salesforce not connected"}, status_code=503)
|
|
|
395 |
|
396 |
try:
|
397 |
form_data = await request.json()
|
398 |
except json.JSONDecodeError:
|
399 |
+
return JSONResponse({"error": "Invalid JSON"}, status_code=400)
|
400 |
+
|
401 |
+
first_name, last_name = split_name(form_data.get("name", ""))
|
402 |
+
lead_data = {
|
403 |
+
"FirstName": first_name,
|
404 |
+
"LastName": last_name,
|
405 |
+
"Company": form_data.get("company", ""),
|
406 |
+
"Phone": form_data.get("phone", ""),
|
407 |
+
"Email": form_data.get("email", ""),
|
408 |
+
"Description": "Lead from website form"
|
|
|
409 |
}
|
410 |
|
411 |
try:
|
412 |
+
result = sf.Lead.create(lead_data)
|
413 |
+
return {"id": result.get("id")}
|
|
|
|
|
414 |
except Exception as e:
|
415 |
+
return JSONResponse({"error": str(e)}, status_code=500)
|
|
|
416 |
|
417 |
@app.post("/chat/")
|
418 |
async def chat(request: MessageRequest):
|
419 |
message = request.message
|
420 |
response = handle_query(message)
|
421 |
+
chat_entry = {
|
422 |
"sender": "User",
|
423 |
"message": message,
|
424 |
"response": response,
|
|
|
426 |
}
|
427 |
if len(chat_history) >= MAX_HISTORY_SIZE:
|
428 |
chat_history.pop(0)
|
429 |
+
chat_history.append(chat_entry)
|
|
|
430 |
return {"response": response}
|
431 |
|
432 |
@app.get("/health")
|
433 |
async def health_check():
|
434 |
try:
|
435 |
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
|
436 |
+
load_index_from_storage(storage_context)
|
437 |
+
return {"status": "healthy"}
|
|
|
438 |
except Exception as e:
|
|
|
439 |
return {"status": "unhealthy", "error": str(e)}
|
440 |
|
441 |
@app.get("/")
|
442 |
def read_root():
|
443 |
+
return {"message": "Welcome to the company chatbot API"}
|
444 |
|
445 |
def split_name(full_name):
|
446 |
+
parts = full_name.strip().split()
|
447 |
+
if len(parts) == 1:
|
448 |
+
return '', parts[0]
|
449 |
+
return parts[0], ' '.join(parts[1:])
|
|
|
|
|
|