FernAI

Sleeping

App Files Community

redfernstech committed 6 days ago

Commit

0259037

verified ·

1 Parent(s): 195dc2b

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -72

app.py CHANGED Viewed

@@ -236,189 +236,240 @@ from fastapi.templating import Jinja2Templates
 from simple_salesforce import Salesforce, SalesforceLogin
 from langchain_groq import ChatGroq
 from langchain_core.prompts import ChatPromptTemplate
-from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, Settings, load_index_from_storage
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 class MessageRequest(BaseModel):
     message: str
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 app.mount("/static", StaticFiles(directory="static"), name="static")
 templates = Jinja2Templates(directory="static")
 required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain", "HF_TOKEN"]
 for var in required_env_vars:
     if not os.getenv(var):
-        logger.error(f"Missing environment variable: {var}")
         raise ValueError(f"Environment variable {var} is not set")
-# LLM & Embedding Setup
 GROQ_API_KEY = os.getenv("CHATGROQ_API_KEY")
-llm = ChatGroq(model_name="llama3-8b-8192", api_key=GROQ_API_KEY, temperature=0.1, max_tokens=50)
 Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
-# Salesforce setup
 sf = None
 try:
     session_id, sf_instance = SalesforceLogin(
-        username=os.getenv("username"),
-        password=os.getenv("password"),
-        security_token=os.getenv("security_token"),
-        domain=os.getenv("domain")
     )
     sf = Salesforce(instance=sf_instance, session_id=session_id)
-    logger.info("Salesforce connected.")
 except Exception as e:
-    logger.warning(f"Salesforce connection failed: {e}")
 chat_history = []
 current_chat_history = []
 MAX_HISTORY_SIZE = 100
 PDF_DIRECTORY = "data"
 PERSIST_DIR = "db"
 os.makedirs(PDF_DIRECTORY, exist_ok=True)
 os.makedirs(PERSIST_DIR, exist_ok=True)
 def data_ingestion_from_directory():
     try:
         documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
         if not documents:
             logger.warning("No documents found in PDF_DIRECTORY")
-            return
         storage_context = StorageContext.from_defaults()
         index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
         index.storage_context.persist(persist_dir=PERSIST_DIR)
-        logger.info("Data ingestion and embedding complete.")
     except Exception as e:
-        logger.error(f"Data ingestion failed: {e}")
-        raise HTTPException(status_code=500, detail="Data ingestion failed")
 def initialize():
     try:
-        data_ingestion_from_directory()
     except Exception as e:
-        logger.error(f"Initialization error: {e}")
-        raise HTTPException(status_code=500, detail="Startup initialization failed")
 initialize()
 def handle_query(query: str) -> str:
     chat_context = ""
     for past_query, response in reversed(current_chat_history[-10:]):
-        chat_context += f"User: {past_query}\nBot: {response}\n"
-    # Load index
     try:
         storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
         index = load_index_from_storage(storage_context)
         query_engine = index.as_query_engine(similarity_top_k=2)
         retrieved = query_engine.query(query)
-        doc_context = getattr(retrieved, 'response', "No relevant documents found.")
     except Exception as e:
-        logger.error(f"Retrieval error: {e}")
-        doc_context = "No relevant documents found."
-    # Prompt template
     prompt_template = ChatPromptTemplate.from_messages([
         ("system", """
-You are a helpful and professional company chatbot.
-Answer user queries based on the provided document context and chat history.
-If you are unsure about the answer, politely respond with "I'm sorry, I don't know that yet."
-Document Context:
-{doc_context}
-Chat History:
-{chat_context}
-Question:
-{query}
-        """)
     ])
     prompt = prompt_template.format(doc_context=doc_context, chat_context=chat_context, query=query)
     try:
         response = llm.invoke(prompt)
         response_text = response.content.strip()
-        if "I'm sorry" not in response_text and len(response_text.strip()) < 3:
-            response_text = "I'm sorry, I don't know that yet."
     except Exception as e:
-        logger.error(f"Groq API Error: {e}")
-        response_text = "I'm sorry, I don't know that yet."
     if len(current_chat_history) >= MAX_HISTORY_SIZE:
         current_chat_history.pop(0)
     current_chat_history.append((query, response_text))
     return response_text
 @app.get("/ch/{id}", response_class=HTMLResponse)
 async def load_chat(request: Request, id: str):
     return templates.TemplateResponse("index.html", {"request": request, "user_id": id})
 @app.post("/hist/")
 async def save_chat_history(history: dict):
     if not sf:
-        return JSONResponse({"error": "Salesforce not connected"}, status_code=503)
     user_id = history.get('userId')
     if not user_id:
-        return JSONResponse({"error": "userId missing"}, status_code=400)
-    hist = '\n'.join([f"{entry['sender']}: {entry['message']}" for entry in history.get("history", [])])
-    summary = "This is the chat summary: " + hist
     try:
-        sf.Lead.update(user_id, {'Description': summary})
-        return {"summary": summary, "message": "Chat history saved"}
     except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
 @app.post("/webhook")
 async def receive_form_data(request: Request):
     if not sf:
-        return JSONResponse({"error": "Salesforce not connected"}, status_code=503)
     try:
         form_data = await request.json()
     except json.JSONDecodeError:
         return JSONResponse({"error": "Invalid JSON"}, status_code=400)
-    first_name, last_name = split_name(form_data.get("name", ""))
-    lead_data = {
-        "FirstName": first_name,
-        "LastName": last_name,
-        "Company": form_data.get("company", ""),
-        "Phone": form_data.get("phone", ""),
-        "Email": form_data.get("email", ""),
-        "Description": "Lead from website form"
     }
     try:
-        result = sf.Lead.create(lead_data)
-        return {"id": result.get("id")}
     except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
 @app.post("/chat/")
 async def chat(request: MessageRequest):
-    message = request.message
     response = handle_query(message)
-    chat_entry = {
         "sender": "User",
         "message": message,
         "response": response,
@@ -426,24 +477,28 @@ async def chat(request: MessageRequest):
     }
     if len(chat_history) >= MAX_HISTORY_SIZE:
         chat_history.pop(0)
-    chat_history.append(chat_entry)
     return {"response": response}
 @app.get("/health")
 async def health_check():
     try:
         storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
-        load_index_from_storage(storage_context)
-        return {"status": "healthy"}
     except Exception as e:
         return {"status": "unhealthy", "error": str(e)}
 @app.get("/")
-def read_root():
-    return {"message": "Welcome to the company chatbot API"}
-def split_name(full_name):
-    parts = full_name.strip().split()
-    if len(parts) == 1:
-        return '', parts[0]
-    return parts[0], ' '.join(parts[1:])

 from simple_salesforce import Salesforce, SalesforceLogin
 from langchain_groq import ChatGroq
 from langchain_core.prompts import ChatPromptTemplate
+from llama_index.core import StorageContext, VectorStoreIndex, SimpleDirectoryReader, Settings
+from llama_index.core import load_index_from_storage
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Define Pydantic model for incoming request body
 class MessageRequest(BaseModel):
     message: str
+# Initialize FastAPI app
 app = FastAPI()
+# Allow CORS (restrict origins in production)
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],  # TODO: Restrict to specific origins in production
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
+# Mount static files and templates
 app.mount("/static", StaticFiles(directory="static"), name="static")
 templates = Jinja2Templates(directory="static")
+# Validate environment variables
 required_env_vars = ["CHATGROQ_API_KEY", "username", "password", "security_token", "domain", "HF_TOKEN"]
 for var in required_env_vars:
     if not os.getenv(var):
+        logger.error(f"Environment variable {var} is not set")
         raise ValueError(f"Environment variable {var} is not set")
+# Initialize Groq model
 GROQ_API_KEY = os.getenv("CHATGROQ_API_KEY")
+GROQ_MODEL = "llama3-8b-8192"
+try:
+    llm = ChatGroq(
+        model_name=GROQ_MODEL,
+        api_key=GROQ_API_KEY,
+        temperature=0.1,
+        max_tokens=50
+    )
+except Exception as e:
+    logger.error(f"Failed to initialize Groq model: {e}")
+    raise HTTPException(status_code=500, detail="Failed to initialize Groq model")
+# Configure LlamaIndex settings
 Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
+# Salesforce credentials
+username = os.getenv("username")
+password = os.getenv("password")
+security_token = os.getenv("security_token")
+domain = os.getenv("domain")  # e.g., 'test' for sandbox
+# Initialize Salesforce connection
 sf = None
 try:
     session_id, sf_instance = SalesforceLogin(
+        username=username, password=password, security_token=security_token, domain=domain
     )
     sf = Salesforce(instance=sf_instance, session_id=session_id)
+    logger.info("Salesforce connection established")
 except Exception as e:
+    logger.warning(f"Failed to connect to Salesforce: {e}. Continuing without Salesforce integration.")
+# Chat history
 chat_history = []
 current_chat_history = []
 MAX_HISTORY_SIZE = 100
+# Directories for data ingestion and storage
 PDF_DIRECTORY = "data"
 PERSIST_DIR = "db"
+# Ensure directories exist
 os.makedirs(PDF_DIRECTORY, exist_ok=True)
 os.makedirs(PERSIST_DIR, exist_ok=True)
 def data_ingestion_from_directory():
+    """Ingest documents from PDF_DIRECTORY and store embeddings in PERSIST_DIR."""
     try:
         documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
         if not documents:
             logger.warning("No documents found in PDF_DIRECTORY")
+            return False
         storage_context = StorageContext.from_defaults()
         index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
         index.storage_context.persist(persist_dir=PERSIST_DIR)
+        logger.info("Data ingestion and embedding storage completed successfully")
+        return True
     except Exception as e:
+        logger.error(f"Error during data ingestion: {e}")
+        raise HTTPException(status_code=500, detail=f"Data ingestion failed: {str(e)}")
 def initialize():
+    """Initialize the application by ingesting data and setting up embeddings."""
     try:
+        if not data_ingestion_from_directory():
+            logger.info("No documents to ingest, proceeding with empty index")
     except Exception as e:
+        logger.error(f"Initialization failed: {e}")
+        raise HTTPException(status_code=500, detail="Initialization failed")
+# Run initialization
 initialize()
 def handle_query(query: str) -> str:
+    """Handle user query by retrieving relevant documents and querying Groq LLM."""
+    # Prepare context from chat history
     chat_context = ""
     for past_query, response in reversed(current_chat_history[-10:]):
+        if past_query.strip():
+            chat_context += f"User: {past_query}\nBot: {response}\n"
+    # Load vector index and retrieve relevant documents
     try:
         storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
         index = load_index_from_storage(storage_context)
         query_engine = index.as_query_engine(similarity_top_k=2)
         retrieved = query_engine.query(query)
+        doc_context = retrieved.response if hasattr(retrieved, 'response') else "No relevant information found."
+        logger.info(f"Retrieved context for query '{query}': {doc_context[:100]}...")
     except Exception as e:
+        logger.error(f"Error retrieving documents: {e}")
+        doc_context = "Failed to retrieve relevant information."
+    # Construct prompt for Redferns Tech chatbot
     prompt_template = ChatPromptTemplate.from_messages([
         ("system", """
+        You are Clara, a chatbot for Redferns Tech. Provide accurate, professional answers in 10-15 words.
+        Use the provided document context and chat history to inform your response.
+        If you don't know the answer, politely say: "I'm sorry, I don't have the information to answer that."
+        Document Context:
+        {doc_context}
+        Chat History:
+        {chat_context}
+        Question:
+        {query}
+        """),
     ])
     prompt = prompt_template.format(doc_context=doc_context, chat_context=chat_context, query=query)
+    # Query Groq model
     try:
         response = llm.invoke(prompt)
         response_text = response.content.strip()
+        if not response_text or response_text.lower() == "unknown":
+            response_text = "I'm sorry, I don't have the information to answer that."
     except Exception as e:
+        logger.error(f"Error querying Groq API: {e}")
+        response_text = "I'm sorry, I don't have the information to answer that."
+    # Update chat history
     if len(current_chat_history) >= MAX_HISTORY_SIZE:
         current_chat_history.pop(0)
     current_chat_history.append((query, response_text))
     return response_text
 @app.get("/ch/{id}", response_class=HTMLResponse)
 async def load_chat(request: Request, id: str):
+    """Serve the chat interface for a specific user ID."""
     return templates.TemplateResponse("index.html", {"request": request, "user_id": id})
 @app.post("/hist/")
 async def save_chat_history(history: dict):
+    """Save chat history to Salesforce."""
     if not sf:
+        logger.error("Salesforce integration is disabled")
+        return JSONResponse({"error": "Salesforce integration is unavailable"}, status_code=503)
     user_id = history.get('userId')
     if not user_id:
+        logger.error("userId is missing in history request")
+        return JSONResponse({"error": "userId is required"}, status_code=400)
+    hist = ''.join([f"{entry['sender']}: {entry['message']}\n" for entry in history['history']])
+    summary_prompt = f"Summarize user interests from this conversation:\n{hist}"
     try:
+        sf.Lead.update(user_id, {'Description': summary_prompt})
+        logger.info(f"Chat history updated for user {user_id}")
+        return {"summary": summary_prompt, "message": "Chat history saved"}
     except Exception as e:
+        logger.error(f"Failed to update lead: {e}")
+        return JSONResponse({"error": f"Failed to update lead: {str(e)}"}, status_code=500)
 @app.post("/webhook")
 async def receive_form_data(request: Request):
+    """Create a Salesforce lead from form data."""
     if not sf:
+        logger.error("Salesforce integration is disabled")
+        return JSONResponse({"error": "Salesforce integration is unavailable"}, status_code=503)
     try:
         form_data = await request.json()
     except json.JSONDecodeError:
+        logger.error("Invalid JSON in webhook request")
         return JSONResponse({"error": "Invalid JSON"}, status_code=400)
+    first_name, last_name = split_name(form_data.get('name', ''))
+    data = {
+        'FirstName': first_name,
+        'LastName': last_name,
+        'Description': 'Lead created via webhook',
+        'Company': form_data.get('company', ''),
+        'Phone': form_data.get('phone', '').strip(),
+        'Email': form_data.get('email', ''),
     }
     try:
+        result = sf.Lead.create(data)
+        unique_id = result['id']
+        logger.info(f"Lead created with ID {unique_id}")
+        return JSONResponse({"id": unique_id})
     except Exception as e:
+        logger.error(f"Failed to create lead: {e}")
+        return JSONResponse({"error": f"Failed to create lead: {str(e)}"}, status_code=500)
 @app.post("/chat/")
 async def chat(request: MessageRequest):
+    """Handle chat messages and return responses."""
+    message = request.message.strip()
+    if not message:
+        return JSONResponse({"error": "Message cannot be empty"}, status_code=400)
     response = handle_query(message)
+    message_data = {
         "sender": "User",
         "message": message,
         "response": response,
     }
     if len(chat_history) >= MAX_HISTORY_SIZE:
         chat_history.pop(0)
+    chat_history.append(message_data)
+    logger.info(f"Chat message processed: {message}")
     return {"response": response}
 @app.get("/health")
 async def health_check():
+    """Check the health of the application."""
     try:
         storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
+        index = load_index_from_storage(storage_context)
+        logger.info("Vector index loaded successfully")
+        return {"status": "healthy", "pdf_ingestion": "successful"}
     except Exception as e:
+        logger.error(f"Health check failed: {e}")
         return {"status": "unhealthy", "error": str(e)}
 @app.get("/")
+async def read_root():
+    """Root endpoint for the API."""
+    return {"message": "Welcome to the Redferns Tech Chatbot API"}
+def split_name(full_name: str) -> tuple:
+    """Split a full name into first and last names."""
+    words = full_name.strip().split()
+    return (words[0], ' '.join(words[1:])) if words else ('', '')