Spaces:

Rabbit-Innotech
/

GBVR_Chatbot

Running

File size: 59,902 Bytes

# # # import os
# # # import time
# # # import pandas as pd
# # # import gradio as gr
# # # from langchain_groq import ChatGroq
# # # from langchain_huggingface import HuggingFaceEmbeddings
# # # from langchain_community.vectorstores import Chroma
# # # from langchain_core.prompts import PromptTemplate
# # # from langchain_core.output_parsers import StrOutputParser
# # # from langchain_core.runnables import RunnablePassthrough
# # # from PyPDF2 import PdfReader


# # # # Configuration constants
# # # COLLECTION_NAME = "GBVRS"
# # # DATA_FOLDER = "./"
# # # APP_VERSION = "v1.0.0"
# # # APP_NAME = "Ijwi ry'Ubufasha"
# # # MAX_HISTORY_MESSAGES = 8  # Limit history to avoid token limits

# # # # Global variables for application state
# # # llm = None
# # # embed_model = None
# # # vectorstore = None
# # # retriever = None
# # # rag_chain = None

# # # # User session management
# # # class UserSession:
# # #     def __init__(self, session_id, llm):
# # #         """Initialize a user session with unique ID and language model."""
# # #         self.session_id = session_id
# # #         self.user_info = {"Nickname": "Guest"}
# # #         self.conversation_history = []
# # #         self.llm = llm
# # #         self.welcome_message = None
# # #         self.last_activity = time.time()
        
# # #     def set_user(self, user_info):
# # #         """Set user information and generate welcome message."""
# # #         self.user_info = user_info
# # #         self.generate_welcome_message()
        
# # #         # Initialize conversation history with welcome message
# # #         welcome = self.get_welcome_message()
# # #         self.conversation_history = [
# # #             {"role": "assistant", "content": welcome},
# # #         ]
        
# # #     def get_user(self):
# # #         """Get current user information."""
# # #         return self.user_info
    
# # #     def generate_welcome_message(self):
# # #         """Generate a dynamic welcome message using the LLM."""
# # #         try:
# # #             nickname = self.user_info.get("Nickname", "Guest")
            
# # #             # Use the LLM to generate the message
# # #             prompt = (
# # #                 f"Create a brief and warm welcome message for {nickname} that's about 1-2 sentences. "
# # #                 f"Emphasize this is a safe space for discussing gender-based violence issues "
# # #                 f"and that we provide support and resources. Keep it warm and reassuring."
# # #             )
            
# # #             response = self.llm.invoke(prompt)
# # #             welcome = response.content.strip()
            
# # #             # Format the message with HTML styling
# # #             self.welcome_message = (
# # #                 f"<div style='font-size: 18px; color: #4E6BBF;'>"
# # #                 f"{welcome}"
# # #                 f"</div>"
# # #             )
# # #         except Exception as e:
# # #             # Fallback welcome message
# # #             nickname = self.user_info.get("Nickname", "Guest")
# # #             self.welcome_message = (
# # #                 f"<div style='font-size: 18px; color: #4E6BBF;'>"
# # #                 f"Welcome, {nickname}! You're in a safe space. We're here to provide support with "
# # #                 f"gender-based violence issues and connect you with resources that can help."
# # #                 f"</div>"
# # #             )
    
# # #     def get_welcome_message(self):
# # #         """Get the formatted welcome message."""
# # #         if not self.welcome_message:
# # #             self.generate_welcome_message()
# # #         return self.welcome_message
    
# # #     def add_to_history(self, role, message):
# # #         """Add a message to the conversation history."""
# # #         self.conversation_history.append({"role": role, "content": message})
# # #         self.last_activity = time.time()
        
# # #         # Trim history if it gets too long
# # #         if len(self.conversation_history) > MAX_HISTORY_MESSAGES * 2:  # Keep pairs of messages
# # #             # Keep the first message (welcome) and the most recent messages
# # #             self.conversation_history = [self.conversation_history[0]] + self.conversation_history[-MAX_HISTORY_MESSAGES*2+1:]
    
# # #     def get_conversation_history(self):
# # #         """Get the full conversation history."""
# # #         return self.conversation_history
    
# # #     def get_formatted_history(self):
# # #         """Get conversation history formatted as a string for the LLM."""
# # #         # Skip the welcome message and only include the last few exchanges
# # #         recent_history = self.conversation_history[1:] if len(self.conversation_history) > 1 else []
        
# # #         # Limit to last MAX_HISTORY_MESSAGES exchanges
# # #         if len(recent_history) > MAX_HISTORY_MESSAGES * 2:
# # #             recent_history = recent_history[-MAX_HISTORY_MESSAGES*2:]
            
# # #         formatted_history = ""
# # #         for entry in recent_history:
# # #             role = "User" if entry["role"] == "user" else "Assistant"
# # #             # Truncate very long messages to avoid token limits
# # #             content = entry["content"]
# # #             if len(content) > 500:  # Limit message length
# # #                 content = content[:500] + "..."
# # #             formatted_history += f"{role}: {content}\n\n"
            
# # #         return formatted_history
    
# # #     def is_expired(self, timeout_seconds=3600):
# # #         """Check if the session has been inactive for too long."""
# # #         return (time.time() - self.last_activity) > timeout_seconds

# # # # Session manager to handle multiple users
# # # class SessionManager:
# # #     def __init__(self):
# # #         """Initialize the session manager."""
# # #         self.sessions = {}
# # #         self.session_timeout = 3600  # 1 hour timeout
        
# # #     def get_session(self, session_id):
# # #         """Get an existing session or create a new one."""
# # #         # Clean expired sessions first
# # #         self._clean_expired_sessions()
        
# # #         # Create new session if needed
# # #         if session_id not in self.sessions:
# # #             self.sessions[session_id] = UserSession(session_id, llm)
            
# # #         return self.sessions[session_id]
    
# # #     def _clean_expired_sessions(self):
# # #         """Remove expired sessions to free up memory."""
# # #         expired_keys = []
# # #         for key, session in self.sessions.items():
# # #             if session.is_expired(self.session_timeout):
# # #                 expired_keys.append(key)
                
# # #         for key in expired_keys:
# # #             del self.sessions[key]

# # # # Initialize the session manager
# # # session_manager = SessionManager()

# # # def initialize_assistant():
# # #     """Initialize the assistant with necessary components and configurations."""
# # #     global llm, embed_model, vectorstore, retriever, rag_chain
    
# # #     # Read the Groq API key from the 'GBV' environment variable (both sides of the `or` below use the same name, so only one key name is actually tried)
# # #     groq_api_key = os.environ.get('GBV') or os.environ.get('GBV')
# # #     if not groq_api_key:
# # #         print("WARNING: No GROQ API key found in userdata.")
        
# # #     # Initialize LLM - Default to Llama model which is more widely available
# # #     llm = ChatGroq(
# # #         model="llama-3.3-70b-versatile",  # More reliable than whisper model
# # #         api_key=groq_api_key
# # #     )
    
# # #     # Set up embedding model
# # #     try:
# # #         embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
# # #     except Exception as e:
# # #         # Fallback to smaller model
# # #         embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        
# # #     # Process data and create vector store
# # #     print("Processing data files...")
# # #     data = process_data_files()
    
# # #     print("Creating vector store...")
# # #     vectorstore = create_vectorstore(data)
# # #     retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    
# # #     # Create RAG chain
# # #     print("Setting up RAG chain...")
# # #     rag_chain = create_rag_chain()
    
# # #     print(f"✅ {APP_NAME} initialized successfully")

# # # def process_data_files():
# # #     """Process all data files from the specified folder."""
# # #     context_data = []
    
# # #     try:
# # #         if not os.path.exists(DATA_FOLDER):
# # #             print(f"WARNING: Data folder does not exist: {DATA_FOLDER}")
# # #             return context_data
            
# # #         # Get list of data files
# # #         all_files = os.listdir(DATA_FOLDER)
# # #         data_files = [f for f in all_files if f.lower().endswith(('.csv', '.xlsx', '.xls'))]
        
# # #         if not data_files:
# # #             print(f"WARNING: No data files found in: {DATA_FOLDER}")
# # #             return context_data
            
# # #         # Process each file
# # #         for index, file_name in enumerate(data_files, 1):
# # #             print(f"Processing file {index}/{len(data_files)}: {file_name}")
# # #             file_path = os.path.join(DATA_FOLDER, file_name)
            
# # #             try:
# # #                 # Read file based on extension
# # #                 if file_name.lower().endswith('.csv'):
# # #                     df = pd.read_csv(file_path)
# # #                 else:
# # #                     df = pd.read_excel(file_path)
                
# # #                 # Check if column 3 exists (source data is in third column)
# # #                 if df.shape[1] > 2:
# # #                     column_data = df.iloc[:, 2].dropna().astype(str).tolist()
                    
# # #                     # Each row becomes one chunk with metadata
# # #                     for i, text in enumerate(column_data):
# # #                         if text and len(text.strip()) > 0:
# # #                             context_data.append({
# # #                                 "page_content": text, 
# # #                                 "metadata": {
# # #                                     "source": file_name, 
# # #                                     "row": i+1
# # #                                 }
# # #                             })
# # #                 else:
# # #                     print(f"WARNING: File {file_name} has fewer than 3 columns.")
                    
# # #             except Exception as e:
# # #                 print(f"ERROR processing file {file_name}: {e}")
        
# # #         print(f"✅ Created {len(context_data)} chunks from {len(data_files)} files.")
        
# # #     except Exception as e:
# # #         print(f"ERROR accessing data folder: {e}")
        
# # #     return context_data
    
# # # def create_vectorstore(data):
# # #     """
# # #     Creates and returns a Chroma vector store populated with the provided data.

# # #     Parameters:
# # #         data (list): A list of dictionaries, each containing 'page_content' and 'metadata'.

# # #     Returns:
# # #         Chroma: The populated Chroma vector store instance.
# # #     """
# # #     # Initialize the vector store
# # #     vectorstore = Chroma(
# # #         collection_name=COLLECTION_NAME,
# # #         embedding_function=embed_model,
# # #         persist_directory="./"
# # #     )

# # #     if not data:
# # #         print("⚠️ No data provided. Returning an empty vector store.")
# # #         return vectorstore

# # #     try:
# # #         # Extract only the text of each document (metadata is not collected here)
# # #         texts = [doc["page_content"] for doc in data]

# # #         # Add the texts to the vector store (no metadatas argument is passed, so the metadata is not stored)
# # #         vectorstore.add_texts(texts)
# # #     except Exception as e:
# # #         print(f"❌ Failed to add documents to vector store: {e}")

# # #     # Return the store even if add_texts failed above (an earlier revision returned `vs` by mistake)
# # #     return vectorstore


# # # def create_rag_chain():
# # #     """Create the RAG chain for processing user queries."""
# # #     # Define the prompt template
# # #     template = """
# # #      You are a compassionate and supportive AI assistant specializing in helping individuals affected by Gender-Based Violence (GBV). Your responses must be based EXCLUSIVELY on the information provided in the context. Your primary goal is to provide emotionally intelligent support while maintaining appropriate boundaries.
        
# # #         **Previous conversation:** {conversation_history}
# # #         **Context information:** {context}
# # #         **User's Question:** {question}
        
# # #         When responding follow these guidelines:
        
# # #         1. **Strict Context Adherence**
# # #            - Only use information that appears in the provided {context}
# # #            - If the answer is not found in the context, state "I don't have that information in my available resources" rather than generating a response
        
# # #         2. **Personalized Communication**
# # #            - Avoid contractions (e.g., use I am instead of I'm)
# # #            - Incorporate thoughtful pauses or reflective questions when the conversation involves difficult topics
# # #            - Use selective emojis (😊, 🤗, ❤️) only when tone-appropriate and not during crisis discussions
# # #            - Balance warmth with professionalism
                
# # #         3. **Emotional Intelligence** 
# # #            - Validate feelings without judgment 
# # #            - Offer reassurance when appropriate, always centered on empowerment
# # #            - Adjust your tone based on the emotional state conveyed
            
# # #         4. **Conversation Management**
# # #            - Refer to {conversation_history} to maintain continuity and avoid repetition
# # #            - Use clear paragraph breaks for readability
                
# # #         5. **Information Delivery**
# # #            - Extract only relevant information from {context} that directly addresses the question
# # #            - Present information in accessible, non-technical language
# # #            - When information is unavailable, respond with: "I don't have that specific information right now, {first_name}. Would it be helpful if I focus on [alternative support option]?"
        
# # #         6. **Safety and Ethics**
# # #            - Do not generate any speculative content or advice not supported by the context
# # #            - If the context contains safety information, prioritize sharing that information
        
# # #         Your response must come entirely from the provided context, maintaining the supportive tone while never introducing information from outside the provided materials.
# # #         **Context:** {context}
# # #         **User's Question:** {question}
# # #         **Your Response:**
# # #     """

    
# # #     rag_prompt = PromptTemplate.from_template(template)
    
# # #     def get_context_and_question(query_with_session):
# # #         # Extract query and session_id
# # #         query = query_with_session["query"]
# # #         session_id = query_with_session["session_id"]
        
# # #         # Get the user session
# # #         session = session_manager.get_session(session_id)
# # #         user_info = session.get_user()
# # #         first_name = user_info.get("Nickname", "User")
# # #         conversation_hist = session.get_formatted_history()
        
# # #         try:
# # #             # Retrieve relevant documents
# # #             retrieved_docs = retriever.invoke(query)
# # #             context_str = format_context(retrieved_docs)
# # #         except Exception as e:
# # #             print(f"ERROR retrieving documents: {e}")
# # #             context_str = "No relevant information found."
        
# # #         # Return the combined inputs for the prompt
# # #         return {
# # #             "context": context_str,
# # #             "question": query,
# # #             "first_name": first_name,
# # #             "conversation_history": conversation_hist
# # #         }
    
# # #     # Build the chain
# # #     try:
# # #         chain = (
# # #             RunnablePassthrough() 
# # #             | get_context_and_question 
# # #             | rag_prompt 
# # #             | llm 
# # #             | StrOutputParser()
# # #         )
# # #         return chain
# # #     except Exception as e:
# # #         print(f"ERROR creating RAG chain: {e}")
        
# # #         # Return a simple function as fallback
# # #         def fallback_chain(query_with_session):
# # #             session_id = query_with_session["session_id"]
# # #             session = session_manager.get_session(session_id)
# # #             nickname = session.get_user().get("Nickname", "there")
# # #             return f"I'm here to help you, {nickname}, but I'm experiencing some technical difficulties right now. Please try again shortly."
            
# # #         return fallback_chain

# # # def format_context(retrieved_docs):
# # #     """Format retrieved documents into a string context."""
# # #     if not retrieved_docs:
# # #         return "No relevant information available."
# # #     return "\n\n".join([doc.page_content for doc in retrieved_docs])

# # # def rag_memory_stream(message, history, session_id):
# # #     """Process user message and generate response with memory."""
# # #     # Get the user session
# # #     session = session_manager.get_session(session_id)
    
# # #     # Add user message to history
# # #     session.add_to_history("user", message)

# # #     try:
# # #         # Get response from RAG chain
# # #         print(f"Processing message for session {session_id}: {message[:50]}...")
        
# # #         # Pass both query and session_id to the chain
# # #         response = rag_chain.invoke({
# # #             "query": message,
# # #             "session_id": session_id
# # #         })
        
# # #         print(f"Generated response: {response[:50]}...")
        
# # #         # Add assistant response to history
# # #         session.add_to_history("assistant", response)
        
# # #         # Yield the response
# # #         yield response
        
# # #     except Exception as e:
# # #         import traceback
# # #         print(f"ERROR in rag_memory_stream: {e}")
# # #         print(f"Detailed error: {traceback.format_exc()}")
        
# # #         nickname = session.get_user().get("Nickname", "there")
# # #         error_msg = f"I'm sorry, {nickname}. I encountered an error processing your request. Let's try a different question."
# # #         session.add_to_history("assistant", error_msg)
# # #         yield error_msg

# # # def collect_user_info(nickname, session_id):
# # #     """Store user details and initialize session."""
# # #     if not nickname or nickname.strip() == "":
# # #         return "Nickname is required to proceed.", gr.update(visible=False), gr.update(visible=True), []

# # #     # Store user info for chat session
# # #     user_info = {
# # #         "Nickname": nickname.strip(),
# # #         "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
# # #     }

# # #     # Get the session and set user info
# # #     session = session_manager.get_session(session_id)
# # #     session.set_user(user_info)

# # #     # Fetch the welcome message that set_user() generated above
# # #     welcome_message = session.get_welcome_message()

# # #     # Return welcome message and update UI
# # #     return welcome_message, gr.update(visible=True), gr.update(visible=False), [(None, welcome_message)]

# # # def get_css():
# # #     """Define CSS for the UI."""
# # #     return """
# # #     :root {
# # #         --primary: #4E6BBF;
# # #         --primary-light: #697BBF;
# # #         --text-primary: #333333;
# # #         --text-secondary: #666666;
# # #         --background: #F9FAFC;
# # #         --card-bg: #FFFFFF;
# # #         --border: #E1E5F0;
# # #         --shadow: rgba(0, 0, 0, 0.05);
# # #     }

# # #     body, .gradio-container {
# # #         margin: 0;
# # #         padding: 0;
# # #         width: 100vw;
# # #         height: 100vh;
# # #         display: flex;
# # #         flex-direction: column;
# # #         justify-content: center;
# # #         align-items: center;
# # #         background: var(--background);
# # #         color: var(--text-primary);
# # #         font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
# # #     }

# # #     .gradio-container {
# # #         max-width: 100%;
# # #         max-height: 100%;
# # #     }

# # #     .gr-box {
# # #         background: var(--card-bg);
# # #         color: var(--text-primary);
# # #         border-radius: 12px;
# # #         padding: 2rem;
# # #         border: 1px solid var(--border);
# # #         box-shadow: 0 4px 12px var(--shadow);
# # #     }

# # #     .gr-button-primary {
# # #         background: var(--primary);
# # #         color: white;
# # #         padding: 12px 24px;
# # #         border-radius: 8px;
# # #         transition: all 0.3s ease;
# # #         border: none;
# # #         font-weight: bold;
# # #     }

# # #     .gr-button-primary:hover {
# # #         transform: translateY(-1px);
# # #         box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
# # #         background: var(--primary-light);
# # #     }

# # #     footer {
# # #         text-align: center;
# # #         color: var(--text-secondary);
# # #         padding: 1rem;
# # #         font-size: 0.9em;
# # #     }

# # #     .gr-markdown h2 {
# # #         color: var(--primary);
# # #         margin-bottom: 0.5rem;
# # #         font-size: 1.8em;
# # #     }

# # #     .gr-markdown h3 {
# # #         color: var(--text-secondary);
# # #         margin-bottom: 1.5rem;
# # #         font-weight: normal;
# # #     }

# # #     #chatbot_container .chat-title h1, 
# # #     #chatbot_container .empty-chatbot {
# # #         color: var(--primary);
# # #     }

# # #     #input_nickname {
# # #         padding: 12px;
# # #         border-radius: 8px;
# # #         border: 1px solid var(--border);
# # #         background: var(--card-bg);
# # #         transition: all 0.3s ease;
# # #     }
    
# # #     #input_nickname:focus {
# # #         border-color: var(--primary);
# # #         box-shadow: 0 0 0 2px rgba(78, 107, 191, 0.2);
# # #         outline: none;
# # #     }

# # #     .chatbot-container .message.user {
# # #         background: #E8F0FE;
# # #         border-radius: 12px 12px 0 12px;
# # #     }

# # #     .chatbot-container .message.bot {
# # #         background: #F5F7FF;
# # #         border-radius: 12px 12px 12px 0;
# # #     }
# # #     """

# # # def create_ui():
# # #     """Create and configure the Gradio UI."""
# # #     with gr.Blocks(css=get_css(), theme=gr.themes.Soft()) as demo:
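# # #         # Build the initial session ID. NOTE: this expression is evaluated once, when the UI is
# # #         # constructed, and gr.State copies that same initial value into every browser session,
# # #         # so the ID is not unique per tab unless it is produced by a callable or a load event.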
# # #         session_id = gr.State(value=f"session_{int(time.time())}_{os.urandom(4).hex()}")
        
# # #         # Registration section
# # #         with gr.Column(visible=True, elem_id="registration_container") as registration_container:
# # #             gr.Markdown(f"## Welcome to {APP_NAME}")
# # #             gr.Markdown("### Your privacy is important to us. Please provide a nickname to continue.")

# # #             with gr.Row():
# # #                 first_name = gr.Textbox(
# # #                     label="Nickname",
# # #                     placeholder="Enter your nickname",
# # #                     scale=1,
# # #                     elem_id="input_nickname"
# # #                 )

# # #             with gr.Row():
# # #                 submit_btn = gr.Button("Start Chatting", variant="primary", scale=2)

# # #             response_message = gr.Markdown()

# # #         # Chatbot section (initially hidden)
# # #         with gr.Column(visible=False, elem_id="chatbot_container") as chatbot_container:
# # #             # Create a custom chat interface to pass session_id to our function
# # #             chatbot = gr.Chatbot(
# # #                 elem_id="chatbot", 
# # #                 height=500,
# # #                 show_label=False
# # #             )
            
# # #             with gr.Row():
# # #                 msg = gr.Textbox(
# # #                     placeholder="Type your message here...",
# # #                     show_label=False,
# # #                     container=False,
# # #                     scale=9
# # #                 )
# # #                 submit = gr.Button("Send", scale=1, variant="primary")
            
# # #             examples = gr.Examples(
# # #                 examples=[
# # #                     "What resources are available for GBV victims?",
# # #                     "How can I report an incident?",
# # #                     "What are my legal rights?",
# # #                     "I need help, what should I do first?"
# # #                 ],
# # #                 inputs=msg
# # #             )

# # #             # Footer with version info
# # #             gr.Markdown(f"{APP_NAME} {APP_VERSION} © 2025")
            
# # #             # Handle chat message submission
# # #             def respond(message, chat_history, session_id):
# # #                 bot_message = ""
# # #                 for chunk in rag_memory_stream(message, chat_history, session_id):
# # #                     bot_message += chunk
# # #                 chat_history.append((message, bot_message))
# # #                 return "", chat_history
            
# # #             msg.submit(respond, [msg, chatbot, session_id], [msg, chatbot])
# # #             submit.click(respond, [msg, chatbot, session_id], [msg, chatbot])

# # #         # Handle user registration
# # #         submit_btn.click(
# # #             collect_user_info,
# # #             inputs=[first_name, session_id],
# # #             outputs=[response_message, chatbot_container, registration_container, chatbot]
# # #         )

# # #     return demo

# # # def launch_app():
# # #     """Launch the Gradio interface."""
# # #     ui = create_ui()
# # #     ui.launch(share=True)

# # # # Main execution
# # # if __name__ == "__main__":
# # #     try:
# # #         # Initialize and launch the assistant
# # #         initialize_assistant()
# # #         launch_app()
# # #     except Exception as e:
# # #         import traceback
# # #         print(f"❌ Fatal error initializing GBV Assistant: {e}")
# # #         print(traceback.format_exc())
        
# # #         # Create a minimal emergency UI to display the error
# # #         with gr.Blocks() as error_demo:
# # #             gr.Markdown("## System Error")
# # #             gr.Markdown(f"An error occurred while initializing the application: {str(e)}")
# # #             gr.Markdown("Please check your configuration and try again.")
            
# # #         error_demo.launch(share=True, inbrowser=True, debug=True)



# # ############################################################################################################


# # import os
# # from langchain_groq import ChatGroq
# # from langchain.prompts import ChatPromptTemplate, PromptTemplate
# # from langchain.output_parsers import ResponseSchema, StructuredOutputParser
# # from urllib.parse import urljoin, urlparse
# # import requests
# # from io import BytesIO
# # from langchain_chroma import Chroma
# # import requests
# # from bs4 import BeautifulSoup
# # from langchain_core.prompts import ChatPromptTemplate
# # import gradio as gr
# # from PyPDF2 import PdfReader
# # from langchain_huggingface import HuggingFaceEmbeddings

# # groq_api_key= os.environ.get('GBV')

# # embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")

# # def scrape_websites(base_urls):
# #     try:
# #         visited_links = set()  # To avoid revisiting the same link
# #         content_by_url = {}  # Store content from each URL

# #         for base_url in base_urls:
# #             if not base_url.strip():
# #                 continue  # Skip empty or invalid URLs

# #             print(f"Scraping base URL: {base_url}")
# #             html_content = fetch_page_content(base_url)
# #             if html_content:
# #                 cleaned_content = clean_body_content(html_content)
# #                 content_by_url[base_url] = cleaned_content
# #                 visited_links.add(base_url)

# #                 # Extract and process all internal links
# #                 soup = BeautifulSoup(html_content, "html.parser")
# #                 links = extract_internal_links(base_url, soup)

# #                 for link in links:
# #                     if link not in visited_links:
# #                         print(f"Scraping link: {link}")
# #                         page_content = fetch_page_content(link)
# #                         if page_content:
# #                             cleaned_content = clean_body_content(page_content)
# #                             content_by_url[link] = cleaned_content
# #                             visited_links.add(link)

# #                         # If the link is a PDF file, extract its content
# #                         if link.lower().endswith('.pdf'):
# #                             print(f"Extracting PDF content from: {link}")
# #                             pdf_content = extract_pdf_text(link)
# #                             if pdf_content:
# #                                 content_by_url[link] = pdf_content

# #         return content_by_url

# #     except Exception as e:
# #         print(f"Error during scraping: {e}")
# #         return {}


# # def fetch_page_content(url):
# #     try:
# #         response = requests.get(url, timeout=10)
# #         response.raise_for_status()
# #         return response.text
# #     except requests.exceptions.RequestException as e:
# #         print(f"Error fetching {url}: {e}")
# #         return None


# # def extract_internal_links(base_url, soup):
# #     links = set()
# #     for anchor in soup.find_all("a", href=True):
# #         href = anchor["href"]
# #         full_url = urljoin(base_url, href)
# #         if is_internal_link(base_url, full_url):
# #             links.add(full_url)
# #     return links


# # def is_internal_link(base_url, link_url):
# #     base_netloc = urlparse(base_url).netloc
# #     link_netloc = urlparse(link_url).netloc
# #     return base_netloc == link_netloc


# # def extract_pdf_text(pdf_url):
# #     try:
# #         response = requests.get(pdf_url)
# #         response.raise_for_status()
# #         with BytesIO(response.content) as file:
# #             reader = PdfReader(file)
# #             pdf_text = ""
# #             for page in reader.pages:
# #                 pdf_text += page.extract_text()

# #         return pdf_text if pdf_text else None
# #     except requests.exceptions.RequestException as e:
# #         print(f"Error fetching PDF {pdf_url}: {e}")
# #         return None
# #     except Exception as e:
# #         print(f"Error reading PDF {pdf_url}: {e}")
# #         return None


# # def clean_body_content(html_content):
# #     soup = BeautifulSoup(html_content, "html.parser")

    
# #     for script_or_style in soup(["script", "style"]):
# #         script_or_style.extract()

    
# #     cleaned_content = soup.get_text(separator="\n")
# #     cleaned_content = "\n".join(
# #         line.strip() for line in cleaned_content.splitlines() if line.strip()
# #     )
# #     return cleaned_content


# # if __name__ == "__main__":
# #     website = ["https://haguruka.org.rw/"
              
# #                ] 
# #     all_content = scrape_websites(website)

# #     temp_list = []
# #     for url, content in all_content.items():
# #         temp_list.append((url, content)) 

    
# # processed_texts = []

    
# # for element in temp_list:
# #     if isinstance(element, tuple):
# #         url, content = element  
# #         processed_texts.append(f"url: {url}, content: {content}")
# #     elif isinstance(element, str):
# #         processed_texts.append(element)
# #     else:
# #         processed_texts.append(str(element))

# # def chunk_string(s, chunk_size=1000):
# #     return [s[i:i+chunk_size] for i in range(0, len(s), chunk_size)]

# # chunked_texts = []

# # for text in processed_texts:
# #   chunked_texts.extend(chunk_string(text))


    


# # vectorstore = Chroma(
# #     collection_name="GBVR_Dataset",
# #     embedding_function=embed_model,
# #     persist_directory="./",
# # )

# # vectorstore.get().keys()

# # vectorstore.add_texts(chunked_texts)


# # template = ("""
# #     You are a friendly, intelligent, and conversational AI assistant designed to provide accurate, engaging, and human-like responses based on the given context. Your goal is to extract relevant details from the provided context: {context} and assist the user effectively. Follow these guidelines:

# #     1. **Warm & Natural Interaction**  
# #        - If the user greets you (e.g., "Hello," "Hi," "Good morning"), respond warmly and acknowledge them.  
# #        - Example responses:  
# #          - "😊 Good morning! How can I assist you today?"  
# #          - "Hello! What can I do for you? 🚀"  

# #     2. **Precise Information Extraction**  
# #        - Provide only the relevant details from the given context: {context}.  
# #        - Do not generate extra content or assumptions beyond the provided information.  

# #     3. **Conversational & Engaging Tone**  
# #        - Keep responses friendly, natural, and engaging.  
# #        - Use occasional emojis (e.g., 😊, 🚀) to make interactions more lively.  

# #     4. **Awareness of Real-Time Context**  
# #        - If necessary, acknowledge the current date and time to show awareness of real-world updates.  

# #     5. **Handling Missing Information**  
# #        - If no relevant information exists in the context, respond politely:  
# #          - "I don't have that information at the moment, but I'm happy to help with something else! 😊"  

# #     6. **Personalized Interaction**  
# #        - If user history is available, tailor responses based on their previous interactions for a more natural and engaging conversation.  

# #     7. **Direct, Concise Responses**  
# #        - If the user requests specific data, provide only the requested details without unnecessary explanations unless asked.  

# #     8. **Extracting Relevant Links**  
# #        - If the user asks for a link related to their request `{question}`, extract the most relevant URL from `{context}` and provide it directly.  
# #        - Example response:  
# #          - "Here is the link you requested: [URL]"  

# #     **Context:** {context}  
# #     **User's Question:** {question}  
# #     **Your Response:**  
# # """)


# # rag_prompt = PromptTemplate.from_template(template)

# # retriever = vectorstore.as_retriever()

# # from langchain_core.output_parsers import StrOutputParser
# # from langchain_core.runnables import RunnablePassthrough

# # llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key )

# # rag_chain = (
# #     {"context": retriever, "question": RunnablePassthrough()}
# #     | rag_prompt
# #     | llm
# #     | StrOutputParser()
# # )


# # # Define the RAG memory stream function
# # def rag_memory_stream(message, history):
# #     partial_text = ""
# #     for new_text in rag_chain.stream(message):  # Replace with actual streaming logic
# #         partial_text += new_text
# #         yield partial_text

# # # Title with emojis
# # title = "GBVR Chatbot"


# # # Custom CSS for styling the interface
# # custom_css = """
# # body {
# #     font-family: "Arial", serif;
# # }
# # .gradio-container {
# #     font-family: "Times New Roman", serif;
# # }
# # .gr-button {
# #     background-color: #007bff; /* Blue button */
# #     color: white;
# #     border: none;
# #     border-radius: 5px;
# #     font-size: 16px;
# #     padding: 10px 20px;
# #     cursor: pointer;
# # }
# # .gr-textbox:focus, .gr-button:focus {
# #     outline: none; /* Remove outline focus for a cleaner look */
# # }

# # """

# # # Create the Chat Interface
# # demo = gr.ChatInterface(
# #     fn=rag_memory_stream,
# #     title=title,
# #     fill_height=True,
# #     theme="soft",
# #     css=custom_css, # Apply the custom CSS
# # )

# # # Launch the app
# # if __name__ == "__main__":
# #     demo.launch(share=True, inbrowser=True, debug=True)


# import os
# from langchain_groq import ChatGroq
# from langchain.prompts import ChatPromptTemplate, PromptTemplate
# from langchain.output_parsers import ResponseSchema, StructuredOutputParser
# from urllib.parse import urljoin, urlparse
# import requests
# from io import BytesIO
# from langchain_chroma import Chroma
# import requests
# from bs4 import BeautifulSoup
# from langchain_core.prompts import ChatPromptTemplate
# import gradio as gr
# from PyPDF2 import PdfReader
# from langchain_huggingface import HuggingFaceEmbeddings
# from langchain_core.output_parsers import StrOutputParser
# from langchain_core.runnables import RunnablePassthrough

# # Simple session management
# class SessionManager:
#     def __init__(self):
#         self.sessions = {}
    
#     def get_or_create_session(self, session_id):
#         if session_id not in self.sessions:
#             self.sessions[session_id] = []
#         return self.sessions[session_id]
    
#     def add_interaction(self, session_id, user_message, ai_response):
#         session = self.get_or_create_session(session_id)
#         session.append({"user": user_message, "ai": ai_response})
    
#     def get_history(self, session_id, max_turns=5):
#         session = self.get_or_create_session(session_id)
#         recent_history = session[-max_turns:] if len(session) > max_turns else session
        
#         history_text = ""
#         for interaction in recent_history:
#             history_text += f"User: {interaction['user']}\n"
#             history_text += f"Assistant: {interaction['ai']}\n\n"
        
#         return history_text.strip()

# # Initialize session manager
# session_manager = SessionManager()

# groq_api_key= os.environ.get('GBV')

# embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")

# def scrape_websites(base_urls):
#     try:
#         visited_links = set()  # To avoid revisiting the same link
#         content_by_url = {}  # Store content from each URL

#         for base_url in base_urls:
#             if not base_url.strip():
#                 continue  # Skip empty or invalid URLs

#             print(f"Scraping base URL: {base_url}")
#             html_content = fetch_page_content(base_url)
#             if html_content:
#                 cleaned_content = clean_body_content(html_content)
#                 content_by_url[base_url] = cleaned_content
#                 visited_links.add(base_url)

#                 # Extract and process all internal links
#                 soup = BeautifulSoup(html_content, "html.parser")
#                 links = extract_internal_links(base_url, soup)

#                 for link in links:
#                     if link not in visited_links:
#                         print(f"Scraping link: {link}")
#                         page_content = fetch_page_content(link)
#                         if page_content:
#                             cleaned_content = clean_body_content(page_content)
#                             content_by_url[link] = cleaned_content
#                             visited_links.add(link)

#                         # If the link is a PDF file, extract its content
#                         if link.lower().endswith('.pdf'):
#                             print(f"Extracting PDF content from: {link}")
#                             pdf_content = extract_pdf_text(link)
#                             if pdf_content:
#                                 content_by_url[link] = pdf_content

#         return content_by_url

#     except Exception as e:
#         print(f"Error during scraping: {e}")
#         return {}


# def fetch_page_content(url):
#     try:
#         response = requests.get(url, timeout=10)
#         response.raise_for_status()
#         return response.text
#     except requests.exceptions.RequestException as e:
#         print(f"Error fetching {url}: {e}")
#         return None


# def extract_internal_links(base_url, soup):
#     links = set()
#     for anchor in soup.find_all("a", href=True):
#         href = anchor["href"]
#         full_url = urljoin(base_url, href)
#         if is_internal_link(base_url, full_url):
#             links.add(full_url)
#     return links


# def is_internal_link(base_url, link_url):
#     base_netloc = urlparse(base_url).netloc
#     link_netloc = urlparse(link_url).netloc
#     return base_netloc == link_netloc


# def extract_pdf_text(pdf_url):
#     try:
#         response = requests.get(pdf_url)
#         response.raise_for_status()
#         with BytesIO(response.content) as file:
#             reader = PdfReader(file)
#             pdf_text = ""
#             for page in reader.pages:
#                 pdf_text += page.extract_text()

#         return pdf_text if pdf_text else None
#     except requests.exceptions.RequestException as e:
#         print(f"Error fetching PDF {pdf_url}: {e}")
#         return None
#     except Exception as e:
#         print(f"Error reading PDF {pdf_url}: {e}")
#         return None


# def clean_body_content(html_content):
#     soup = BeautifulSoup(html_content, "html.parser")

    
#     for script_or_style in soup(["script", "style"]):
#         script_or_style.extract()

    
#     cleaned_content = soup.get_text(separator="\n")
#     cleaned_content = "\n".join(
#         line.strip() for line in cleaned_content.splitlines() if line.strip()
#     )
#     return cleaned_content


# if __name__ == "__main__":
#     website = ["https://haguruka.org.rw/"
              
#                ] 
#     all_content = scrape_websites(website)

#     temp_list = []
#     for url, content in all_content.items():
#         temp_list.append((url, content)) 

    
# processed_texts = []

    
# for element in temp_list:
#     if isinstance(element, tuple):
#         url, content = element  
#         processed_texts.append(f"url: {url}, content: {content}")
#     elif isinstance(element, str):
#         processed_texts.append(element)
#     else:
#         processed_texts.append(str(element))

# def chunk_string(s, chunk_size=1000):
#     return [s[i:i+chunk_size] for i in range(0, len(s), chunk_size)]

# chunked_texts = []

# for text in processed_texts:
#   chunked_texts.extend(chunk_string(text))


# vectorstore = Chroma(
#     collection_name="GBVR_Dataset",
#     embedding_function=embed_model,
#     persist_directory="./",
# )

# vectorstore.get().keys()

# vectorstore.add_texts(chunked_texts)

# # Updated template to include conversation history
# template = ("""
#     You are a friendly, intelligent, and conversational AI assistant designed to provide accurate, engaging, and human-like responses based on the given context. Your goal is to extract relevant details from the provided context: {context} and assist the user effectively. Follow these guidelines:

#     1. **Warm & Natural Interaction**  
#        - If the user greets you (e.g., "Hello," "Hi," "Good morning"), respond warmly and acknowledge them.  
#        - Example responses:  
#          - "😊 Good morning! How can I assist you today?"  
#          - "Hello! What can I do for you? 🚀"  

#     2. **Precise Information Extraction**  
#        - Provide only the relevant details from the given context: {context}.  
#        - Do not generate extra content or assumptions beyond the provided information.  

#     3. **Conversational & Engaging Tone**  
#        - Keep responses friendly, natural, and engaging.  
#        - Use occasional emojis (e.g., 😊, 🚀) to make interactions more lively.  

#     4. **Awareness of Real-Time Context**  
#        - If necessary, acknowledge the current date and time to show awareness of real-world updates.  

#     5. **Handling Missing Information**  
#        - If no relevant information exists in the context, respond politely:  
#          - "I don't have that information at the moment, but I'm happy to help with something else! 😊"  

#     6. **Personalized Interaction**  
#        - Use the conversation history to provide more personalized and contextually relevant responses.
#        - Previous conversation history: {conversation_history}

#     7. **Direct, Concise Responses**  
#        - If the user requests specific data, provide only the requested details without unnecessary explanations unless asked.  

#     8. **Extracting Relevant Links**  
#        - If the user asks for a link related to their request `{question}`, extract the most relevant URL from `{context}` and provide it directly.  
#        - Example response:  
#          - "Here is the link you requested: [URL]"  

#     **Context:** {context}  
#     **User's Question:** {question}  
#     **Your Response:**  
# """)


# rag_prompt = PromptTemplate.from_template(template)

# retriever = vectorstore.as_retriever()

# llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)

# # Dictionary to store user sessions with session IDs
# user_sessions = {}

# # Define the RAG chain with session history
# def rag_chain(question, session_id="default"):
#     # Get conversation history if available
#     conversation_history = session_manager.get_history(session_id)
    
#     # Get context from retriever
#     context_docs = retriever.invoke(question)
#     context = "\n".join(doc.page_content for doc in context_docs)
    
#     # Create prompt with history
#     prompt = rag_prompt.format(
#         context=context,
#         question=question,
#         conversation_history=conversation_history
#     )
    
#     # Generate response
#     response = llm.invoke(prompt).content
    
#     # Store the interaction
#     session_manager.add_interaction(session_id, question, response)
    
#     return response

# # Define the RAG memory stream function
# def rag_memory_stream(message, history):
#     # Generate a session ID based on the first message if not exists
#     session_id = None
#     for msg in history:
#         if msg[0]:  # If there's a user message
#             # Use first few characters of first message as simple session ID
#             session_id = hash(msg[0][:20]) if session_id is None else session_id
#             break
    
#     # Default session ID if history is empty
#     if session_id is None:
#         session_id = "default_session"
    
#     # Process the message and get response
#     response = rag_chain(message, str(session_id))
    
#     # Stream the response word by word
#     partial_text = ""
#     words = response.split(' ')
#     for word in words:
#         partial_text += word + " "
#         yield partial_text.strip()

# # Title with emojis
# title = "GBVR Chatbot"

# # Custom CSS for styling the interface
# custom_css = """
# body {
#     font-family: "Arial", serif;
# }
# .gradio-container {
#     font-family: "Times New Roman", serif;
# }
# .gr-button {
#     background-color: #007bff; /* Blue button */
#     color: white;
#     border: none;
#     border-radius: 5px;
#     font-size: 16px;
#     padding: 10px 20px;
#     cursor: pointer;
# }
# .gr-textbox:focus, .gr-button:focus {
#     outline: none; /* Remove outline focus for a cleaner look */
# }
# """

# # Create the Chat Interface
# demo = gr.ChatInterface(
#     fn=rag_memory_stream,
#     title=title,
#     fill_height=True,
#     theme="soft",
#     css=custom_css, # Apply the custom CSS
# )

# # Launch the app
# if __name__ == "__main__":
#     demo.launch(share=True, inbrowser=True, debug=True)
import os
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from urllib.parse import urljoin, urlparse
import requests
from io import BytesIO
from langchain_chroma import Chroma
import requests
from bs4 import BeautifulSoup
from langchain_core.prompts import ChatPromptTemplate
import gradio as gr
from PyPDF2 import PdfReader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Simple session management
class SessionManager:
    """In-memory store of chat interactions, keyed by session ID.

    Each session is a list of ``{"user": ..., "ai": ...}`` dicts kept in the
    order the interactions were added. Nothing is written to disk.
    """

    def __init__(self):
        # Maps session_id -> list of interaction dicts.
        self.sessions = {}

    def get_or_create_session(self, session_id):
        """Return the interaction list for *session_id*, creating it if absent."""
        return self.sessions.setdefault(session_id, [])

    def add_interaction(self, session_id, user_message, ai_response):
        """Append one user/assistant exchange to the session's history."""
        self.get_or_create_session(session_id).append(
            {"user": user_message, "ai": ai_response}
        )

    def get_history(self, session_id, max_turns=5):
        """Return the most recent exchanges of a session as plain text.

        Args:
            session_id: Key of the session to read (created if missing).
            max_turns: Maximum number of most-recent exchanges to include.

        Returns:
            The exchanges formatted as ``"User: ...\\nAssistant: ..."`` blocks
            separated by a blank line, with surrounding whitespace stripped.
            An empty string when the session has no history or when
            *max_turns* is zero or negative.
        """
        session = self.get_or_create_session(session_id)

        # A slice of [-0:] would return the whole list and a negative count
        # would slice from the front, so non-positive limits mean "no history".
        if max_turns <= 0:
            return ""

        recent_history = session[-max_turns:]
        history_text = "\n\n".join(
            f"User: {interaction['user']}\nAssistant: {interaction['ai']}"
            for interaction in recent_history
        )
        return history_text.strip()

# Initialize session manager
# One module-level instance; rag_chain reads and writes conversation history through it.
session_manager = SessionManager()

# Groq API key, read from the 'GBV' environment variable (None when the variable is unset).
groq_api_key= os.environ.get('GBV')

# Embedding model handed to the Chroma vector store as its embedding function.
embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")

def scrape_websites(base_urls):
    """Scrape each base URL and the internal pages it links to (one level deep).

    For every non-blank base URL the page is fetched and reduced to plain
    text, then every internal link found on that page is processed once:
    links ending in ``.pdf`` go through ``extract_pdf_text``; all other links
    are fetched as HTML and cleaned. Links found on those secondary pages are
    not followed.

    Args:
        base_urls: Iterable of URL strings to start from.

    Returns:
        dict mapping each successfully processed URL to its extracted text.
        If an unexpected error interrupts the crawl, the error is printed and
        whatever was collected up to that point is returned.
    """
    visited_links = set()  # URLs whose content has already been stored
    content_by_url = {}  # URL -> extracted text

    try:
        for base_url in base_urls:
            if not base_url.strip():
                continue  # Skip empty or whitespace-only URLs

            print(f"Scraping base URL: {base_url}")
            html_content = fetch_page_content(base_url)
            if not html_content:
                continue

            content_by_url[base_url] = clean_body_content(html_content)
            visited_links.add(base_url)

            # Extract and process all internal links of the base page.
            soup = BeautifulSoup(html_content, "html.parser")
            for link in extract_internal_links(base_url, soup):
                if link in visited_links:
                    continue

                # PDFs are binary: parse them as PDFs only, instead of first
                # downloading them as text and storing the decoded bytes.
                if link.lower().endswith('.pdf'):
                    print(f"Extracting PDF content from: {link}")
                    pdf_content = extract_pdf_text(link)
                    if pdf_content:
                        content_by_url[link] = pdf_content
                        visited_links.add(link)
                    continue

                print(f"Scraping link: {link}")
                page_content = fetch_page_content(link)
                if page_content:
                    content_by_url[link] = clean_body_content(page_content)
                    visited_links.add(link)

    except Exception as e:
        # Best effort: report the failure but keep the pages already scraped.
        print(f"Error during scraping: {e}")

    return content_by_url


def fetch_page_content(url):
    """Download *url* and return the response body as text.

    The request uses a 10-second timeout. When the request fails or the
    server answers with an HTTP error status, the error is printed and
    None is returned.
    """
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
    except requests.exceptions.RequestException as err:
        print(f"Error fetching {url}: {err}")
        return None
    return resp.text


def extract_internal_links(base_url, soup):
    """Collect the absolute URLs of all internal links in a parsed page.

    Every ``<a href=...>`` in *soup* is resolved against *base_url*, its
    ``#fragment`` is removed, and the result is kept when
    ``is_internal_link`` accepts it.

    Args:
        base_url: URL of the page *soup* was parsed from.
        soup: Parsed document exposing ``find_all("a", href=True)``.

    Returns:
        set of absolute, fragment-free URL strings.
    """
    # Local import: the file's top-level imports only bring in urljoin/urlparse.
    from urllib.parse import urldefrag

    links = set()
    for anchor in soup.find_all("a", href=True):
        absolute_url = urljoin(base_url, anchor["href"])
        # "page" and "page#section" are the same document; dropping the
        # fragment keeps the caller from scraping it more than once.
        full_url, _fragment = urldefrag(absolute_url)
        if is_internal_link(base_url, full_url):
            links.add(full_url)
    return links


def is_internal_link(base_url, link_url):
    """Return True when both URLs have exactly the same ``netloc`` component.

    The comparison is an exact string match on what ``urlparse`` reports as
    the network location, so a different sub-domain or port counts as
    external, while a different scheme or path does not.
    """
    base_host, link_host = (urlparse(u).netloc for u in (base_url, link_url))
    return link_host == base_host


def extract_pdf_text(pdf_url):
    """Download a PDF and return the concatenated text of all its pages.

    Args:
        pdf_url: URL of the PDF document.

    Returns:
        The extracted text, or None when the download fails, the document
        cannot be read, or no text could be extracted. Errors are printed,
        not raised.
    """
    try:
        # Same 10-second timeout as fetch_page_content, so a stalled server
        # cannot hang the whole scrape.
        response = requests.get(pdf_url, timeout=10)
        response.raise_for_status()
        with BytesIO(response.content) as file:
            reader = PdfReader(file)
            # "or ''" guards against a page that yields no text (None), which
            # would otherwise abort extraction for the entire document.
            pdf_text = "".join(page.extract_text() or "" for page in reader.pages)

        return pdf_text if pdf_text else None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching PDF {pdf_url}: {e}")
        return None
    except Exception as e:
        print(f"Error reading PDF {pdf_url}: {e}")
        return None


def clean_body_content(html_content):
    """Convert an HTML document to plain text, one non-empty line per row.

    ``<script>`` and ``<style>`` elements are removed first; the remaining
    text is split into lines, each line is stripped of surrounding
    whitespace, and blank lines are dropped.
    """
    parsed = BeautifulSoup(html_content, "html.parser")

    # Remove script/style elements so their source does not end up in the text.
    for tag in parsed(["script", "style"]):
        tag.extract()

    raw_text = parsed.get_text(separator="\n")
    stripped_lines = (ln.strip() for ln in raw_text.splitlines())
    return "\n".join(ln for ln in stripped_lines if ln)


# Scraped (url, content) pairs. Defined unconditionally so the module-level
# code below does not raise NameError when this file is imported instead of
# being run as a script; the scrape itself only runs under the __main__ guard.
temp_list = []

if __name__ == "__main__":
    website = ["https://haguruka.org.rw/"]
    all_content = scrape_websites(website)
    temp_list = list(all_content.items())


# One text per scraped page, prefixed with its URL so the source link is part
# of the text that gets indexed.
processed_texts = []

for element in temp_list:
    if isinstance(element, tuple):
        url, content = element
        processed_texts.append(f"url: {url}, content: {content}")
    elif isinstance(element, str):
        processed_texts.append(element)
    else:
        processed_texts.append(str(element))

def chunk_string(s, chunk_size=1000):
    """Split *s* into consecutive slices of at most *chunk_size* items.

    Slices are taken at fixed offsets (0, chunk_size, 2 * chunk_size, ...),
    so only the last one can be shorter. An empty *s* yields an empty list.
    """
    pieces = []
    for start in range(0, len(s), chunk_size):
        pieces.append(s[start:start + chunk_size])
    return pieces

# Flatten every processed text into fixed-size chunks for embedding.
chunked_texts = [
    chunk for text in processed_texts for chunk in chunk_string(text)
]

# Chroma collection backed by files in the current directory.
# NOTE(review): the collection name is spelled "GBVR_Datast" (earlier,
# commented-out versions of this script use "GBVR_Dataset"). Left unchanged
# because renaming would point at a different stored collection; confirm
# which one is intended.
vectorstore = Chroma(
    collection_name="GBVR_Datast",
    embedding_function=embed_model,
    persist_directory="./",
)

# Index only when the scrape produced something; an empty batch has nothing to
# add and may be rejected by the underlying client.
# NOTE(review): this runs on every start-up, so if the persisted collection
# survives restarts it presumably accumulates duplicate chunks. Confirm, and
# consider passing stable ids.
if chunked_texts:
    vectorstore.add_texts(chunked_texts)

# Prompt template for the RAG chain. It has exactly three placeholders,
# {conversation_history}, {context} and {question}, and each appears once, so
# the retrieved context is inserted into the prompt a single time instead of
# being repeated inside the guidelines.
template = ("""
    You are a friendly, intelligent, and conversational AI assistant designed to provide accurate, engaging, and human-like responses based on the given context. Your goal is to extract relevant details from the context provided below and assist the user effectively. Follow these guidelines:

    1. **Warm & Natural Interaction**
       - If the user greets you (e.g., "Hello," "Hi," "Good morning"), respond warmly and acknowledge them.
       - Example responses:
         - "😊 Good morning! How can I assist you today?"
         - "Hello! What can I do for you? 🚀"

    2. **Precise Information Extraction**
       - Provide only the relevant details from the given context.
       - Do not generate extra content or assumptions beyond the provided information.

    3. **Conversational & Engaging Tone**
       - Keep responses friendly, natural, and engaging.
       - Use occasional emojis (e.g., 😊, 🚀) to make interactions more lively.

    4. **Awareness of Real-Time Context**
       - If necessary, acknowledge the current date and time to show awareness of real-world updates.

    5. **Handling Missing Information**
       - If no relevant information exists in the context, respond politely:
         - "I don't have that information at the moment, but I'm happy to help with something else! 😊"

    6. **Personalized Interaction**
       - Use the previous conversation history given below to provide more personalized and contextually relevant responses.

    7. **Direct, Concise Responses**
       - If the user requests specific data, provide only the requested details without unnecessary explanations unless asked.

    8. **Extracting Relevant Links**
       - If the user asks for a link related to their request, extract the most relevant URL from the context and provide it directly.
       - Example response:
         - "Here is the link you requested: [URL]"

    **Previous conversation history:** {conversation_history}
    **Context:** {context}
    **User's Question:** {question}
    **Your Response:**
""")


# Wrap the template string in a PromptTemplate; rag_chain fills it via .format().
rag_prompt = PromptTemplate.from_template(template)

# Retriever over the Chroma store, created with as_retriever()'s default arguments.
retriever = vectorstore.as_retriever()

# Chat model used by rag_chain for answers and by generate_welcome_message.
llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)

# Dictionary to store user sessions with session IDs
# NOTE(review): no live code in this part of the file reads or writes this dict;
# conversation history is kept in session_manager. Confirm before removing.
user_sessions = {}

# Define the RAG chain with session history
def rag_chain(question, session_id="default"):
    """Answer *question* from retrieved context plus the session's history.

    Steps: read the session's recent history, retrieve documents for the
    question, join their ``page_content`` with newlines, fill the prompt
    template, invoke the LLM, and record the exchange in the session.

    Args:
        question: The user's message.
        session_id: Key under which history is read and the new exchange stored.

    Returns:
        The ``content`` of the LLM's reply.
    """
    # History is read before the new exchange is recorded.
    history_text = session_manager.get_history(session_id)

    retrieved_docs = retriever.invoke(question)
    context_text = "\n".join([d.page_content for d in retrieved_docs])

    filled_prompt = rag_prompt.format(
        conversation_history=history_text,
        question=question,
        context=context_text,
    )

    answer = llm.invoke(filled_prompt).content
    session_manager.add_interaction(session_id, question, answer)
    return answer

# Define the RAG memory stream function
def rag_memory_stream(message, history):
    """Chat callback: answer *message* and yield the reply word by word.

    The session ID is derived from the first user message of the
    conversation (hash of its first 20 characters). When *history* is empty,
    the current *message* is that first message, so the first turn is stored
    under the same session ID as later turns instead of a shared
    ``"default_session"`` bucket.

    Both history shapes are accepted: a sequence of
    ``(user_message, assistant_message)`` pairs, or a sequence of
    ``{"role": ..., "content": ...}`` dicts.

    NOTE: conversations whose first user message starts with the same 20
    characters still get the same session ID and therefore share the history
    stored by ``rag_chain``.

    Args:
        message: The new user message.
        history: Previous turns of the conversation (may be empty or None).

    Yields:
        The response built up one space-separated word at a time, each time
        stripped of surrounding whitespace.
    """
    # Locate the first user message of this conversation.
    first_user_message = None
    for entry in history or []:
        if isinstance(entry, dict):
            if entry.get("role") == "user" and entry.get("content"):
                first_user_message = entry["content"]
                break
        elif entry and entry[0]:
            first_user_message = entry[0]
            break

    # No earlier turn: the current message opens the conversation.
    if first_user_message is None and message:
        first_user_message = message

    if first_user_message is None:
        session_id = "default_session"
    else:
        # str() keeps the hash well-defined even for non-string content.
        session_id = hash(str(first_user_message)[:20])

    # Process the message and get the full response.
    response = rag_chain(message, str(session_id))

    # Stream the response word by word.
    partial_text = ""
    for word in response.split(' '):
        partial_text += word + " "
        yield partial_text.strip()

# Title passed to gr.ChatInterface as the interface title
title = "GBVR Chatbot"

# Custom CSS for styling the interface (passed to gr.ChatInterface via css=)
custom_css = """
/* Custom CSS for styling the interface */
body {
    font-family: "Arial", serif;
}

.gradio-container {
    font-family: "Times New Roman", serif;
}

.gr-button {
    background-color: #007bff; /* Blue button */
    color: white;
    border: none;
    border-radius: 5px;
    font-size: 16px;
    padding: 10px 20px;
    cursor: pointer;
}

.gr-textbox:focus, .gr-button:focus {
    outline: none; /* Remove outline focus for a cleaner look */
}

/* Specific CSS for the welcome message */
.gradio-description {
    font-size: 30px; /* Set font size for the welcome message */
    font-family: "Arial", sans-serif;
    text-align: center; /* Optional: Center-align the text */
    padding: 20px; /* Optional: Add padding around the welcome message */
}

"""

# Generate a simple welcome message using the LLM
def generate_welcome_message():
    """Return a short welcome message for the chatbot.

    The message is requested from the LLM. If that call fails for any reason
    the error is printed and a fixed fallback message is returned, so a
    failure here does not stop the application from starting.

    Returns:
        The welcome text as a string.
    """
    welcome_prompt = """
    Generate a short, simple welcome message for a chatbot about Gender-Based Violence Resources in Rwanda.
    Keep it under 3 sentences, and use simple language.
    Make it warm and supportive but direct and easy to read.
    """

    try:
        # Get the welcome message from the LLM
        return llm.invoke(welcome_prompt).content
    except Exception as e:
        # This runs at module load; fall back to a static message on failure.
        print(f"Error generating welcome message: {e}")
        return (
            "Welcome to the GBVR Chatbot. Ask me about gender-based violence "
            "resources in Rwanda, and I will do my best to help."
        )

# Create simple welcome message
# Computed once, when this module is loaded.
welcome_msg = generate_welcome_message()

# Create the Chat Interface with welcome message
# rag_memory_stream is a generator that yields the growing response text;
# the welcome message is passed as the interface description.
demo = gr.ChatInterface(
    fn=rag_memory_stream,
    title=title,
    fill_height=True,
    theme="soft",
    css=custom_css, # Apply the custom CSS
    description=welcome_msg
)


# Launch the app
# Only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch(share=True, inbrowser=True, debug=True)