DHEIVER committed on
Commit 1013dbf · verified · 1 Parent(s): 649297f

Update app.py

Files changed (1): app.py (+286 -114)
app.py CHANGED
@@ -1,6 +1,5 @@
 import gradio as gr
 import os
-import secrets
 from functools import partial

 api_token = os.getenv("HF_TOKEN")
@@ -16,15 +15,6 @@ from langchain_community.llms import HuggingFaceEndpoint
 list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]

-# Simulated user database (replace with a real database in production)
-USER_DB = {
-    "admin": {"password": "securepass123", "email": "[email protected]"},
-    "user1": {"password": "userpass456", "email": "[email protected]"}
-}
-
-# Session storage (in-memory for simplicity)
-SESSIONS = {}
-
 # Load and split PDF document
 def load_doc(list_file_path):
     loaders = [PyPDFLoader(x) for x in list_file_path]
@@ -46,7 +36,7 @@ def initialize_database(list_file_obj, progress=gr.Progress()):
     list_file_path = [x.name for x in list_file_obj if x is not None]
     doc_splits = load_doc(list_file_path)
     vector_db = create_db(doc_splits)
-    return vector_db, "Database created!"
+    return vector_db, "Database created successfully!"

 # Initialize langchain LLM chain
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
@@ -86,7 +76,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
 def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     llm_name = list_llm[llm_option]
     qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db, progress)
-    return qa_chain, "QA chain initialized. Chatbot is ready!"
+    return qa_chain, "QA chain initialized. Chatbot is ready! 🚀"

 def format_chat_history(message, chat_history):
     formatted_chat_history = []
@@ -128,114 +118,296 @@ def conversation(qa_chain, message, history, language):
     new_history = history + [(message, response_answer)]
     return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page

-# Login function
-def login(username, password):
-    # Debugging: Uncomment the next line to see what’s being entered
-    # print(f"Attempting login with username: {username}, password: {password}")
-    if username in USER_DB and USER_DB[username]["password"] == password:
-        session_token = secrets.token_hex(16)
-        SESSIONS[session_token] = username
-        return True, session_token, f"Welcome, {username}! You are now logged in."
-    else:
-        return False, None, "Invalid username or password. Please try again."
-
-# Logout function
-def logout(session_token):
-    if session_token in SESSIONS:
-        del SESSIONS[session_token]
-    return False, None, "You have been logged out."
-
-# Main demo with modern login
-def demo():
-    with gr.Blocks(
-        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="gray", neutral_hue="slate"),
-        css="""
-        .login-box { max-width: 400px; margin: 50px auto; padding: 20px; border-radius: 10px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); }
-        .title { text-align: center; font-size: 2em; margin-bottom: 20px; }
-        .button { background-color: #007bff; color: white; border-radius: 5px; }
-        .button:hover { background-color: #0056b3; }
-        """
-    ) as demo:
+# Main demo with enhanced UI
+def demo():
+    # Custom CSS
+    custom_css = """
+    /* Global styles */
+    body {
+        font-family: 'Inter', sans-serif;
+        color: #333;
+        background-color: #f9fafb;
+    }
+
+    /* Header styles */
+    .header {
+        text-align: center;
+        padding: 20px 0;
+        margin-bottom: 20px;
+        background: linear-gradient(90deg, #3b82f6, #2563eb);
+        color: white;
+        border-radius: 10px;
+        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+    }
+
+    .header h1 {
+        font-size: 2.5rem;
+        margin: 0;
+        padding: 0;
+    }
+
+    .header p {
+        font-size: 1.1rem;
+        margin: 10px 0 0;
+        opacity: 0.9;
+    }
+
+    /* Card styles */
+    .card {
+        background-color: white;
+        border-radius: 10px;
+        padding: 20px;
+        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
+        margin-bottom: 20px;
+    }
+
+    /* Section titles */
+    .section-title {
+        font-size: 1.25rem;
+        font-weight: 600;
+        margin-bottom: 15px;
+        color: #1e40af;
+        display: flex;
+        align-items: center;
+    }
+
+    .section-title svg {
+        margin-right: 8px;
+    }
+
+    /* Buttons */
+    .primary-button {
+        background: linear-gradient(90deg, #3b82f6, #2563eb);
+        color: white;
+        border: none;
+        padding: 10px 20px;
+        border-radius: 8px;
+        font-weight: 500;
+        cursor: pointer;
+        transition: all 0.2s ease;
+        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+    }
+
+    .primary-button:hover {
+        background: linear-gradient(90deg, #2563eb, #1d4ed8);
+        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
+        transform: translateY(-1px);
+    }
+
+    /* Status indicators */
+    .status {
+        padding: 8px 12px;
+        border-radius: 6px;
+        font-size: 0.9rem;
+        font-weight: 500;
+    }
+
+    .status-success {
+        background-color: #d1fae5;
+        color: #065f46;
+    }
+
+    .status-waiting {
+        background-color: #fef3c7;
+        color: #92400e;
+    }
+
+    .status-error {
+        background-color: #fee2e2;
+        color: #b91c1c;
+    }
+
+    /* Chat container */
+    .chat-container {
+        border-radius: 10px;
+        border: 1px solid #e5e7eb;
+        overflow: hidden;
+    }
+
+    /* Document upload area */
+    .upload-area {
+        border: 2px dashed #d1d5db;
+        border-radius: 8px;
+        padding: 20px;
+        text-align: center;
+        background-color: #f9fafb;
+        transition: all 0.2s ease;
+    }
+
+    .upload-area:hover {
+        border-color: #3b82f6;
+        background-color: #eff6ff;
+    }
+
+    /* Parameter sliders */
+    .parameter-slider {
+        margin-bottom: 15px;
+    }
+
+    /* Reference boxes */
+    .reference-box {
+        background-color: #f3f4f6;
+        border-left: 4px solid #3b82f6;
+        padding: 10px 15px;
+        margin-bottom: 10px;
+        border-radius: 4px;
+    }
+
+    .reference-box-title {
+        font-weight: 600;
+        color: #1e40af;
+        margin-bottom: 5px;
+        display: flex;
+        justify-content: space-between;
+    }
+
+    .page-number {
+        background-color: #dbeafe;
+        color: #1e40af;
+        padding: 2px 8px;
+        border-radius: 12px;
+        font-size: 0.8rem;
+    }
+
+    /* Responsive adjustments */
+    @media (max-width: 768px) {
+        .header h1 {
+            font-size: 1.8rem;
+        }
+    }
+    """
+
+    # HTML Components
+    header_html = """
+    <div class="header">
+        <h1>📚 RAG PDF Chatbot</h1>
+        <p>Query your documents with AI-powered search and generation</p>
+    </div>
+    """
+
+    upload_html = """
+    <div class="section-title">
+        <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+            <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
+            <polyline points="17 8 12 3 7 8"></polyline>
+            <line x1="12" y1="3" x2="12" y2="15"></line>
+        </svg>
+        Upload your PDF documents
+    </div>
+    <p>Select one or more PDF files to analyze and chat with.</p>
+    """
+
+    model_html = """
+    <div class="section-title">
+        <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+            <path d="M12 2L2 7l10 5 10-5-10-5z"></path>
+            <path d="M2 17l10 5 10-5"></path>
+            <path d="M2 12l10 5 10-5"></path>
+        </svg>
+        Select AI Model
+    </div>
+    <p>Choose the language model that will process your questions.</p>
+    """
+
+    chat_html = """
+    <div class="section-title">
+        <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+            <path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path>
+        </svg>
+        Chat with your Documents
+    </div>
+    <p>Ask questions about your uploaded documents to get AI-powered answers.</p>
+    """
+
+    reference_html = """
+    <div class="section-title">
+        <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+            <path d="M2 3h6a4 4 0 0 1 4 4v14a3 3 0 0 0-3-3H2z"></path>
+            <path d="M22 3h-6a4 4 0 0 0-4 4v14a3 3 0 0 1 3-3h7z"></path>
+        </svg>
+        Document References
+    </div>
+    <p>These are the relevant sections from your documents that the AI used to generate its response.</p>
+    """
+
+    with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="blue", neutral_hue="slate"), css=custom_css) as demo:
         # State variables
         vector_db = gr.State()
         qa_chain = gr.State()
-        logged_in = gr.State(value=False)
-        session_token = gr.State(value=None)
-
-        # Login interface
-        with gr.Column(elem_classes="login-box", visible=True) as login_col:
-            gr.HTML("<h1 class='title'>RAG PDF Chatbot Login</h1>")
-            username = gr.Textbox(label="Username", placeholder="Enter your username", lines=1)
-            password = gr.Textbox(label="Password", type="password", placeholder="Enter your password", lines=1)
-            login_btn = gr.Button("Login", elem_classes="button")
-            login_message = gr.Textbox(value="Please log in to access the chatbot.", show_label=False, interactive=False)
-
-        # Chatbot interface (hidden until login)
-        with gr.Column(visible=False) as chatbot_col:
-            with gr.Row():
-                gr.HTML("<h1 class='title'>RAG PDF Chatbot</h1>")
-                logout_btn = gr.Button("Logout", elem_classes="button", scale=0)
-            gr.Markdown("""<b>Query your PDF documents!</b> This AI agent is designed to perform retrieval augmented generation (RAG) on PDF documents. \
-            <b>Please do not upload confidential documents.</b>""")
-
-            with gr.Row():
-                with gr.Column(scale=86):
-                    gr.Markdown("<b>Step 1 - Upload PDF documents and Initialize RAG pipeline</b>")
-                    document = gr.Files(height=300, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload PDF documents")
-                    db_btn = gr.Button("Create vector database", elem_classes="button")
-                    db_progress = gr.Textbox(value="Not initialized", show_label=False)
-                    gr.Markdown("<b>Select Large Language Model (LLM) and input parameters</b>")
-                    llm_btn = gr.Radio(list_llm_simple, label="Available LLMs", value=list_llm_simple[0], type="index")
-                    with gr.Accordion("LLM input parameters", open=False):
-                        slider_temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.5, step=0.1, label="Temperature", interactive=True)
-                        slider_maxtokens = gr.Slider(minimum=128, maximum=9192, value=4096, step=128, label="Max New Tokens", interactive=True)
-                        slider_topk = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k", interactive=True)
-                    qachain_btn = gr.Button("Initialize Question Answering Chatbot", elem_classes="button")
-                    llm_progress = gr.Textbox(value="Not initialized", show_label=False)
-
-                with gr.Column(scale=200):
-                    gr.Markdown("<b>Step 2 - Chat with your Document</b>")
-                    language_selector = gr.Radio(["English", "Português"], label="Select Language", value="English")
-                    chatbot = gr.Chatbot(height=505)
-                    with gr.Accordion("Relevant context from the source document", open=False):
-                        doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
-                        source1_page = gr.Number(label="Page", scale=1)
-                        doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
-                        source2_page = gr.Number(label="Page", scale=1)
-                        doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
-                        source3_page = gr.Number(label="Page", scale=1)
-                    msg = gr.Textbox(placeholder="Ask a question", container=True)
-                    submit_btn = gr.Button("Submit", elem_classes="button")
-                    clear_btn = gr.ClearButton([msg, chatbot], value="Clear")
-
-        # Login event
-        login_btn.click(
-            fn=login,
-            inputs=[username, password],
-            outputs=[logged_in, session_token, login_message]
-        ).then(
-            fn=lambda logged: (gr.update(visible=not logged), gr.update(visible=logged)),
-            inputs=[logged_in],
-            outputs=[login_col, chatbot_col],
-            queue=False
-        )
-
-        # Logout event
-        logout_btn.click(
-            fn=logout,
-            inputs=[session_token],
-            outputs=[logged_in, session_token, login_message]
-        ).then(
-            fn=lambda logged: (gr.update(visible=not logged), gr.update(visible=logged)),
-            inputs=[logged_in],
-            outputs=[login_col, chatbot_col],
-            queue=False
-        ).then(
-            fn=lambda: gr.update(value="Please log in to access the chatbot."),
-            inputs=None,
-            outputs=[login_message],
-            queue=False
-        )
+
+        # Header
+        gr.HTML(header_html)
+
+        with gr.Row():
+            # Left column - Setup
+            with gr.Column(scale=1):
+                with gr.Box(elem_classes="card"):
+                    gr.HTML(upload_html)
+                    document = gr.Files(height=200, file_count="multiple", file_types=["pdf"], interactive=True)
+                    db_btn = gr.Button("Create Vector Database", elem_classes="primary-button")
+                    db_progress = gr.Textbox(value="Not initialized", show_label=False, elem_classes="status status-waiting")
+
+                with gr.Box(elem_classes="card"):
+                    gr.HTML(model_html)
+                    llm_btn = gr.Radio(list_llm_simple, label="", value=list_llm_simple[0], type="index")
+
+                    with gr.Accordion("Advanced Parameters", open=False):
+                        slider_temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.5, step=0.1, label="Temperature", interactive=True, elem_classes="parameter-slider")
+                        slider_maxtokens = gr.Slider(minimum=128, maximum=9192, value=4096, step=128, label="Max Tokens", interactive=True, elem_classes="parameter-slider")
+                        slider_topk = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Top-K", interactive=True, elem_classes="parameter-slider")
+
+                    qachain_btn = gr.Button("Initialize Chatbot", elem_classes="primary-button")
+                    llm_progress = gr.Textbox(value="Not initialized", show_label=False, elem_classes="status status-waiting")
+
+                with gr.Box(elem_classes="card"):
+                    gr.Markdown("### Usage Instructions")
+                    gr.Markdown("""
+                    1. Upload one or more PDF documents
+                    2. Click "Create Vector Database"
+                    3. Select your preferred AI model
+                    4. Click "Initialize Chatbot"
+                    5. Start asking questions about your documents
+
+                    **Note:** The system will analyze your documents and use AI to answer questions based on their content.
+                    """)
+
+            # Right column - Chat
+            with gr.Column(scale=1.5):
+                with gr.Box(elem_classes="card"):
+                    gr.HTML(chat_html)
+                    language_selector = gr.Radio(["English", "Português"], label="Response Language", value="English")
+
+                    chatbot = gr.Chatbot(height=400, elem_classes="chat-container")
+
+                    with gr.Row():
+                        with gr.Column(scale=4):
+                            msg = gr.Textbox(placeholder="Ask a question about your documents...", show_label=False)
+                        with gr.Column(scale=1):
+                            submit_btn = gr.Button("Send", elem_classes="primary-button")
+
+                    with gr.Row():
+                        clear_btn = gr.Button("Clear Chat", scale=1)
+
+                with gr.Box(elem_classes="card"):
+                    gr.HTML(reference_html)
+                    with gr.Accordion("Document References", open=True):
+                        with gr.Box(elem_classes="reference-box"):
+                            with gr.Row():
+                                gr.Markdown("**Reference 1**", elem_classes="reference-box-title")
+                                source1_page = gr.Number(label="Page", show_label=False, elem_classes="page-number")
+                            doc_source1 = gr.Textbox(show_label=False, lines=2)
+
+                        with gr.Box(elem_classes="reference-box"):
+                            with gr.Row():
+                                gr.Markdown("**Reference 2**", elem_classes="reference-box-title")
+                                source2_page = gr.Number(label="Page", show_label=False, elem_classes="page-number")
+                            doc_source2 = gr.Textbox(show_label=False, lines=2)
+
+                        with gr.Box(elem_classes="reference-box"):
+                            with gr.Row():
+                                gr.Markdown("**Reference 3**", elem_classes="reference-box-title")
+                                source3_page = gr.Number(label="Page", show_label=False, elem_classes="page-number")
+                            doc_source3 = gr.Textbox(show_label=False, lines=2)

         # Preprocessing events
         db_btn.click(initialize_database, inputs=[document], outputs=[vector_db, db_progress])
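
Note: the hunks shown above end inside demo(), so the part of app.py that actually starts the interface is not visible in this commit view. Purely as a hedged sketch of the usual Gradio pattern (not code from this commit, and the names below are illustrative), a Blocks app assembled inside a demo() function is typically queued and launched at module level like this:

    # Hypothetical sketch, not part of the diff above: typical entry point
    # for a Gradio Blocks app whose UI is built inside a demo() function.
    import gradio as gr

    def demo():
        with gr.Blocks() as demo_ui:        # components and event wiring go here
            gr.Markdown("RAG PDF Chatbot")  # placeholder component
        # queue() is commonly enabled so gr.Progress updates and streaming events work
        demo_ui.queue().launch(debug=True)

    if __name__ == "__main__":
        demo()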