DHEIVER committed on
Commit 1013dbf · verified · 1 Parent(s): 649297f

Update app.py

Files changed (1): app.py (+286 -114)
app.py CHANGED
@@ -1,6 +1,5 @@
 import gradio as gr
 import os
-import secrets
 from functools import partial

 api_token = os.getenv("HF_TOKEN")
@@ -16,15 +15,6 @@ from langchain_community.llms import HuggingFaceEndpoint
 list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]

-# Simulated user database (replace with a real database in production)
-USER_DB = {
-    "admin": {"password": "securepass123", "email": "[email protected]"},
-    "user1": {"password": "userpass456", "email": "[email protected]"}
-}
-
-# Session storage (in-memory for simplicity)
-SESSIONS = {}
-
 # Load and split PDF document
 def load_doc(list_file_path):
     loaders = [PyPDFLoader(x) for x in list_file_path]
@@ -46,7 +36,7 @@ def initialize_database(list_file_obj, progress=gr.Progress()):
     list_file_path = [x.name for x in list_file_obj if x is not None]
     doc_splits = load_doc(list_file_path)
     vector_db = create_db(doc_splits)
-    return vector_db, "Database created!"
+    return vector_db, "Database created successfully!"

 # Initialize langchain LLM chain
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
@@ -86,7 +76,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
 def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     llm_name = list_llm[llm_option]
     qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db, progress)
-    return qa_chain, "QA chain initialized. Chatbot is ready!"
+    return qa_chain, "QA chain initialized. Chatbot is ready! 🚀"

 def format_chat_history(message, chat_history):
     formatted_chat_history = []
@@ -128,114 +118,296 @@ def conversation(qa_chain, message, history, language):
     new_history = history + [(message, response_answer)]
     return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page

-# Login function
-def login(username, password):
-    # Debugging: Uncomment the next line to see what’s being entered
-    # print(f"Attempting login with username: {username}, password: {password}")
-    if username in USER_DB and USER_DB[username]["password"] == password:
-        session_token = secrets.token_hex(16)
-        SESSIONS[session_token] = username
-        return True, session_token, f"Welcome, {username}! You are now logged in."
-    else:
-        return False, None, "Invalid username or password. Please try again."
-
-# Logout function
-def logout(session_token):
-    if session_token in SESSIONS:
-        del SESSIONS[session_token]
-    return False, None, "You have been logged out."
-
-# Main demo with modern login
-def demo():
-    with gr.Blocks(
-        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="gray", neutral_hue="slate"),
-        css="""
-        .login-box { max-width: 400px; margin: 50px auto; padding: 20px; border-radius: 10px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); }
-        .title { text-align: center; font-size: 2em; margin-bottom: 20px; }
-        .button { background-color: #007bff; color: white; border-radius: 5px; }
-        .button:hover { background-color: #0056b3; }
-        """
-    ) as demo:
+# Main demo with enhanced UI
+def demo():
+    # Custom CSS
+    custom_css = """
+    /* Global styles */
+    body {
+        font-family: 'Inter', sans-serif;
+        color: #333;
+        background-color: #f9fafb;
+    }
+
+    /* Header styles */
+    .header {
+        text-align: center;
+        padding: 20px 0;
+        margin-bottom: 20px;
+        background: linear-gradient(90deg, #3b82f6, #2563eb);
+        color: white;
+        border-radius: 10px;
+        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+    }
+
+    .header h1 {
+        font-size: 2.5rem;
+        margin: 0;
+        padding: 0;
+    }
+
+    .header p {
+        font-size: 1.1rem;
+        margin: 10px 0 0;
+        opacity: 0.9;
+    }
+
+    /* Card styles */
+    .card {
+        background-color: white;
+        border-radius: 10px;
+        padding: 20px;
+        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
+        margin-bottom: 20px;
+    }
+
+    /* Section titles */
+    .section-title {
+        font-size: 1.25rem;
+        font-weight: 600;
+        margin-bottom: 15px;
+        color: #1e40af;
+        display: flex;
+        align-items: center;
+    }
+
+    .section-title svg {
+        margin-right: 8px;
+    }
+
+    /* Buttons */
+    .primary-button {
+        background: linear-gradient(90deg, #3b82f6, #2563eb);
+        color: white;
+        border: none;
+        padding: 10px 20px;
+        border-radius: 8px;
+        font-weight: 500;
+        cursor: pointer;
+        transition: all 0.2s ease;
+        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+    }
+
+    .primary-button:hover {
+        background: linear-gradient(90deg, #2563eb, #1d4ed8);
+        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
+        transform: translateY(-1px);
+    }
+
+    /* Status indicators */
+    .status {
+        padding: 8px 12px;
+        border-radius: 6px;
+        font-size: 0.9rem;
+        font-weight: 500;
+    }
+
+    .status-success {
+        background-color: #d1fae5;
+        color: #065f46;
+    }
+
+    .status-waiting {
+        background-color: #fef3c7;
+        color: #92400e;
+    }
+
+    .status-error {
+        background-color: #fee2e2;
+        color: #b91c1c;
+    }
+
+    /* Chat container */
+    .chat-container {
+        border-radius: 10px;
+        border: 1px solid #e5e7eb;
+        overflow: hidden;
+    }
+
+    /* Document upload area */
+    .upload-area {
+        border: 2px dashed #d1d5db;
+        border-radius: 8px;
+        padding: 20px;
+        text-align: center;
+        background-color: #f9fafb;
+        transition: all 0.2s ease;
+    }
+
+    .upload-area:hover {
+        border-color: #3b82f6;
+        background-color: #eff6ff;
+    }
+
+    /* Parameter sliders */
+    .parameter-slider {
+        margin-bottom: 15px;
+    }
+
+    /* Reference boxes */
+    .reference-box {
+        background-color: #f3f4f6;
+        border-left: 4px solid #3b82f6;
+        padding: 10px 15px;
+        margin-bottom: 10px;
+        border-radius: 4px;
+    }
+
+    .reference-box-title {
+        font-weight: 600;
+        color: #1e40af;
+        margin-bottom: 5px;
+        display: flex;
+        justify-content: space-between;
+    }
+
+    .page-number {
+        background-color: #dbeafe;
+        color: #1e40af;
+        padding: 2px 8px;
+        border-radius: 12px;
+        font-size: 0.8rem;
+    }
+
+    /* Responsive adjustments */
+    @media (max-width: 768px) {
+        .header h1 {
+            font-size: 1.8rem;
+        }
+    }
+    """
+
+    # HTML Components
+    header_html = """
+    <div class="header">
+        <h1>📚 RAG PDF Chatbot</h1>
+        <p>Query your documents with AI-powered search and generation</p>
+    </div>
+    """
+
+    upload_html = """
+    <div class="section-title">
+        <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+            <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"></path>
+            <polyline points="17 8 12 3 7 8"></polyline>
+            <line x1="12" y1="3" x2="12" y2="15"></line>
+        </svg>
+        Upload your PDF documents
+    </div>
+    <p>Select one or more PDF files to analyze and chat with.</p>
+    """
+
+    model_html = """
+    <div class="section-title">
+        <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+            <path d="M12 2L2 7l10 5 10-5-10-5z"></path>
+            <path d="M2 17l10 5 10-5"></path>
+            <path d="M2 12l10 5 10-5"></path>
+        </svg>
+        Select AI Model
+    </div>
+    <p>Choose the language model that will process your questions.</p>
+    """
+
+    chat_html = """
+    <div class="section-title">
+        <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+            <path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"></path>
+        </svg>
+        Chat with your Documents
+    </div>
+    <p>Ask questions about your uploaded documents to get AI-powered answers.</p>
+    """
+
+    reference_html = """
+    <div class="section-title">
+        <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+            <path d="M2 3h6a4 4 0 0 1 4 4v14a3 3 0 0 0-3-3H2z"></path>
+            <path d="M22 3h-6a4 4 0 0 0-4 4v14a3 3 0 0 1 3-3h7z"></path>
+        </svg>
+        Document References
+    </div>
+    <p>These are the relevant sections from your documents that the AI used to generate its response.</p>
+    """
+
+    with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="blue", neutral_hue="slate"), css=custom_css) as demo:
         # State variables
         vector_db = gr.State()
         qa_chain = gr.State()
-        logged_in = gr.State(value=False)
-        session_token = gr.State(value=None)
-
-        # Login interface
-        with gr.Column(elem_classes="login-box", visible=True) as login_col:
-            gr.HTML("<h1 class='title'>RAG PDF Chatbot Login</h1>")
-            username = gr.Textbox(label="Username", placeholder="Enter your username", lines=1)
-            password = gr.Textbox(label="Password", type="password", placeholder="Enter your password", lines=1)
-            login_btn = gr.Button("Login", elem_classes="button")
-            login_message = gr.Textbox(value="Please log in to access the chatbot.", show_label=False, interactive=False)
-
-        # Chatbot interface (hidden until login)
-        with gr.Column(visible=False) as chatbot_col:
-            with gr.Row():
-                gr.HTML("<h1 class='title'>RAG PDF Chatbot</h1>")
-                logout_btn = gr.Button("Logout", elem_classes="button", scale=0)
-            gr.Markdown("""<b>Query your PDF documents!</b> This AI agent is designed to perform retrieval augmented generation (RAG) on PDF documents. \
-            <b>Please do not upload confidential documents.</b>""")
-
-            with gr.Row():
-                with gr.Column(scale=86):
-                    gr.Markdown("<b>Step 1 - Upload PDF documents and Initialize RAG pipeline</b>")
-                    document = gr.Files(height=300, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload PDF documents")
-                    db_btn = gr.Button("Create vector database", elem_classes="button")
-                    db_progress = gr.Textbox(value="Not initialized", show_label=False)
-                    gr.Markdown("<b>Select Large Language Model (LLM) and input parameters</b>")
-                    llm_btn = gr.Radio(list_llm_simple, label="Available LLMs", value=list_llm_simple[0], type="index")
-                    with gr.Accordion("LLM input parameters", open=False):
-                        slider_temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.5, step=0.1, label="Temperature", interactive=True)
-                        slider_maxtokens = gr.Slider(minimum=128, maximum=9192, value=4096, step=128, label="Max New Tokens", interactive=True)
-                        slider_topk = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k", interactive=True)
-                    qachain_btn = gr.Button("Initialize Question Answering Chatbot", elem_classes="button")
-                    llm_progress = gr.Textbox(value="Not initialized", show_label=False)
-
-                with gr.Column(scale=200):
-                    gr.Markdown("<b>Step 2 - Chat with your Document</b>")
-                    language_selector = gr.Radio(["English", "Português"], label="Select Language", value="English")
-                    chatbot = gr.Chatbot(height=505)
-                    with gr.Accordion("Relevant context from the source document", open=False):
-                        doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
-                        source1_page = gr.Number(label="Page", scale=1)
-                        doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
-                        source2_page = gr.Number(label="Page", scale=1)
-                        doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
-                        source3_page = gr.Number(label="Page", scale=1)
-                    msg = gr.Textbox(placeholder="Ask a question", container=True)
-                    submit_btn = gr.Button("Submit", elem_classes="button")
-                    clear_btn = gr.ClearButton([msg, chatbot], value="Clear")
-
-        # Login event
-        login_btn.click(
-            fn=login,
-            inputs=[username, password],
-            outputs=[logged_in, session_token, login_message]
-        ).then(
-            fn=lambda logged: (gr.update(visible=not logged), gr.update(visible=logged)),
-            inputs=[logged_in],
-            outputs=[login_col, chatbot_col],
-            queue=False
-        )
-
-        # Logout event
-        logout_btn.click(
-            fn=logout,
-            inputs=[session_token],
-            outputs=[logged_in, session_token, login_message]
-        ).then(
-            fn=lambda logged: (gr.update(visible=not logged), gr.update(visible=logged)),
-            inputs=[logged_in],
-            outputs=[login_col, chatbot_col],
-            queue=False
-        ).then(
-            fn=lambda: gr.update(value="Please log in to access the chatbot."),
-            inputs=None,
-            outputs=[login_message],
-            queue=False
-        )
+
+        # Header
+        gr.HTML(header_html)
+
+        with gr.Row():
+            # Left column - Setup
+            with gr.Column(scale=1):
+                with gr.Box(elem_classes="card"):
+                    gr.HTML(upload_html)
+                    document = gr.Files(height=200, file_count="multiple", file_types=["pdf"], interactive=True)
+                    db_btn = gr.Button("Create Vector Database", elem_classes="primary-button")
+                    db_progress = gr.Textbox(value="Not initialized", show_label=False, elem_classes="status status-waiting")
+
+                with gr.Box(elem_classes="card"):
+                    gr.HTML(model_html)
+                    llm_btn = gr.Radio(list_llm_simple, label="", value=list_llm_simple[0], type="index")
+
+                    with gr.Accordion("Advanced Parameters", open=False):
+                        slider_temperature = gr.Slider(minimum=0.01, maximum=1.0, value=0.5, step=0.1, label="Temperature", interactive=True, elem_classes="parameter-slider")
+                        slider_maxtokens = gr.Slider(minimum=128, maximum=9192, value=4096, step=128, label="Max Tokens", interactive=True, elem_classes="parameter-slider")
+                        slider_topk = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="Top-K", interactive=True, elem_classes="parameter-slider")
+
+                    qachain_btn = gr.Button("Initialize Chatbot", elem_classes="primary-button")
+                    llm_progress = gr.Textbox(value="Not initialized", show_label=False, elem_classes="status status-waiting")
+
+                with gr.Box(elem_classes="card"):
+                    gr.Markdown("### Usage Instructions")
+                    gr.Markdown("""
+                    1. Upload one or more PDF documents
+                    2. Click "Create Vector Database"
+                    3. Select your preferred AI model
+                    4. Click "Initialize Chatbot"
+                    5. Start asking questions about your documents
+
+                    **Note:** The system will analyze your documents and use AI to answer questions based on their content.
+                    """)
+
+            # Right column - Chat
+            with gr.Column(scale=1.5):
+                with gr.Box(elem_classes="card"):
+                    gr.HTML(chat_html)
+                    language_selector = gr.Radio(["English", "Português"], label="Response Language", value="English")
+
+                    chatbot = gr.Chatbot(height=400, elem_classes="chat-container")
+
+                    with gr.Row():
+                        with gr.Column(scale=4):
+                            msg = gr.Textbox(placeholder="Ask a question about your documents...", show_label=False)
+                        with gr.Column(scale=1):
+                            submit_btn = gr.Button("Send", elem_classes="primary-button")
+
+                    with gr.Row():
+                        clear_btn = gr.Button("Clear Chat", scale=1)
+
+                with gr.Box(elem_classes="card"):
+                    gr.HTML(reference_html)
+                    with gr.Accordion("Document References", open=True):
+                        with gr.Box(elem_classes="reference-box"):
+                            with gr.Row():
+                                gr.Markdown("**Reference 1**", elem_classes="reference-box-title")
+                                source1_page = gr.Number(label="Page", show_label=False, elem_classes="page-number")
+                            doc_source1 = gr.Textbox(show_label=False, lines=2)
+
+                        with gr.Box(elem_classes="reference-box"):
+                            with gr.Row():
+                                gr.Markdown("**Reference 2**", elem_classes="reference-box-title")
+                                source2_page = gr.Number(label="Page", show_label=False, elem_classes="page-number")
+                            doc_source2 = gr.Textbox(show_label=False, lines=2)
+
+                        with gr.Box(elem_classes="reference-box"):
+                            with gr.Row():
+                                gr.Markdown("**Reference 3**", elem_classes="reference-box-title")
+                                source3_page = gr.Number(label="Page", show_label=False, elem_classes="page-number")
+                            doc_source3 = gr.Textbox(show_label=False, lines=2)

         # Preprocessing events
         db_btn.click(initialize_database, inputs=[document], outputs=[vector_db, db_progress])
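
Note: the hunks shown above end inside demo(), so the part of app.py that actually starts the interface is not visible in this commit view. Purely as a hedged sketch of the usual Gradio pattern (not code from this commit, and the names below are illustrative), a Blocks app assembled inside a demo() function is typically queued and launched at module level like this:

    # Hypothetical sketch, not part of the diff above: typical entry point
    # for a Gradio Blocks app whose UI is built inside a demo() function.
    import gradio as gr

    def demo():
        with gr.Blocks() as demo_ui:        # components and event wiring go here
            gr.Markdown("RAG PDF Chatbot")  # placeholder component
        # queue() is commonly enabled so gr.Progress updates and streaming events work
        demo_ui.queue().launch(debug=True)

    if __name__ == "__main__":
        demo()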