Shreyas094 committed
Commit 5b68e03 · verified · 1 Parent(s): 4977c9a

Update app.py

Files changed (1):
  1. app.py +90 -368
app.py CHANGED
@@ -1,48 +1,21 @@
  import os
  import json
- import re
  import gradio as gr
- import requests
  from duckduckgo_search import DDGS
  from typing import List
  from pydantic import BaseModel, Field
  from tempfile import NamedTemporaryFile
  from langchain_community.vectorstores import FAISS
- from langchain_core.vectorstores import VectorStore
  from langchain_core.documents import Document
  from langchain_community.document_loaders import PyPDFLoader
  from langchain_community.embeddings import HuggingFaceEmbeddings
  from llama_parse import LlamaParse
- from langchain_core.documents import Document
- from huggingface_hub import InferenceClient
- import inspect
  import logging
  import shutil
 
- 
- # Set up basic configuration for logging
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
- 
- # Environment variables and configurations
- huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
- llama_cloud_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
- ACCOUNT_ID = os.environ.get("CLOUDFARE_ACCOUNT_ID")
- API_TOKEN = os.environ.get("CLOUDFLARE_AUTH_TOKEN")
- API_BASE_URL = "https://api.cloudflare.com/client/v4/accounts/a17f03e0f049ccae0c15cdcf3b9737ce/ai/run/"
- 
- print(f"ACCOUNT_ID: {ACCOUNT_ID}")
- print(f"CLOUDFLARE_AUTH_TOKEN: {API_TOKEN[:5]}..." if API_TOKEN else "Not set")
- 
- MODELS = [
-     "mistralai/Mistral-7B-Instruct-v0.3",
-     "mistralai/Mixtral-8x7B-Instruct-v0.1",
-     "@cf/meta/llama-3.1-8b-instruct",
-     "mistralai/Mistral-Nemo-Instruct-2407"
- ]
- 
  # Initialize LlamaParse
  llama_parser = LlamaParse(
-     api_key=llama_cloud_api_key,
+     api_key=os.environ.get("LLAMA_CLOUD_API_KEY"),
      result_type="markdown",
      num_workers=4,
      verbose=True,
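
Note: the hunk above removes the module-level Cloudflare and Hugging Face configuration and reads the LlamaParse key inline from the environment. A minimal pre-flight sketch, not part of this commit, using the same LLAMA_CLOUD_API_KEY variable the code relies on:

import os

# Fail fast when the key is absent, instead of letting LlamaParse
# surface the error later at upload time.
if not os.environ.get("LLAMA_CLOUD_API_KEY"):
    raise RuntimeError("LLAMA_CLOUD_API_KEY is not set; the llamaparse parser will not work.")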
@@ -69,7 +42,6 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
  def get_embeddings():
      return HuggingFaceEmbeddings(model_name="avsolatorio/GIST-Embedding-v0")
 
- # Add this at the beginning of your script, after imports
  DOCUMENTS_FILE = "uploaded_documents.json"
 
  def load_documents():
@@ -82,7 +54,6 @@ def save_documents(documents):
      with open(DOCUMENTS_FILE, "w") as f:
          json.dump(documents, f)
 
- # Replace the global uploaded_documents with this
  uploaded_documents = load_documents()
 
  # Modify the update_vectors function
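
Note: uploaded_documents is now rehydrated from uploaded_documents.json at import time instead of starting as an empty global. Judging from the doc["name"] and doc["selected"] reads in display_documents() later in this diff, each record is a small dict; a hypothetical example of the persisted shape (the real records may carry more fields):

# Illustrative only: what load_documents() plausibly returns.
example_documents = [
    {"name": "report.pdf", "selected": True},
    {"name": "notes.pdf", "selected": False},
]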
@@ -181,142 +152,37 @@ def delete_documents(selected_docs):
 
      return f"Deleted documents: {', '.join(deleted_docs)}", display_documents()
 
- def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
-     print(f"Starting generate_chunked_response with {num_calls} calls")
-     full_response = ""
-     messages = [{"role": "user", "content": prompt}]
- 
-     if model == "@cf/meta/llama-3.1-8b-instruct":
-         # Cloudflare API
-         for i in range(num_calls):
-             print(f"Starting Cloudflare API call {i+1}")
-             if should_stop:
-                 print("Stop clicked, breaking loop")
-                 break
-             try:
-                 response = requests.post(
-                     f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
-                     headers={"Authorization": f"Bearer {API_TOKEN}"},
-                     json={
-                         "stream": true,
-                         "messages": [
-                             {"role": "system", "content": "You are a friendly assistant"},
-                             {"role": "user", "content": prompt}
-                         ],
-                         "max_tokens": max_tokens,
-                         "temperature": temperature
-                     },
-                     stream=true
-                 )
- 
-                 for line in response.iter_lines():
-                     if should_stop:
-                         print("Stop clicked during streaming, breaking")
-                         break
-                     if line:
-                         try:
-                             json_data = json.loads(line.decode('utf-8').split('data: ')[1])
-                             chunk = json_data['response']
-                             full_response += chunk
-                         except json.JSONDecodeError:
-                             continue
-                 print(f"Cloudflare API call {i+1} completed")
-             except Exception as e:
-                 print(f"Error in generating response from Cloudflare: {str(e)}")
-     else:
-         # Original Hugging Face API logic
-         client = InferenceClient(model, token=huggingface_token)
- 
-         for i in range(num_calls):
-             print(f"Starting Hugging Face API call {i+1}")
-             if should_stop:
-                 print("Stop clicked, breaking loop")
-                 break
-             try:
-                 for message in client.chat_completion(
-                     messages=messages,
-                     max_tokens=max_tokens,
-                     temperature=temperature,
-                     stream=True,
-                 ):
-                     if should_stop:
-                         print("Stop clicked during streaming, breaking")
-                         break
-                     if message.choices and message.choices[0].delta and message.choices[0].delta.content:
-                         chunk = message.choices[0].delta.content
-                         full_response += chunk
-                 print(f"Hugging Face API call {i+1} completed")
-             except Exception as e:
-                 print(f"Error in generating response from Hugging Face: {str(e)}")
- 
-     # Clean up the response
-     clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
-     clean_response = clean_response.replace("Using the following context:", "").strip()
-     clean_response = clean_response.replace("Using the following context from the PDF documents:", "").strip()
- 
-     # Remove duplicate paragraphs and sentences
-     paragraphs = clean_response.split('\n\n')
-     unique_paragraphs = []
-     for paragraph in paragraphs:
-         if paragraph not in unique_paragraphs:
-             sentences = paragraph.split('. ')
-             unique_sentences = []
-             for sentence in sentences:
-                 if sentence not in unique_sentences:
-                     unique_sentences.append(sentence)
-             unique_paragraphs.append('. '.join(unique_sentences))
- 
-     final_response = '\n\n'.join(unique_paragraphs)
- 
-     print(f"Final clean response: {final_response[:100]}...")
-     return final_response
- 
- def duckduckgo_search(query):
-     with DDGS() as ddgs:
-         results = ddgs.text(query, max_results=5)
-         return results
+ def refresh_documents():
+     global uploaded_documents
+     uploaded_documents = load_documents()
+     return display_documents()
 
- class CitingSources(BaseModel):
-     sources: List[str] = Field(
-         ...,
-         description="List of sources to cite. Should be an URL of the source."
+ def display_documents():
+     return gr.CheckboxGroup(
+         choices=[doc["name"] for doc in uploaded_documents],
+         value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
+         label="Select documents to query or delete"
      )
- def chatbot_interface(message, history, use_web_search, model, temperature, num_calls):
-     if not message.strip():
-         return "", history
- 
-     history = history + [(message, "")]
- 
-     try:
-         for response in respond(message, history, model, temperature, num_calls, use_web_search):
-             history[-1] = (message, response)
-             yield history
-     except gr.CancelledError:
-         yield history
-     except Exception as e:
-         logging.error(f"Unexpected error in chatbot_interface: {str(e)}")
-         history[-1] = (message, f"An unexpected error occurred: {str(e)}")
-         yield history
- 
- def retry_last_response(history, use_web_search, model, temperature, num_calls):
-     if not history:
-         return history
- 
-     last_user_msg = history[-1][0]
-     history = history[:-1]  # Remove the last response
- 
-     return chatbot_interface(last_user_msg, history, use_web_search, model, temperature, num_calls)
+ def initial_conversation():
+     return [
+         (None, "Welcome! I'm your AI assistant for web search and PDF analysis. Here's how you can use me:\n\n"
+         "1. Set the toggle for Web Search and PDF Search from the checkbox in Additional Inputs drop down window\n"
+         "2. Use web search to find information\n"
+         "3. Upload the documents and ask questions about uploaded PDF documents by selecting your respective document\n"
+         "4. For any queries feel free to reach out @[email protected] or discord - shreyas094\n\n"
+         "To get started, upload some PDFs or ask me a question!")
+     ]
 
- from duckduckgo_search import DDGS
- 
- def respond(message, history, use_web_search, model, temperature, num_calls, selected_docs):
+ def respond(message, history, use_web_search, selected_docs, model):
      logging.info(f"User Query: {message}")
      logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
      logging.info(f"Selected Documents: {selected_docs}")
+     logging.info(f"Model Used: {model}")
 
      try:
          if use_web_search:
-             for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
+             for main_content, sources in get_response_with_search(message):
                  response = f"{main_content}\n\n{sources}"
                  yield response
          else:
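
Note: the new respond() is a generator, and gr.ChatInterface renders each yielded string as the current, growing assistant reply. A standalone sketch of that streaming contract, with made-up tokens:

def stream_reply(message):
    # Each yield replaces the pending assistant message in the UI,
    # so the visible text grows as chunks arrive.
    reply = ""
    for token in ("Hello", ", ", "world"):
        reply += token
        yield reply

for partial in stream_reply("hi"):
    print(partial)  # prints "Hello", then "Hello, ", then "Hello, world"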
@@ -324,10 +190,11 @@ def respond(message, history, use_web_search, model, temperature, num_calls, sel
              if os.path.exists("faiss_database"):
                  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
                  retriever = database.as_retriever(search_kwargs={"k": 20})
- 
+ 
+                 # Filter relevant documents based on user selection
                  all_relevant_docs = retriever.get_relevant_documents(message)
                  relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
- 
+ 
                  if not relevant_docs:
                      yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
                      return
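
Note: document selection is enforced after retrieval by matching each chunk's source metadata against the checked filenames. A self-contained illustration of that filter with hypothetical documents:

from langchain_core.documents import Document

docs = [
    Document(page_content="alpha", metadata={"source": "a.pdf"}),
    Document(page_content="beta", metadata={"source": "b.pdf"}),
]
selected_docs = ["a.pdf"]
# Same comprehension as in respond(): only chunks from ticked files survive.
relevant_docs = [doc for doc in docs if doc.metadata["source"] in selected_docs]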
@@ -337,231 +204,90 @@ def respond(message, history, use_web_search, model, temperature, num_calls, sel
                  context_str = "No documents available."
                  yield "No documents available. Please upload PDF documents to answer questions."
                  return
- 
-             prompt = f"""Using the following context from the PDF documents:
- {context_str}
- Write a detailed and complete response that answers the following user question: '{message}'"""
- 
-             try:
-                 response = DDGS().chat(prompt, model="llama-3-70b")
-                 yield response
-             except Exception as e:
-                 logging.error(f"Error with DuckDuckGo chat API: {str(e)}")
-                 logging.info("Falling back to Hugging Face API")
-                 yield from get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature)
 
+             for partial_response in get_response_from_duckduckgo(message, context_str, model):
+                 yield partial_response
      except Exception as e:
-         logging.error(f"Error: {str(e)}")
-         yield f"An error occurred: {str(e)}. Please try again or select a different model."
- 
- logging.basicConfig(level=logging.DEBUG)
- 
- def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2, search_type="pdf"):
-     headers = {
-         "Authorization": f"Bearer {API_TOKEN}",
-         "Content-Type": "application/json"
-     }
-     model = "@cf/meta/llama-3.1-8b-instruct"
- 
-     if search_type == "pdf":
-         instruction = f"""Using the following context from the PDF documents:
- {context}
- Write a detailed and complete response that answers the following user question: '{query}'"""
-     else:  # web search
-         instruction = f"""Using the following context:
- {context}
- Write a detailed and complete research document that fulfills the following user request: '{query}'
- After writing the document, please provide a list of sources used in your response."""
- 
-     inputs = [
-         {"role": "system", "content": instruction},
-         {"role": "user", "content": query}
-     ]
- 
-     payload = {
-         "messages": inputs,
-         "stream": True,
-         "temperature": temperature,
-         "max_tokens": 32000
-     }
+         logging.error(f"Error in responding: {str(e)}")
+         yield f"An error occurred: {str(e)}. Please try again later."
 
-     full_response = ""
-     for i in range(num_calls):
+ def get_response_from_duckduckgo(query, context, model):
+     with DDGS() as ddgs:
          try:
-             with requests.post(f"{API_BASE_URL}{model}", headers=headers, json=payload, stream=True) as response:
-                 if response.status_code == 200:
-                     for line in response.iter_lines():
-                         if line:
-                             try:
-                                 json_response = json.loads(line.decode('utf-8').split('data: ')[1])
-                                 if 'response' in json_response:
-                                     chunk = json_response['response']
-                                     full_response += chunk
-                                     yield full_response
-                             except (json.JSONDecodeError, IndexError) as e:
-                                 logging.error(f"Error parsing streaming response: {str(e)}")
-                                 continue
-                 else:
-                     logging.error(f"HTTP Error: {response.status_code}, Response: {response.text}")
-                     yield f"I apologize, but I encountered an HTTP error: {response.status_code}. Please try again later."
+             response = ddgs.chat(f"Using the following context:\n{context}\nRespond to the following query: {query}", model=model, timeout=30)
+             yield response
          except Exception as e:
-             logging.error(f"Error in generating response from Cloudflare: {str(e)}")
-             yield f"I apologize, but an error occurred: {str(e)}. Please try again later."
- 
-     if not full_response:
-         yield "I apologize, but I couldn't generate a response at this time. Please try again later."
+             logging.error(f"Error in getting response from Duckduckgo: {str(e)}")
+             yield f"An error occurred: {str(e)}. Please try again later."
 
- def create_web_search_vectors(search_results):
-     embed = get_embeddings()
- 
-     documents = []
-     for result in search_results:
-         if 'body' in result:
-             content = f"{result['title']}\n{result['body']}\nSource: {result['href']}"
-             documents.append(Document(page_content=content, metadata={"source": result['href']}))
- 
-     return FAISS.from_documents(documents, embed)
- 
- def get_response_with_search(query, model, num_calls=3, temperature=0.2):
-     search_results = duckduckgo_search(query)
-     context = "\n".join([f"{result['title']}\n{result['body']}" for result in search_results])
- 
-     prompt = f"""Using the following context from web search results:
- {context}
- You are an expert AI assistant, write a detailed and complete research document that fulfills the following user request: '{query}'
- Base your entire response strictly on the information retrieved from trusted sources. Importantly, only include information that is directly supported by the retrieved content.
- If any part of the information cannot be verified from the given sources, clearly state that it could not be confirmed.
- After writing the document, please provide a list of sources used in your response."""
+ def chatbot_interface(message, history, use_web_search, model, selected_docs):
+     if not message.strip():
+         return "", history
+ 
+     history = history + [(message, "")]
 
      try:
-         response = DDGS().chat(prompt, model="llama-3-70b")
-         yield response, ""
+         for response in respond(message, history, use_web_search, selected_docs, model):
+             history[-1] = (message, response)
+             yield history
+     except gr.CancelledError:
+         yield history
      except Exception as e:
-         logging.error(f"Error with DuckDuckGo chat API: {str(e)}")
-         logging.info("Falling back to Hugging Face API")
-         yield from get_response_from_huggingface(prompt, model, num_calls, temperature)
- 
- def get_response_from_huggingface(prompt, model, num_calls=3, temperature=0.2):
-     client = InferenceClient(model, token=huggingface_token)
- 
-     main_content = ""
-     for i in range(num_calls):
-         for message in client.chat_completion(
-             messages=[{"role": "user", "content": prompt}],
-             max_tokens=10000,
-             temperature=temperature,
-             stream=True,
-         ):
-             if message.choices and message.choices[0].delta and message.choices[0].delta.content:
-                 chunk = message.choices[0].delta.content
-                 main_content += chunk
-                 yield main_content, ""  # Yield partial main content without sources
- 
- def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
-     logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
- 
-     embed = get_embeddings()
-     if os.path.exists("faiss_database"):
-         logging.info("Loading FAISS database")
-         database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-     else:
-         logging.warning("No FAISS database found")
-         yield "No documents available. Please upload PDF documents to answer questions."
-         return
- 
-     filtered_docs = []
-     for doc_id, doc in database.docstore._dict.items():
-         if isinstance(doc, Document) and doc.metadata.get("source") in selected_docs:
-             filtered_docs.append(doc)
- 
-     logging.info(f"Number of documents after pre-filtering: {len(filtered_docs)}")
- 
-     if not filtered_docs:
-         logging.warning(f"No documents found for the selected sources: {selected_docs}")
-         yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
+         logging.error(f"Unexpected error in chatbot_interface: {str(e)}")
+         history[-1] = (message, f"An unexpected error occurred: {str(e)}")
+         yield history
+ 
+ def get_response_with_search(query):
+     search_results = duckduckgo_search(query)
+     web_search_database = create_web_search_vectors(search_results)
+ 
+     if not web_search_database:
+         yield "No web search results available. Please try again.", ""
          return
 
-     filtered_db = FAISS.from_documents(filtered_docs, embed)
- 
-     retriever = filtered_db.as_retriever(search_kwargs={"k": 10})
-     logging.info(f"Retrieving relevant documents for query: {query}")
+     retriever = web_search_database.as_retriever(search_kwargs={"k": 5})
      relevant_docs = retriever.get_relevant_documents(query)
-     logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
 
-     context_str = "\n".join([doc.page_content for doc in relevant_docs])
-     logging.info(f"Total context length: {len(context_str)}")
+     context = "\n".join([doc.page_content for doc in relevant_docs])
 
-     prompt = f"""Using the following context from the PDF documents:
- {context_str}
- Write a detailed and complete response that answers the following user question: '{query}'"""
- 
-     try:
-         response = DDGS().chat(prompt, model="llama-3-70b")
-         yield response
-     except Exception as e:
-         logging.error(f"Error with DuckDuckGo chat API: {str(e)}")
-         logging.info("Falling back to Hugging Face API")
-         yield from get_response_from_huggingface(prompt, model, num_calls, temperature)
- 
-     logging.info("Finished generating response")
- 
- def vote(data: gr.LikeData):
-     if data.liked:
-         print(f"You upvoted this response: {data.value}")
-     else:
-         print(f"You downvoted this response: {data.value}")
- 
- css = """
- /* Fine-tune chatbox size */
- .chatbot-container {
-     height: 600px !important;
-     width: 100% !important;
- }
- .chatbot-container > div {
-     height: 100%;
-     width: 100%;
- }
- """
- 
- uploaded_documents = []
- 
- def display_documents():
-     return gr.CheckboxGroup(
-         choices=[doc["name"] for doc in uploaded_documents],
-         value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
-         label="Select documents to query or delete"
-     )
- 
- def initial_conversation():
-     return [
-         (None, "Welcome! I'm your AI assistant for web search and PDF analysis. Here's how you can use me:\n\n"
-         "1. Set the toggle for Web Search and PDF Search from the checkbox in Additional Inputs drop down window\n"
-         "2. Use web search to find information\n"
-         "3. Upload the documents and ask questions about uploaded PDF documents by selecting your respective document\n"
-         "4. For any queries feel free to reach out @[email protected] or discord - shreyas094\n\n"
-         "To get started, upload some PDFs or ask me a question!")
-     ]
- # Add this new function
+     for partial_response in get_response_from_duckduckgo(query, context, "gpt-4o-mini"):  # Use the default model for web search
+         yield partial_response, ""  # Yield streaming response without sources
+ 
+ def create_web_search_vectors(search_results):
+     embed = get_embeddings()
+ 
+     documents = []
+     for result in search_results:
+         if 'body' in result:
+             content = f"{result['title']}\n{result['body']}\nSource: {result['href']}"
+             documents.append(Document(page_content=content, metadata={"source": result['href']}))
+ 
+     return FAISS.from_documents(documents, embed)
 
  def refresh_documents():
      global uploaded_documents
      uploaded_documents = load_documents()
      return display_documents()
 
- # Define the checkbox outside the demo block
- document_selector = gr.CheckboxGroup(label="Select documents to query")
+ DUCKDUCKGO_CHAT_MODELS = [
+     "gpt-4o-mini",
+     "claude-3-haiku",
+     "llama-3.1-70b",
+     "mixtral-8x7b"
+ ]
 
+ document_selector = gr.CheckboxGroup(label="Select documents to query")
  use_web_search = gr.Checkbox(label="Use Web Search", value=True)
+ model_selector = gr.Dropdown(choices=DUCKDUCKGO_CHAT_MODELS, label="Select Duckduckgo Chat Model", value="gpt-4o-mini")
 
  custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
 
  demo = gr.ChatInterface(
-     respond,
+     chatbot_interface,
      additional_inputs=[
-         gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3]),
-         gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
-         gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls"),
          use_web_search,
-         document_selector
+         document_selector,
+         model_selector
      ],
      title="AI-powered Web Search and PDF Chat Assistant",
      description="Chat with your PDFs or use web search to answer questions. Toggle between Web Search and PDF Chat in Additional Inputs below.",
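
Note: the new get_response_with_search() still calls duckduckgo_search(), whose definition this commit deletes, so that helper must survive elsewhere in the file for the web-search path to run. A sketch that mirrors the removed helper, plus the ddgs.chat call the new code wraps (model names as listed in DUCKDUCKGO_CHAT_MODELS):

from duckduckgo_search import DDGS

def duckduckgo_search(query):
    # Mirrors the deleted helper: a list of dicts with 'title', 'body', 'href'.
    with DDGS() as ddgs:
        return ddgs.text(query, max_results=5)

with DDGS() as ddgs:
    answer = ddgs.chat("Respond to the following query: ...", model="gpt-4o-mini", timeout=30)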
@@ -592,16 +318,15 @@ demo = gr.ChatInterface(
      cache_examples=False,
      analytics_enabled=False,
      textbox=gr.Textbox(placeholder=custom_placeholder, container=False, scale=7),
-     chatbot = gr.Chatbot(
-         show_copy_button=True,
-         likeable=True,
-         layout="bubble",
-         height=400,
-         value=initial_conversation()
-     )
+     chatbot=gr.Chatbot(
+         show_copy_button=True,
+         likeable=True,
+         layout="bubble",
+         height=400,
+         value=initial_conversation()
+     )
  )
 
- # Add file upload functionality
  with demo:
      gr.Markdown("## Upload and Manage PDF Documents")
 
@@ -610,21 +335,18 @@ with demo:
      parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
      update_button = gr.Button("Upload Document")
      refresh_button = gr.Button("Refresh Document List")
- 
+ 
      update_output = gr.Textbox(label="Update Status")
      delete_button = gr.Button("Delete Selected Documents")
- 
-     # Update both the output text and the document selector
+ 
      update_button.click(update_vectors,
                          inputs=[file_input, parser_dropdown],
                          outputs=[update_output, document_selector])
- 
-     # Add the refresh button functionality
+ 
      refresh_button.click(refresh_documents,
                           inputs=[],
                           outputs=[document_selector])
- 
-     # Add the delete button functionality
+ 
      delete_button.click(delete_documents,
                          inputs=[document_selector],
                          outputs=[update_output, document_selector])
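
Note: the button wiring relies on the Gradio pattern where an event handler returns a component to re-render its output target; display_documents() returns a fresh gr.CheckboxGroup that replaces document_selector. A minimal sketch of the same pattern with hypothetical names:

import gradio as gr

with gr.Blocks() as sketch:
    selector = gr.CheckboxGroup(label="Items")
    refresh = gr.Button("Refresh")
    # Returning a CheckboxGroup from the handler updates `selector` in place.
    refresh.click(lambda: gr.CheckboxGroup(choices=["a", "b"], value=["a"]),
                  inputs=[],
                  outputs=[selector])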
@@ -635,12 +357,12 @@ with demo:
      1. Upload PDF documents using the file input at the top.
      2. Select the PDF parser (pypdf or llamaparse) and click "Upload Document" to update the vector store.
      3. Select the documents you want to query using the checkboxes.
-     4. Ask questions in the chat interface.
-     5. Toggle "Use Web Search" to switch between PDF chat and web search.
-     6. Adjust Temperature and Number of API Calls to fine-tune the response generation.
+     4. Select the Duckduckgo Chat Model you want to use.
+     5. Ask questions in the chat interface.
+     6. Toggle "Use Web Search" to switch between PDF chat and web search.
      7. Use the provided examples or ask your own questions.
      """
      )
 
  if __name__ == "__main__":
-     demo.launch(share=True)
+     demo.launch(share=True)