pdf-rag-chatbot

Sleeping

App Files Community

farmax committed on Oct 13, 2024

Commit

dda91f4

verified ·

1 Parent(s): 1eff01a

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -14

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ import tqdm
 import accelerate
 import re
 # default_persist_directory = './chroma_HF/'
 list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
@@ -207,23 +207,34 @@ def create_collection_name(filepath):
     print('Collection name: ', collection_name)
     return collection_name
-# Initialize database
 def initialize_database(list_file_obj, chunk_size, chunk_overlap, progress=gr.Progress()):
     # Create list of documents (when valid)
     list_file_path = [x.name for x in list_file_obj if x is not None]
     # Create collection_name for vector database
-    progress(0.1, desc="Creating collection name...")
     collection_name = create_collection_name(list_file_path[0])
-    progress(0.25, desc="Loading document...")
     # Load document and create splits
     doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
     # Create or load vector database
     progress(0.5, desc="Generating vector database...")
-    # global vector_db
     vector_db = create_db(doc_splits, collection_name)
-    progress(0.9, desc="Done!")
-    return vector_db, collection_name, "Complete!"
 def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     # print("llm_option",llm_option)
@@ -244,7 +255,7 @@ def format_chat_history(message, chat_history):
 def conversation(qa_chain, message, history):
     formatted_chat_history = format_chat_history(message, history)
-    #print("formatted_chat_history",formatted_chat_history)
     # Generate response using QA chain
     response = qa_chain({"question": message, "chat_history": formatted_chat_history})
@@ -259,8 +270,8 @@ def conversation(qa_chain, message, history):
     response_source1_page = response_sources[0].metadata["page"] + 1
     response_source2_page = response_sources[1].metadata["page"] + 1
     response_source3_page = response_sources[2].metadata["page"] + 1
-    # print ('chat response: ', response_answer)
-    # print('DB source', response_sources)
     # Append user message and response to chat history
     new_history = history + [(message, response_answer)]
@@ -310,9 +321,9 @@ def demo():
             with gr.Row():
                 db_progress = gr.Textbox(label="Vector database initialization", value="None")
             with gr.Row():
-                db_btn = gr.Button("Generate vector database")
-        with gr.Tab("Step 3 - Initializia QA chain"):
             with gr.Row():
                 llm_btn = gr.Radio(list_llm_simple, \
                     label="LLM models", value = list_llm_simple[0], type="index", info="Scegli il tuo modello LLM")
@@ -328,7 +339,7 @@ def demo():
             with gr.Row():
                 llm_progress = gr.Textbox(value="None",label="QA chain initialization")
             with gr.Row():
-                qachain_btn = gr.Button("Initializza Question Answering chain")
         with gr.Tab("Passo 4 - Chatbot"):

 import accelerate
 import re
+from chromadb.utils import get_default_config
 # default_persist_directory = './chroma_HF/'
 list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
     print('Collection name: ', collection_name)
     return collection_name
+# Inizializzazione database
 def initialize_database(list_file_obj, chunk_size, chunk_overlap, progress=gr.Progress()):
+    # Check if a database already exists
+    try:
+        # Get the current configuration
+        config = get_default_config()
+        # Delete the existing database
+        chromadb.delete(config)
+        print("Existing database deleted successfully.")
+    except Exception as e:
+        print(f"Error deleting existing database: {str(e)}")
     # Create list of documents (when valid)
     list_file_path = [x.name for x in list_file_obj if x is not None]
     # Create collection_name for vector database
+    progress(0.1, desc="Creazione collection name...")
     collection_name = create_collection_name(list_file_path[0])
+    progress(0.25, desc="Caricamento documenti..")
     # Load document and create splits
     doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
     # Create or load vector database
     progress(0.5, desc="Generating vector database...")
     vector_db = create_db(doc_splits, collection_name)
+    progress(0.9, desc="Fatto!")
+    return vector_db, collection_name, "Completato!"
 def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
     # print("llm_option",llm_option)
 def conversation(qa_chain, message, history):
     formatted_chat_history = format_chat_history(message, history)
+    print("formatted_chat_history",formatted_chat_history)
     # Generate response using QA chain
     response = qa_chain({"question": message, "chat_history": formatted_chat_history})
     response_source1_page = response_sources[0].metadata["page"] + 1
     response_source2_page = response_sources[1].metadata["page"] + 1
     response_source3_page = response_sources[2].metadata["page"] + 1
+    print ('chat response: ', response_answer)
+    print('DB source', response_sources)
     # Append user message and response to chat history
     new_history = history + [(message, response_answer)]
             with gr.Row():
                 db_progress = gr.Textbox(label="Vector database initialization", value="None")
             with gr.Row():
+                db_btn = gr.Button("Genera vector database")
+        with gr.Tab("Step 3 - Inizializza QA chain"):
             with gr.Row():
                 llm_btn = gr.Radio(list_llm_simple, \
                     label="LLM models", value = list_llm_simple[0], type="index", info="Scegli il tuo modello LLM")
             with gr.Row():
                 llm_progress = gr.Textbox(value="None",label="QA chain initialization")
             with gr.Row():
+                qachain_btn = gr.Button("Inizializza Question Answering chain")
         with gr.Tab("Passo 4 - Chatbot"):