Update app.py
app.py CHANGED
@@ -140,7 +140,7 @@ def on_prompt_template_change_description(prompt_template):
 
 
 
-
+# set to load only PDF files, but could be pointed at a specific directory so that other file types don't get embedded
 
 def langchain_document_loader():
     """
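The comment above describes restricting ingestion to PDFs. A minimal sketch of one way to do that with LangChain's `DirectoryLoader`; the body of `langchain_document_loader` is not shown in this hunk, so the `data/` path and the glob pattern are assumptions:

```python
from langchain.document_loaders import DirectoryLoader, PyPDFLoader

# "data/" is a hypothetical source directory; the glob restricts loading
# to PDFs so other file types never reach the embedding step.
pdf_loader = DirectoryLoader("data/", glob="**/*.pdf", loader_cls=PyPDFLoader)
documents = pdf_loader.load()
print(f"{len(documents)} PDF pages loaded.")
```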
@@ -180,6 +180,8 @@ def langchain_document_loader():
     """
     return documents
 
+
+
 langchain_document_loader()
 
 text_splitter = RecursiveCharacterTextSplitter(
@@ -193,6 +195,7 @@ chunks = text_splitter.split_documents(documents=documents)
 
 
 
+# just FYI, does not impact anything
 
 def tiktoken_tokens(documents,model="gpt-3.5-turbo"):
     """Use tiktoken (tokeniser for OpenAI models) to return a list of token lengths per document."""
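`tiktoken_tokens` is flagged as FYI-only; its body is not in this diff, but from the docstring it is presumably close to this sketch (`chunks` comes from the splitter above):

```python
import tiktoken

def tiktoken_tokens(documents, model="gpt-3.5-turbo"):
    """Return a list of token lengths per document, using the model's tokenizer."""
    encoding = tiktoken.encoding_for_model(model)
    return [len(encoding.encode(doc.page_content)) for doc in documents]

# chunks_length = tiktoken_tokens(chunks)  # feeds the percentile prints below
```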
@@ -211,6 +214,7 @@ print(f"Number of tokens - 50% percentile : {int(np.quantile(chunks_length,0.5))}")
 print(f"Number of tokens - 75% percentile : {int(np.quantile(chunks_length,0.75))}")
 
 
+# For embeddings I am just using the free HF model, so the others are turned off
 
 def select_embeddings_model(LLM_service="HuggingFace"):
     """Connect to the embeddings API endpoint by specifying
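The HuggingFace branch of `select_embeddings_model` is outside this hunk; for reference, a minimal sketch of what a free, local HF embedding model looks like (the model name is an assumption):

```python
from langchain.embeddings import HuggingFaceEmbeddings

embeddings_HuggingFace = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"  # hypothetical choice
)
vector = embeddings_HuggingFace.embed_query("What does this app do?")
print(len(vector))  # embedding dimensionality, 384 for MiniLM
```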
@@ -244,6 +248,7 @@ embeddings_HuggingFace = select_embeddings_model(LLM_service="HuggingFace")
 
 
 
+# Creates the DB that will hold the embedding vectors
 
 def create_vectorstore(embeddings,documents,vectorstore_name):
     """Create a Chroma vector database."""
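The body of `create_vectorstore` is outside this hunk. Since the call site below unpacks two return values (`vector_store_OpenAI,_ = create_vectorstore(...)`), a plausible sketch is:

```python
from langchain.vectorstores import Chroma

def create_vectorstore(embeddings, documents, vectorstore_name):
    """Create a Chroma vector database persisted under vectorstore_name."""
    # current_dir is assumed to be defined earlier in app.py
    persist_directory = current_dir + "/" + vectorstore_name
    vector_store = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    return vector_store, persist_directory
```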
@@ -261,6 +266,8 @@ def create_vectorstore(embeddings,documents,vectorstore_name):
 
 create_vectorstores = True # change to True to create vectorstores
 
+# Then we tell it to store the embeddings in the VectorStore (sticking with HF for this)
+
 if create_vectorstores:
     """
     vector_store_OpenAI,_ = create_vectorstore(
@@ -288,6 +295,9 @@ if create_vectorstores:
 
     print("")
 
+
+# Now we tell it to keep the chromadb persistent so that it can be referenced at any time
+
 """
 vector_store_OpenAI = Chroma(
     persist_directory = LOCAL_VECTOR_STORE_DIR.as_posix() + "/Vit_All_OpenAI_Embeddings",
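On the persistence point: with older langchain/chromadb versions, writes had to be flushed explicitly, while newer chromadb persists automatically whenever `persist_directory` is set. A sketch of the write-then-reload round trip, under those assumptions:

```python
# Flush to disk after building (a no-op on newer chromadb versions).
vector_store_HF.persist()

# In a later session, reload by passing the same directory and embedding function.
vector_store_HF = Chroma(
    persist_directory=current_dir + "/Vit_All_HF_Embeddings",
    embedding_function=embeddings_HuggingFace,
)
```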
@@ -302,13 +312,15 @@ vector_store_google = Chroma(
 print("vector_store_google:",vector_store_google._collection.count(),"chunks.")
 """
 
+
+
 vector_store_HF = Chroma(
     persist_directory = current_dir + "/Vit_All_HF_Embeddings",
     embedding_function=embeddings_HuggingFace)
 print("vector_store_HF:",vector_store_HF._collection.count(),"chunks.")
-# Create a new file
 
 
+# Now we create the code to retrieve embeddings from the vectorstore (again, sticking with HF)
 
 def Vectorstore_backed_retriever(
     vectorstore,search_type="similarity",k=10,score_threshold=None
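`Vectorstore_backed_retriever`'s body is not in the diff; given the signature above, it presumably reduces to `as_retriever`:

```python
def Vectorstore_backed_retriever(
    vectorstore, search_type="similarity", k=10, score_threshold=None
):
    """Create a retriever on top of the vectorstore (sketch of the likely body)."""
    search_kwargs = {"k": k}
    if score_threshold is not None:
        search_kwargs["score_threshold"] = score_threshold
    return vectorstore.as_retriever(search_type=search_type, search_kwargs=search_kwargs)

# base_retriever_HF = Vectorstore_backed_retriever(vector_store_HF, "similarity", k=10)
```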
@@ -341,6 +353,8 @@ base_retriever_HF = Vectorstore_backed_retriever(vector_store_HF,"similarity",k=
 
 
 
+# This next code takes the retrieved chunks, gets rid of redundant ones, filters out non-useful content, and provides back a shorter, compressed context for use
+
 def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None):
     """Build a ContextualCompressionRetriever.
     We wrap the base_retriever (a vectorstore-backed retriever) into a ContextualCompressionRetriever.
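The compression step described above matches LangChain's documented pipeline pattern: re-split, drop near-duplicates, then keep only query-relevant pieces. A sketch consistent with the signature and docstring (the exact pipeline stages are assumptions):

```python
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import (
    DocumentCompressorPipeline,
    EmbeddingsFilter,
)
from langchain.document_transformers import EmbeddingsRedundantFilter
from langchain.text_splitter import CharacterTextSplitter

def create_compression_retriever(
    embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None
):
    # 1. re-split the retrieved documents into smaller pieces
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, separator=". ")
    # 2. drop near-duplicate pieces
    redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
    # 3. keep only the pieces relevant to the query
    relevant_filter = EmbeddingsFilter(
        embeddings=embeddings, k=k, similarity_threshold=similarity_threshold
    )
    pipeline = DocumentCompressorPipeline(
        transformers=[splitter, redundant_filter, relevant_filter]
    )
    return ContextualCompressionRetriever(
        base_compressor=pipeline, base_retriever=base_retriever
    )
```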
@@ -389,6 +403,8 @@ compression_retriever_HF = create_compression_retriever(
     k=16)
 
 
+# Can use the following to rank the returned chunks in order of relevance, but all of them get used anyway, so I am skipping it for now (can test later)
+
 '''
 def CohereRerank_retriever(
     base_retriever,
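If the Cohere reranker is revisited later, the commented-out function presumably wraps `CohereRerank` in another `ContextualCompressionRetriever`. A hedged sketch; the parameter names here are assumptions, since the real signature sits inside the quoted-out block:

```python
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

def CohereRerank_retriever(base_retriever, cohere_api_key, top_n=10):
    """Re-order the base retriever's results by Cohere relevance scores."""
    compressor = CohereRerank(cohere_api_key=cohere_api_key, top_n=top_n)
    return ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=base_retriever
    )
```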
@@ -417,6 +433,9 @@ def CohereRerank_retriever(
 '''
 
 
+
+# Don't have to use this, but it brings all the above pieces together in a single function (probably not worth implementing since I already have the pieces)
+
 '''
 def retrieval_blocks(
     create_vectorstore=True, # if True a Chroma vectorstore is created, else the Chroma vectorstore will be loaded
@@ -527,7 +546,7 @@ and has {vector_store._collection.count()} chunks.")
 
 
 
-
+# Can use any of these LLMs for responses; for now I am using Gemini-Pro for the bot (this is for responses, not embeddings)
 
 
 def instantiate_LLM(LLM_provider,api_key,temperature=0.7,top_p=0.95,model_name=None):
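`instantiate_LLM` supports several providers, but only its signature is visible here. A sketch of Google and OpenAI branches consistent with that signature (the branch details are assumptions):

```python
from langchain.chat_models import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI

def instantiate_LLM(LLM_provider, api_key, temperature=0.7, top_p=0.95, model_name=None):
    """Return a chat model for the chosen provider."""
    if LLM_provider == "Google":
        return ChatGoogleGenerativeAI(
            google_api_key=api_key,
            model=model_name or "gemini-pro",
            temperature=temperature,
            top_p=top_p,
            convert_system_message_to_human=True,
        )
    if LLM_provider == "OpenAI":
        return ChatOpenAI(
            openai_api_key=api_key,
            model_name=model_name or "gpt-3.5-turbo",
            temperature=temperature,
            model_kwargs={"top_p": top_p},
        )
    raise ValueError(f"Unknown LLM_provider: {LLM_provider}")
```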
@@ -577,19 +596,9 @@ def instantiate_LLM(LLM_provider,api_key,temperature=0.7,top_p=0.95,model_name=None):
     )
     return llm
 
-"""
-def get_environment_variable(key):
-    if key in os.environ:
-        value = os.environ.get(key)
-        print(f"\n[INFO]: {key} retrieved successfully.")
-    else :
-        print(f"\n[ERROR]: {key} is not found in your environment variables.")
-        value = getpass(f"Insert your {key}")
-    return value
-"""
-
 
 
+# This creates a history (memory) of prior questions. I am using Gemini for this, but I left the GPT code in case I decide to switch later on.
 
 def create_memory(model_name='gemini-pro',memory_max_token=None):
 #def create_memory(model_name='gpt-3.5-turbo',memory_max_token=None):
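`create_memory`'s body is out of view. Given the two call variants below (Gemini with no token cap, gpt-3.5-turbo with `memory_max_token=20`), a plausible sketch; `openai_api_key` is an assumed variable:

```python
from langchain.memory import ConversationBufferMemory, ConversationSummaryBufferMemory

def create_memory(model_name='gemini-pro', memory_max_token=None):
    """Plain buffer memory by default; token-capped summary memory for OpenAI models."""
    if model_name == 'gpt-3.5-turbo' and memory_max_token is not None:
        return ConversationSummaryBufferMemory(
            llm=instantiate_LLM("OpenAI", api_key=openai_api_key,  # assumed variable
                                model_name=model_name),
            max_token_limit=memory_max_token,
            memory_key='chat_history',
            return_messages=True,
            output_key='answer',
        )
    return ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
        output_key='answer',
    )
```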
@@ -621,21 +630,18 @@ def create_memory(model_name='gemini-pro',memory_max_token=None):
 memory = create_memory(model_name='gemini-pro',memory_max_token=None)
 #memory = create_memory(model_name='gpt-3.5-turbo',memory_max_token=20)
 
-# save context
+# save history as context for the conversation
+
 memory.save_context(
-    inputs={"question":"
-    outputs={"answer":"""
-    which allows users to virtually place any e-commerce item in any setting, ensuring detailed, semantically coherent blending with realistic
-    lighting and shadows. It effectively incorporates fine-grained cues from the reference image into the main U-Net decoder
-    using a secondary U-Net encoder.
-    DTC can handle a variety of e-commerce products and can generate images using in-the-wild images & references.
-    It is superior to existing zero-shot personalization methods, especially in preserving the fine-grained details of items."""}
+    inputs={"question":"."},
+    outputs={"answer":"""."""}
 )
 
-
+# loads the memory contents saved above
 memory.load_memory_variables({})
 
 
+# Create the prompt template for the conversation
 
 standalone_question_template = """Given the following conversation and a follow up question,
 rephrase the follow up question to be a standalone question, in the English language.\n\n
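The commit strips the long DTC sample exchange down to `"."` placeholders, which keeps the seeding structure without the content. For reference, what seeding the memory looks like with illustrative text:

```python
# Illustrative question/answer strings; any text works here.
memory.save_context(
    inputs={"question": "What can this assistant do?"},
    outputs={"answer": "It answers questions about the loaded PDF documents."},
)
print(memory.load_memory_variables({}))  # -> {'chat_history': [...]}
```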
@@ -671,48 +677,6 @@ def answer_template(language="english"):
 answer_prompt = ChatPromptTemplate.from_template(answer_template())
 
 
-"""
-# invoke the ChatPromptTemplate
-answer_prompt.invoke(
-    {"question":"plaese ",
-     "context":[Document(page_content="include...")], # the context is a list of retrieved documents.
-     "chat_history":memory.chat_memory}
-)
-
-"""
-
-
-
-
-"""
-# Instantiate the retriever and the ConversationalRetrievalChain :
-
-retriever_Google = retrieval_blocks(
-    create_vectorstore=False,
-    LLM_service="Google",
-    vectorstore_name="Vit_All_Google_Embeddings",
-    retriever_type="Cohere_reranker",
-    base_retriever_search_type="similarity", base_retriever_k=12,
-    compression_retriever_k=16,
-    cohere_api_key=cohere_api_key,cohere_top_n=10,
-)
-
-
-chain_gemini,memory_gemini = custom_ConversationalRetrievalChain(
-    llm = instantiate_LLM(
-        LLM_provider="Google",api_key=google_api_key,temperature=0.5,model_name="gemini-pro"
-    ),
-    condense_question_llm = instantiate_LLM(
-        LLM_provider="Google",api_key=google_api_key,temperature=0.1,model_name="gemini-pro"),
-    retriever=retriever_Google,
-    language="english",
-    llm_provider="Google",
-    model_name="gemini-pro"
-)
-
-
-memory_gemini.clear()
-"""
 
 
 chain = ConversationalRetrievalChain.from_llm(
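For the prompt templates: the standalone-question template above is the standard condense-question pattern, which expects `chat_history` and `question` placeholders (assumed here, since the template string is truncated in the hunk):

```python
from langchain.prompts import PromptTemplate

standalone_question_prompt = PromptTemplate(
    input_variables=["chat_history", "question"],  # assumed placeholders
    template=standalone_question_template,
)
# The answer prompt is built the same way via
# ChatPromptTemplate.from_template(answer_template()), as shown above.
```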
@@ -733,37 +697,10 @@ chain = ConversationalRetrievalChain.from_llm(
 )
 
 
-"""
-# let's invoke the chain
-response = chain.invoke({"question":"what does Google stand for?"})
-print(response['answer'])
-
 
-chain.memory.load_memory_variables({})
 
-follow_up_question = "plaese give more details about it, including its use cases and implementation."
 
-
-"""
-
-
-
-"""
-# let's invoke the chain
-response = chain.invoke({"question":"what does Google stand for?"})
-print(response['answer'])
-
-
-chain.memory.load_memory_variables({})
-
-follow_up_question = "plaese give more details about it, including its use cases and implementation."
-
-chain.invoke({"question":follow_up_question})['answer'])
-"""
-
-
-
-# It is not clear to me if this is running. If you take it out, it still provides answers but also using different promptTemplate
+# It
 
 def create_ConversationalRetrievalChain(
     llm,condense_question_llm,
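The deleted smoke tests show how the chain is exercised; kept here as a sketch with illustrative questions:

```python
response = chain.invoke({"question": "What does this document cover?"})
print(response["answer"])

# A follow-up is condensed into a standalone question using the memory.
follow_up = "Please give more details, including use cases."
print(chain.invoke({"question": follow_up})["answer"])
```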
@@ -838,22 +775,18 @@ def submit_message(prompt, prompt_template, temperature, max_tokens, context_len
 
     history = state['messages']
 
-
-    # return gr.update(value=''), [(history[i]['content'], history[i+1]['content']) for i in range(0, len(history)-1, 2)], state
-
-    #prompt_template = prompt_templates[prompt_template]
+
     global prompt_template_name
     prompt_template_name = prompt_template
-    print(prompt_template)
+    print(prompt_template) # prints who is responding if I move to multiple experts
     print(prompt_templates[prompt_template])
 
 
 
     completion = chain.invoke({"question":prompt})
-
+
     #print(completion)
-
-    #completion = chain.run(input_documents=docs, question=query)
+
 
 
     chain.memory.load_memory_variables({})
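`submit_message` is a Gradio callback. The Blocks layout itself is not part of this diff, so the wiring below is hypothetical, inferred from the handler's signature and the commented-out return value (textbox update, chatbot pairs, state):

```python
import gradio as gr

# Hypothetical wiring; component choices and ranges are assumptions.
with gr.Blocks() as demo:
    state = gr.State({"messages": []})
    chatbot = gr.Chatbot()
    prompt_box = gr.Textbox(label="Question")
    prompt_template_dd = gr.Dropdown(list(prompt_templates.keys()), label="Prompt template")
    temperature = gr.Slider(0.0, 1.0, value=0.7, label="Temperature")
    max_tokens = gr.Slider(64, 2048, value=512, label="Max tokens")
    context_length = gr.Slider(1, 10, value=4, label="Context length")

    prompt_box.submit(
        submit_message,
        inputs=[prompt_box, prompt_template_dd, temperature,
                max_tokens, context_length, state],
        outputs=[prompt_box, chatbot, state],
    )

demo.launch()
```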