Remove not used codes
- app.py +5 -6
- embeddings.py +1 -1
- llms.py +8 -77
- preprocess_documents.py +59 -0
- models.py → ragchain.py +28 -201
- vectorestores.py +1 -1
app.py
CHANGED

@@ -5,17 +5,16 @@ from pathlib import Path
 from dotenv import load_dotenv
 import pickle

-from llms import get_groq_chat

 import gradio as gr

 from huggingface_hub import login
+from langchain.vectorstores import FAISS

+from llms import get_groq_chat
 from documents import load_pdf_as_docs, load_xml_as_docs
-
 from vectorestores import get_faiss_vectorestore

-from langchain.vectorstores import FAISS

 # For debug
 # from langchain.globals import set_debug

@@ -100,7 +99,7 @@ llm = get_groq_chat(model_name="llama-3.1-70b-versatile")


 # # # Create conversation qa chain (Note: conversation is not supported yet)
-from models import RAGChain
+from ragchain import RAGChain

 rag_chain = RAGChain()
 lisa_qa_conversation = rag_chain.create(rerank_retriever, llm, add_citation=True)

@@ -213,8 +212,8 @@ def postprocess_citation(text, source_docs):
     # print(f"source ids by re: {source_ids}")
     # source_ids = re.findall(r"\[\[(.*?)\]\]", text) # List[Char]
     aligned_source_ids = list(map(lambda x: int(x) - 1, source_ids)) # shift index-1
-    # print(f"
-    # Filter fake
+    # print(f"source ids generated by llm: {aligned_source_ids}")
+    # Filter fake source ids as LLM might generate false source ids
     candidate_source_ids = list(range(len(source_docs)))
     filtered_source_ids = set(
         [i for i in aligned_source_ids if i in candidate_source_ids]
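The third app.py hunk tightens the comments around citation post-processing: the LLM is prompted to cite retrieved documents as [[n]], the ids are shifted to 0-based, and ids with no matching retrieved document are discarded. A minimal, self-contained sketch of that filtering step; the regex and the sample inputs are illustrative assumptions, not the app's exact code:

import re

def filter_cited_sources(text, source_docs):
    # Illustrative: extract [[n]] citation markers emitted by the LLM.
    source_ids = re.findall(r"\[\[(\d+)\]\]", text)
    # Shift to 0-based indices, mirroring the index-1 step in app.py.
    aligned_source_ids = [int(x) - 1 for x in source_ids]
    # Drop ids the LLM hallucinated, i.e. ids with no retrieved document.
    candidate_source_ids = set(range(len(source_docs)))
    return sorted(set(aligned_source_ids) & candidate_source_ids)

# Example: two retrieved docs, the model cites [[1]] and a bogus [[5]].
print(filter_cited_sources("Solid electrolytes [[1]] degrade [[5]].", ["doc a", "doc b"]))
# -> [0]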
embeddings.py
CHANGED

@@ -20,7 +20,7 @@ def get_hf_embeddings(model_name=None):


 def get_jinaai_embeddings(model_name="jinaai/jina-embeddings-v2-base-en", device="auto"):
-    """Get
+    """Get jinaai embedding."""

     # device: cpu or cuda
     if device == "auto":
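The embeddings.py change only restores the docstring of get_jinaai_embeddings; the device="auto" branch it mentions is not shown in the hunk. A plausible sketch of that kind of device auto-selection, assuming PyTorch is available (the real function presumably wraps a HuggingFace embedding model rather than just returning a device string):

import torch

def resolve_device(device="auto"):
    # device: cpu or cuda; "auto" picks cuda when a GPU is visible.
    if device == "auto":
        return "cuda" if torch.cuda.is_available() else "cpu"
    return device

print(resolve_device())       # "cuda" on a GPU machine, else "cpu"
print(resolve_device("cpu"))  # explicit override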
llms.py
CHANGED

@@ -1,44 +1,17 @@
 # from langchain import HuggingFaceHub, LLMChain
-from langchain.chains import LLMChain
 from langchain.llms import HuggingFacePipeline
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
     pipeline,
-    T5Tokenizer,
-    T5ForConditionalGeneration,
-    GPT2TokenizerFast,
 )
 from transformers import LlamaForCausalLM, AutoModelForCausalLM, LlamaTokenizer
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, PromptTemplate
 from langchain_groq import ChatGroq


-# model_path = "/mnt/localstorage/yinghan/llm/orca_mini_v3_13b"
-# model = LlamaForCausalLM.from_pretrained(model_path, device_map="auto")#, load_in_8bit=True)
-# tokenizer = AutoTokenizer.from_pretrained(model_path)
 from langchain.chat_models import ChatOpenAI
-# from langchain_openai import ChatOpenAI
-# from langchain_openai import ChatOpenAI
-from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import Chroma
-from langchain.text_splitter import (
-    CharacterTextSplitter,
-    RecursiveCharacterTextSplitter,
-)
-from langchain.document_loaders import TextLoader, UnstructuredHTMLLoader, PyPDFLoader
-from langchain.chains.retrieval_qa.base import RetrievalQA
-from langchain.llms import HuggingFaceHub
-from dotenv import load_dotenv
 from langchain.llms import HuggingFaceTextGenInference
-from langchain.chains.question_answering import load_qa_chain
-from langchain.chains import ConversationalRetrievalChain
-from langchain.chains.conversation.memory import (
-    ConversationBufferMemory,
-    ConversationBufferWindowMemory,
-)


 def get_llm_hf_online(inference_api_url=""):

@@ -50,20 +23,12 @@ def get_llm_hf_online(inference_api_url=""):
     )

     llm = HuggingFaceTextGenInference(
-        # cache=None, # Optional: whether to use a cache
         verbose=True, # Provides detailed logs of operation
-        # callbacks=[StreamingStdOutCallbackHandler()], # Handling streams
         max_new_tokens=1024, # Maximum number of tokens that can be generated.
-        # top_k=2, # The number of top-k tokens to consider during generation
         top_p=0.95, # Threshold for controlling randomness in text generation process.
-
-
-        # repetition_penalty=None, # Repetition penalty during generation
-        # truncate=None, # Truncates the input tokens to the given size
-        # stop_sequences=None, # A list of stop sequences for generation
-        inference_server_url=inference_api_url, # URL of the inference server
+        temperature=0.1,
+        inference_server_url=inference_api_url,
         timeout=10, # Timeout for connection with the url
-        # streaming=True, # Streaming the answer
     )

     return llm

@@ -72,12 +37,9 @@ def get_llm_hf_online(inference_api_url=""):
 def get_llm_hf_local(model_path):
     """Get local LLM."""

-
-    # model_path = "/mnt/localstorage/yinghan/llm/zephyr-7b-beta"
-    model = LlamaForCausalLM.from_pretrained( # or AutoModelForCausalLM. TODO: which is better? what's difference?
+    model = LlamaForCausalLM.from_pretrained(
         model_path, device_map="auto"
-    )
-    # model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")#, load_in_8bit=True) # which is better?
+    )
     tokenizer = AutoTokenizer.from_pretrained(model_path)

     # print('making a pipeline...')

@@ -86,8 +48,8 @@ def get_llm_hf_local(model_path):
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        max_new_tokens=1024, #
-        model_kwargs={"temperature": 0.1}, #
+        max_new_tokens=1024, # better setting?
+        model_kwargs={"temperature": 0.1}, # better setting?
     )
     llm = HuggingFacePipeline(pipeline=pipe)


@@ -95,51 +57,20 @@ def get_llm_hf_local(model_path):



-def get_llm_openai_chat(model_name, inference_server_url, langfuse_callback=None):
+def get_llm_openai_chat(model_name, inference_server_url):
     """Get openai-like LLM."""

-    # Some defaults
-    # chat_model_name = "openchat/openchat_3.5"
-    # inference_server_url = "http://localhost:8080/v1"
     llm = ChatOpenAI(
         model=model_name,
         openai_api_key="EMPTY",
         openai_api_base=inference_server_url,
         max_tokens=1024, # better setting?
-        temperature=0,
-        # callbacks=[langfuse_callback],
+        temperature=0,
     )

-    # The following is not required for building a normal llm
-    # use the Ragas LangchainLLM wrapper to create a RagasLLM instance
-    # vllm = LangchainLLM(llm=chat)
-    # return vllm
     return llm


-def get_chat_vllm(model_name, inference_server_url, langfuse_callback=None):
-
-    # to fix
-    # Create vLLM Langchain instance
-
-    # Some defaults
-    # chat_model_name = "openchat/openchat_3.5"
-    # inference_server_url = "http://localhost:8080/v1"
-    chat = ChatOpenAI(
-        model=model_name,
-        openai_api_key="EMPTY",
-        openai_api_base=inference_server_url,
-        max_tokens=512, # better setting?
-        temperature=0.1, # default 0.7, better setting?
-        # callbacks=[langfuse_callback],
-    )
-
-    # The following is not required for building a normal llm
-    # use the Ragas LangchainLLM wrapper to create a RagasLLM instance
-    # vllm = LangchainLLM(llm=chat)
-    # return vllm
-    return chat
-
 def get_groq_chat(model_name="llama-3.1-70b-versatile"):

     llm = ChatGroq(temperature=0, model_name=model_name)
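After this cleanup llms.py keeps get_llm_hf_online, get_llm_hf_local, get_llm_openai_chat and get_groq_chat, and app.py now calls get_groq_chat. A hedged usage sketch; the assumption that ChatGroq reads GROQ_API_KEY from the environment, the placeholder key, the .invoke call (recent LangChain Runnable interface) and the local server URL are illustrative, not taken from the diff:

import os

from llms import get_groq_chat, get_llm_openai_chat

# Assumption: ChatGroq picks up its credentials from the environment.
os.environ.setdefault("GROQ_API_KEY", "<your-groq-api-key>")  # placeholder, not a real key

llm = get_groq_chat(model_name="llama-3.1-70b-versatile")
print(llm.invoke("One sentence on solid-state electrolytes.").content)

# Alternative path: an OpenAI-compatible inference server (URL is illustrative).
# llm = get_llm_openai_chat("openchat/openchat_3.5", "http://localhost:8080/v1")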
preprocess_documents.py
ADDED

@@ -0,0 +1,59 @@
+"""
+Load and parse files (pdf) in data/documents and save cached pkl files.
+"""
+
+import os
+import pickle
+
+from dotenv import load_dotenv
+
+
+from huggingface_hub import login
+
+from documents import load_pdf_as_docs, get_doc_chunks
+from embeddings import get_jinaai_embeddings
+
+
+# Load and set env variables
+load_dotenv()
+
+# Set huggingface api for downloading embedding model
+HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]
+login(HUGGINGFACEHUB_API_TOKEN)
+
+
+def save_to_pickle(obj, filename):
+    with open(filename, "wb") as file:
+        pickle.dump(obj, file, pickle.HIGHEST_PROTOCOL)
+
+
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+database_root = "./data/db"
+document_path = "./data/documents"
+
+# Parse pdf as "Documents" instances and save as "docs.pkl"
+docs = load_pdf_as_docs(document_path)
+save_to_pickle(docs, os.path.join(database_root, "docs.pkl"))
+
+# Get text chunks and save as "docs_chunks.pkl"
+document_chunks = get_doc_chunks(docs)
+save_to_pickle(document_chunks, os.path.join(database_root, "docs_chunks.pkl"))
+
+embeddings = get_jinaai_embeddings(device="auto")
+
+# Create and save vectorstore
+from vectorestores import get_faiss_vectorestore
+
+vectorstore = get_faiss_vectorestore(embeddings)
+
+# Create retrievers
+from retrievers import get_parent_doc_retriever
+
+# Get parent doc (small-to-big) retriever and save as "docstore.pkl"
+parent_doc_retriever = get_parent_doc_retriever(
+    docs,
+    vectorstore,
+    save_path_root=database_root,
+    save_vectorstore=True,
+    save_docstore=True,
+)
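preprocess_documents.py only writes the caches. A counterpart loader, shown as a sketch under the assumption that the app reads the same paths back with pickle (load_from_pickle is a hypothetical helper, not part of this diff):

import os
import pickle

database_root = "./data/db"

def load_from_pickle(filename):
    # Inverse of save_to_pickle in preprocess_documents.py.
    with open(filename, "rb") as file:
        return pickle.load(file)

docs = load_from_pickle(os.path.join(database_root, "docs.pkl"))
document_chunks = load_from_pickle(os.path.join(database_root, "docs_chunks.pkl"))
print(len(docs), len(document_chunks))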
models.py → ragchain.py
RENAMED

@@ -1,30 +1,39 @@
 from langchain.chains import LLMChain

-from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, PromptTemplate
+from langchain.prompts import (
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+    ChatPromptTemplate,
+    PromptTemplate,
+)

 from langchain.chains import ConversationalRetrievalChain
 from langchain.chains.conversation.memory import (
-    ConversationBufferMemory,
     ConversationBufferWindowMemory,
 )


-from langchain.chains import
+from langchain.chains import StuffDocumentsChain
+

 def get_cite_combine_docs_chain(llm):
-
+
     # Ref: https://github.com/langchain-ai/langchain/issues/7239
     # Function to format each document with an index, source, and content.
     def format_document(doc, index, prompt):
         """Format a document into a string based on a prompt template."""
         # Create a dictionary with document content and metadata.
-        base_info = {
-
+        base_info = {
+            "page_content": doc.page_content,
+            "index": index,
+            "source": doc.metadata["source"],
+        }
+
         # Check if any metadata is missing.
         missing_metadata = set(prompt.input_variables).difference(base_info)
         if len(missing_metadata) > 0:
             raise ValueError(f"Missing metadata: {list(missing_metadata)}.")
-
+
         # Filter only necessary variables for the prompt.
         document_info = {k: base_info[k] for k in prompt.input_variables}
         return prompt.format(**document_info)

@@ -37,10 +46,16 @@ def get_cite_combine_docs_chain(llm):
                 format_document(doc, i, self.document_prompt)
                 for i, doc in enumerate(docs, 1)
             ]
-
+
             # Filter only relevant input variables for the LLM chain prompt.
-            inputs = {
-
+            inputs = {
+                k: v
+                for k, v in kwargs.items()
+                if k in self.llm_chain.prompt.input_variables
+            }
+            inputs[self.document_variable_name] = self.document_separator.join(
+                doc_strings
+            )
             return inputs

     # Ref: https://huggingface.co/spaces/Ekimetrics/climate-question-answering/blob/main/climateqa/engine/prompts.py

@@ -68,7 +83,7 @@ def get_cite_combine_docs_chain(llm):
    -----------------------
    Question: {question}

-   Helpful Answer with format citations:"""
+   Helpful Answer with format citations:""",
    )

    # Initialize the custom chain with a specific document format.

@@ -83,194 +98,8 @@ def get_cite_combine_docs_chain(llm):
         ),
         document_variable_name="context",
     )
-
-    return combine_docs_chain
-
-
-class ConversationChainFactory:
-    def __init__(
-        self, memory_key="chat_history", output_key="answer", return_messages=True
-    ):
-        self.memory_key = memory_key
-        self.output_key = output_key
-        self.return_messages = return_messages
-
-    def create(self, retriever, llm):
-        memory = ConversationBufferWindowMemory( # ConversationBufferMemory(
-            memory_key=self.memory_key,
-            return_messages=self.return_messages,
-            output_key=self.output_key,
-        )
-
-        # prompt:
-        # https://github.com/langchain-ai/langchain/issues/6530
-
-
-        prompt_template = """You are a helpful research assistant. Use the following pieces of context to answer the question at the end.
-        Please ignore the contexts if they are not related to the question. If you don't know the answer, just say that you don't know,
-        don't try to make up an answer.
-
-        {context}
-
-        Question: {question}
-
-        Helpful Answer:"""
-        PROMPT = PromptTemplate(
-            template=prompt_template, input_variables=["context", "question"]
-        )
-
-        # Rephrase question based on history
-        # https://www.paepper.com/blog/posts/how-to-build-a-chatbot-out-of-your-website-content/
-        # tested: Be careful with the technical abbreviations and items, do not modify them unless necessary -> worse
-        # You are a helpful research assistant. -> worse, tend to expand question
-        # My testing prompt
-        # _template = """Given the following conversation and a follow up question,
-        # rephrase the follow up question to be a standalone question only when it is necessary.
-        # If the conversation is not related to the question, do not rephrase the follow up question
-        # and just put the standalone question exactly the same as the original follow up question.
-        # The standalone question should be in its original language, which is usually english.
-
-        # Chat History: {chat_history}
-
-        # Follow Up Question: {question}
-
-        # Standalone Question:"""
-
-        # Type 2: https://github.com/langchain-ai/langchain/issues/4076
-        _template = """Return text in the original language of the follow up question.
-        If the follow up question does not need context, return the exact same text back.
-        Never rephrase the follow up question given the chat history unless the follow up question needs context.
-
-        Chat History: {chat_history}
-
-        Follow Up Question: {question}
-
-        Standalone Question:"""
-        CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
-        # or just turn if off, see https://github.com/langchain-ai/langchain/issues/4076
-
-        # Change prompt to context-based QA
-        # system_template = """You are a professional scientist. Use the following pieces of context to answer the users question.
-        # Please ignore the contexts if they are not related to the question. If you don't know the answer, just say that you don't know, don't try to make up an answer.
-        # ----------------
-        # {context}"""
-        # messages = [
-        #     SystemMessagePromptTemplate.from_template(system_template),
-        #     HumanMessagePromptTemplate.from_template("{question}"),
-        # ]
-        # QA_CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
-
-        # https://github.com/langchain-ai/langchain/issues/4608
-        conversation_chain = ConversationalRetrievalChain.from_llm(
-            llm=llm,
-            retriever=retriever,
-            memory=memory,
-            return_source_documents=True,
-            # return_generated_question=True, # for debug
-            rephrase_question=False, # Disable rephrase, for test purpose
-            get_chat_history=lambda x: x,
-            # verbose=True,
-            # combine_docs_chain_kwargs={"prompt": PROMPT},
-            # condense_question_prompt=CONDENSE_QUESTION_PROMPT,
-        )
-
-

-
-
-
-class ConversationChainFactoryDev:
-    def __init__(
-        self, memory_key="chat_history", output_key="answer", return_messages=True
-    ):
-        self.memory_key = memory_key
-        self.output_key = output_key
-        self.return_messages = return_messages
-
-    def create(self, retriever, llm):
-        memory = ConversationBufferWindowMemory( # ConversationBufferMemory(
-            memory_key=self.memory_key,
-            return_messages=self.return_messages,
-            output_key=self.output_key,
-        )
-
-        # prompt:
-        # https://github.com/langchain-ai/langchain/issues/6530
-
-
-        prompt_template = """You are a helpful research assistant. Use the following pieces of context to answer the question at the end.
-        Please ignore the contexts if they are not related to the question. If you don't know the answer, just say that you don't know,
-        don't try to make up an answer.
-
-        {context}
-
-        Question: {question}
-
-        Helpful Answer:"""
-        PROMPT = PromptTemplate(
-            template=prompt_template, input_variables=["context", "question"]
-        )
-
-        # Rephrase question based on history
-        # https://www.paepper.com/blog/posts/how-to-build-a-chatbot-out-of-your-website-content/
-        # tested: Be careful with the technical abbreviations and items, do not modify them unless necessary -> worse
-        # You are a helpful research assistant. -> worse, tend to expand question
-        # My testing prompt
-        # _template = """Given the following conversation and a follow up question,
-        # rephrase the follow up question to be a standalone question only when it is necessary.
-        # If the conversation is not related to the question, do not rephrase the follow up question
-        # and just put the standalone question exactly the same as the original follow up question.
-        # The standalone question should be in its original language, which is usually english.
-
-        # Chat History: {chat_history}
-
-        # Follow Up Question: {question}
-
-        # Standalone Question:"""
-
-        # Type 2: https://github.com/langchain-ai/langchain/issues/4076
-        _template = """Return text in the original language of the follow up question.
-        If the follow up question does not need context, return the exact same text back.
-        Never rephrase the follow up question given the chat history unless the follow up question needs context.
-
-        Chat History: {chat_history}
-
-        Follow Up Question: {question}
-
-        Standalone Question:"""
-        CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
-        # or just turn if off, see https://github.com/langchain-ai/langchain/issues/4076
-
-        # Change prompt to context-based QA
-        # system_template = """You are a professional scientist. Use the following pieces of context to answer the users question.
-        # Please ignore the contexts if they are not related to the question. If you don't know the answer, just say that you don't know, don't try to make up an answer.
-        # ----------------
-        # {context}"""
-        # messages = [
-        #     SystemMessagePromptTemplate.from_template(system_template),
-        #     HumanMessagePromptTemplate.from_template("{question}"),
-        # ]
-        # QA_CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
-
-        # https://github.com/langchain-ai/langchain/issues/4608
-
-
-
-        conversation_chain = ConversationalRetrievalChain.from_llm(
-            llm=llm,
-            retriever=retriever,
-            memory=memory,
-            return_source_documents=True,
-            # return_generated_question=True, # for debug
-            rephrase_question=False, # Disable rephrase, for test purpose
-            get_chat_history=lambda x: x,
-            # verbose=True,
-            # combine_docs_chain_kwargs={"prompt": PROMPT},
-            # condense_question_prompt=CONDENSE_QUESTION_PROMPT,
-        )
-
-
-        return conversation_chain
+    return combine_docs_chain


 class RAGChain:

@@ -302,12 +131,10 @@ class RAGChain:
             # combine_docs_chain_kwargs={"prompt": PROMPT}, # additional prompt control
             # condense_question_prompt=CONDENSE_QUESTION_PROMPT, # additional prompt control
         )
-
+
         # Add citation, ATTENTION: experimental
         if add_citation:
-            # from models import get_cite_combine_docs_chain
             cite_combine_docs_chain = get_cite_combine_docs_chain(llm)
             conversation_chain.combine_docs_chain = cite_combine_docs_chain

         return conversation_chain
-
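The base_info dictionary added to format_document exposes page_content, index and source to a per-document prompt, which is what lets the LLM emit [[n]] markers that app.py's postprocess_citation can map back to sources. A standalone sketch of that formatting step; the document_prompt template and the FakeDoc input are illustrative assumptions, not the chain's actual prompt:

from langchain.prompts import PromptTemplate

# Hypothetical per-document prompt; the real one is defined inside the chain.
document_prompt = PromptTemplate(
    input_variables=["index", "source", "page_content"],
    template="[[{index}]] (source: {source})\n{page_content}",
)

def format_document(doc, index, prompt):
    base_info = {
        "page_content": doc.page_content,
        "index": index,
        "source": doc.metadata["source"],
    }
    missing = set(prompt.input_variables).difference(base_info)
    if missing:
        raise ValueError(f"Missing metadata: {list(missing)}.")
    return prompt.format(**{k: base_info[k] for k in prompt.input_variables})

class FakeDoc:
    page_content = "LLZO is a garnet-type solid electrolyte."
    metadata = {"source": "review.pdf"}

print(format_document(FakeDoc(), 1, document_prompt))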
vectorestores.py
CHANGED

@@ -1,7 +1,7 @@
 from langchain.vectorstores import Chroma, FAISS

 def get_faiss_vectorestore(embeddings):
-    # Add extra text to
+    # Add extra text to init
     texts = ["LISA - Lithium Ion Solid-state Assistant"]
     vectorstore = FAISS.from_texts(texts, embeddings)

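The fixed comment in get_faiss_vectorestore points at why the index is created from a single placeholder text: FAISS.from_texts needs at least one entry to set up the index, and real chunks are added later (via the parent-document retriever built in preprocess_documents.py). A hedged usage sketch; the added text and the query are illustrative:

from embeddings import get_jinaai_embeddings
from vectorestores import get_faiss_vectorestore

embeddings = get_jinaai_embeddings(device="auto")
vectorstore = get_faiss_vectorestore(embeddings)

# Illustrative: add a chunk directly and run a similarity search.
vectorstore.add_texts(["Sulfide electrolytes offer high ionic conductivity."])
print(vectorstore.similarity_search("ionic conductivity", k=1)[0].page_content)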