Léo Bourrel committed on
Commit
b9e3c29
·
1 Parent(s): 8505f96

feat: add custom retrieval

Browse files
Files changed (2) hide show
  1. app.py +4 -2
  2. conversation_retrieval_chain.py +64 -0
app.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  import streamlit as st
5
  import streamlit.components.v1 as components
6
  from langchain.callbacks import get_openai_callback
7
- from langchain.chains import ConversationalRetrievalChain
8
  from langchain.chains.conversation.memory import ConversationBufferMemory
9
  from langchain.embeddings import GPT4AllEmbeddings
10
  from langchain.llms import OpenAI
@@ -14,6 +14,8 @@ from connection import connect
14
  from css import load_css
15
  from message import Message
16
  from vector_store import CustomVectorStore
 
 
17
 
18
  st.set_page_config(layout="wide")
19
 
@@ -50,7 +52,7 @@ def initialize_session_state():
50
  memory = ConversationBufferMemory(
51
  output_key="answer", memory_key="chat_history", return_messages=True
52
  )
53
- st.session_state.conversation = ConversationalRetrievalChain.from_llm(
54
  llm=llm,
55
  retriever=retriever,
56
  verbose=True,
 
4
  import streamlit as st
5
  import streamlit.components.v1 as components
6
  from langchain.callbacks import get_openai_callback
7
+
8
  from langchain.chains.conversation.memory import ConversationBufferMemory
9
  from langchain.embeddings import GPT4AllEmbeddings
10
  from langchain.llms import OpenAI
 
14
  from css import load_css
15
  from message import Message
16
  from vector_store import CustomVectorStore
17
+ from conversation_retrieval_chain import CustomConversationalRetrievalChain
18
+
19
 
20
  st.set_page_config(layout="wide")
21
 
 
52
  memory = ConversationBufferMemory(
53
  output_key="answer", memory_key="chat_history", return_messages=True
54
  )
55
+ st.session_state.conversation = CustomConversationalRetrievalChain.from_llm(
56
  llm=llm,
57
  retriever=retriever,
58
  verbose=True,
conversation_retrieval_chain.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import inspect
2
+ from typing import Any, Dict, Optional
3
+
4
+ from langchain.chains.conversational_retrieval.base import (
5
+ ConversationalRetrievalChain,
6
+ _get_chat_history,
7
+ )
8
+ from langchain.callbacks.manager import CallbackManagerForChainRun
9
+
10
+
11
class CustomConversationalRetrievalChain(ConversationalRetrievalChain):
    """Conversational retrieval chain that sanity-checks the retrieved
    documents before answering.

    If the retriever returns an unusable number of documents (none, exactly
    one, or more than ten), the chain short-circuits and returns a
    user-facing message asking to rephrase, instead of running the
    combine-docs step.
    """

    def _handle_docs(self, docs):
        """Validate the retrieved document set.

        Args:
            docs: Documents returned by ``self._get_docs``.

        Returns:
            A ``(valid, message)`` tuple. ``valid`` is False when the result
            set is unusable; ``message`` then carries the user-facing
            explanation that becomes the chain's answer.
        """
        # NOTE(review): rejecting a single-document result looks deliberate
        # (too little context to answer from) — confirm with the author.
        if len(docs) == 0:
            return False, "No documents found. Can you rephrase ?"
        elif len(docs) == 1:
            return False, "Only one document found. Can you rephrase ?"
        elif len(docs) > 10:
            return False, "Too many documents found. Can you specify your request ?"
        return True, ""

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Run the chain: condense the question, retrieve, validate, answer.

        Args:
            inputs: Must contain ``"question"`` and ``"chat_history"``.
            run_manager: Optional callback manager for child runs.

        Returns:
            A dict keyed by ``self.output_key`` (the answer), plus
            ``"source_documents"`` and/or ``"generated_question"`` when the
            corresponding ``return_*`` flags are set.
        """
        _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
        question = inputs["question"]
        get_chat_history = self.get_chat_history or _get_chat_history
        chat_history_str = get_chat_history(inputs["chat_history"])

        # With prior history, condense question + history into a standalone
        # question; otherwise use the question as-is.
        if chat_history_str:
            callbacks = _run_manager.get_child()
            new_question = self.question_generator.run(
                question=question, chat_history=chat_history_str, callbacks=callbacks
            )
        else:
            new_question = question

        # Older retriever implementations do not accept run_manager; inspect
        # the signature to stay compatible with both.
        accepts_run_manager = (
            "run_manager" in inspect.signature(self._get_docs).parameters
        )
        if accepts_run_manager:
            docs = self._get_docs(new_question, inputs, run_manager=_run_manager)
        else:
            docs = self._get_docs(new_question, inputs)  # type: ignore[call-arg]

        valid_docs, message = self._handle_docs(docs)
        if not valid_docs:
            # Short-circuit: the explanation becomes the answer.
            # BUG FIX: emit exactly the keys declared by ``output_keys`` —
            # previously this always returned "source_documents" (even when
            # return_source_documents is False) and never returned
            # "generated_question", which breaks LangChain's output
            # validation when return_generated_question is True.
            output: Dict[str, Any] = {self.output_key: message}
            if self.return_source_documents:
                output["source_documents"] = docs
            if self.return_generated_question:
                output["generated_question"] = new_question
            return output

        new_inputs = inputs.copy()
        if self.rephrase_question:
            new_inputs["question"] = new_question
        new_inputs["chat_history"] = chat_history_str
        answer = self.combine_docs_chain.run(
            input_documents=docs, callbacks=_run_manager.get_child(), **new_inputs
        )
        output: Dict[str, Any] = {self.output_key: answer}
        if self.return_source_documents:
            output["source_documents"] = docs
        if self.return_generated_question:
            output["generated_question"] = new_question
        return output