acumplid committed · Commit e27df4e · 1 parent: af12306
modified ui and rag
app.py
CHANGED
@@ -8,10 +8,17 @@ from rag import RAG
 from utils import setup
 
 MAX_NEW_TOKENS = 700
-SHOW_MODEL_PARAMETERS_IN_UI = os.environ.get("SHOW_MODEL_PARAMETERS_IN_UI", default="
+SHOW_MODEL_PARAMETERS_IN_UI = os.environ.get("SHOW_MODEL_PARAMETERS_IN_UI", default="False") == "True"
+import logging
+
+logging.basicConfig(level=logging.INFO, format='[%(asctime)s][%(levelname)s] - %(message)s')
 
 setup()
 
+print("Loading RAG model...")
+print("Show model parameters in UI: ", SHOW_MODEL_PARAMETERS_IN_UI)
+
+# Load the RAG model
 rag = RAG(
     vs_hf_repo_path=os.getenv("VS_REPO_NAME"),
     vectorstore_path=os.getenv("VECTORSTORE_PATH"),
@@ -40,6 +47,9 @@ def generate(prompt, model_parameters):
 
 
 def submit_input(input_, num_chunks, max_new_tokens, repetition_penalty, top_k, top_p, do_sample, temperature):
+    """
+    Function to handle the input and call the RAG model for inference.
+    """
     if input_.strip() == "":
         gr.Warning("Not possible to inference an empty input")
         return None
@@ -89,41 +99,53 @@ def clear():
 
 def gradio_app():
     with gr.Blocks(theme=theme) as demo:
+        # App Description
+        # =====================================================================================================================================
        with gr.Row():
            with gr.Column():
+
                gr.Markdown(
-                    """# Demo de Retrieval-Augmented Generation per la Viquipèdia
-                    🔍 **Retrieval-Augmented Generation** (RAG) és una tecnologia d'IA que permet interrogar un repositori de documents amb preguntes
-                    en llenguatge natural, i combina tècniques de recuperació d'informació avançades amb models generatius per redactar una resposta
-                    fent servir només la informació existent en els documents del repositori.
+                    # """# Demo de Retrieval-Augmented Generation per la Viquipèdia
+                    # 🔍 **Retrieval-Augmented Generation** (RAG) és una tecnologia d'IA que permet interrogar un repositori de documents amb preguntes
+                    # en llenguatge natural, i combina tècniques de recuperació d'informació avançades amb models generatius per redactar una resposta
+                    # fent servir només la informació existent en els documents del repositori.
 
-                    🎯 **Objectiu:** Aquest és un demostrador amb Viquipèdia i genera la resposta fent servir el model salamandra-7b-instruct.
+                    # 🎯 **Objectiu:** Aquest és un demostrador amb Viquipèdia i genera la resposta fent servir el model salamandra-7b-instruct.
 
-                    ⚠️ **Advertencies**: Aquesta versió és experimental. El contingut generat per aquest model no està supervisat i pot ser incorrecte.
-                    Si us plau, tingueu-ho en compte quan exploreu aquest recurs. El model en inferencia asociat a aquesta demo de desenvolupament no funciona continuament. Si vol fer proves,
-                    contacteu amb nosaltres a Langtech.
-                    """
+                    # ⚠️ **Advertencies**: Aquesta versió és experimental. El contingut generat per aquest model no està supervisat i pot ser incorrecte.
+                    # Si us plau, tingueu-ho en compte quan exploreu aquest recurs. El model en inferencia asociat a aquesta demo de desenvolupament no funciona continuament. Si vol fer proves,
+                    # contacteu amb nosaltres a Langtech.
+                    # """
                )
-
-
+
+
+        # with gr.Row(equal_height=True):
+        with gr.Row(equal_height=False):
+            # User Input
+            # =====================================================================================================================================
+            with gr.Column(scale=2, variant="panel"):
+
                input_ = Textbox(
-                    lines=
+                    lines=5,
                    label="Input",
                    placeholder="Qui va crear la guerra de les Galaxies ?",
                )
-
-
-
-
+
+
+                # with gr.Column(variant="panel"):
+                with gr.Row(variant="default"):
+                    # with gr.Row(variant="panel"):
+                    clear_btn = Button("Clear",)
                    submit_btn = Button("Submit", variant="primary", interactive=False)
 
-        with gr.Row(variant="panel"):
-
+                # with gr.Row(variant="panel"):
+                with gr.Row(variant="default"):
+                    with gr.Accordion("Model parameters (not used)", open=False, visible=SHOW_MODEL_PARAMETERS_IN_UI):
                        num_chunks = Slider(
                            minimum=1,
                            maximum=6,
                            step=1,
-                            value=
+                            value=5,
                            label="Number of chunks"
                        )
                        max_new_tokens = Slider(
@@ -166,14 +188,29 @@ def gradio_app():
 
                parameters_compontents = [num_chunks, max_new_tokens, repetition_penalty, top_k, top_p, do_sample, temperature]
 
-
+                # Add Examples manually
+                gr.Examples(
+                    examples=[
+                        ["Qui va crear la guerra de les Galaxies?"],
+                        ["Quin era el nom real de Voltaire?"],
+                        ["Què fan al BSC?"]
+                    ],
+                    inputs=[input_], # only inputs
+                )
+
+            # Output
+            # =====================================================================================================================================
+            with gr.Column(scale=10, variant="panel"):
+
                output = Textbox(
                    lines=10,
+                    max_lines=25,
                    label="Output",
                    interactive=False,
                    show_copy_button=True
                )
-
+
+                with gr.Accordion("Sources and context:", open=False, visible=False):
                    source_context = gr.Markdown(
                        label="Sources",
                        show_label=False,
@@ -186,8 +223,9 @@ def gradio_app():
                        # autoscroll=False,
                        # show_copy_button=True
                    )
-
 
+        # Event Handlers
+        # =====================================================================================================================================
        input_.change(
            fn=change_interactive,
            inputs=[input_],
@@ -219,20 +257,20 @@ def gradio_app():
            outputs=[output, source_context, context_evaluation],
            api_name="get-results"
        )
+        # =====================================================================================================================================
 
-
-
-
-
-
-
-
-
-
-        )
+        # # Output
+        # with gr.Row():
+        #     with gr.Column(scale=0.5):
+        #         gr.Examples(
+        #             examples=[["""Qui va crear la guerra de les Galaxies ?"""],],
+        #             inputs=input_,
+        #             outputs=[output, source_context, context_evaluation],
+        #             fn=submit_input,
+        #         )
 
    demo.launch(show_api=True)
 
-
+
if __name__ == "__main__":
    gradio_app()
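For reference, a minimal sketch of the UI pattern this commit introduces in app.py: an environment flag decides whether the model-parameter accordion is rendered, and gr.Examples pre-fills the input box. The `echo` handler, layout scales, and slider below are illustrative placeholders, not the Space's actual wiring.

```python
import os
import gradio as gr

# Same parsing convention as SHOW_MODEL_PARAMETERS_IN_UI in the diff above.
SHOW_PARAMS = os.environ.get("SHOW_MODEL_PARAMETERS_IN_UI", default="False") == "True"

def echo(text, num_chunks):
    # Placeholder handler standing in for the RAG call.
    return f"(would retrieve {num_chunks} chunks for) {text}"

with gr.Blocks() as demo:
    with gr.Row(equal_height=False):
        with gr.Column(scale=2, variant="panel"):
            input_ = gr.Textbox(lines=5, label="Input")
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit", variant="primary")
            # The whole accordion is hidden unless the env flag is set.
            with gr.Accordion("Model parameters", open=False, visible=SHOW_PARAMS):
                num_chunks = gr.Slider(minimum=1, maximum=6, step=1, value=5,
                                       label="Number of chunks")
            # Examples only pre-fill the input; they do not trigger inference.
            gr.Examples(
                examples=[["Qui va crear la guerra de les Galaxies?"]],
                inputs=[input_],
            )
        with gr.Column(scale=10, variant="panel"):
            output = gr.Textbox(lines=10, max_lines=25, label="Output", interactive=False)

    submit_btn.click(fn=echo, inputs=[input_, num_chunks], outputs=[output])
    clear_btn.click(fn=lambda: ("", ""), inputs=[], outputs=[input_, output])

if __name__ == "__main__":
    demo.launch()
```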
rag.py
CHANGED
@@ -10,6 +10,10 @@ from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
 
 
+logging.basicConfig(level=logging.INFO, format='[%(asctime)s][%(levelname)s] - %(message)s')
+# logging.getLogger().setLevel(logging.INFO)
+
+
 class RAG:
     NO_ANSWER_MESSAGE: str = "Ho sento, no he pogut respondre la teva pregunta."
 
@@ -26,11 +30,15 @@ class RAG:
        self.rerank_number_contexts = rerank_number_contexts
 
        # load vectore store
-        hf_vectorstore = snapshot_download(repo_id=vs_hf_repo_path)
-
        embeddings = HuggingFaceEmbeddings(model_name=embeddings_model, model_kwargs={'device': 'cpu'})
-
-
+
+        if vs_hf_repo_path:
+            hf_vectorstore = snapshot_download(repo_id=vs_hf_repo_path)
+            self.vectore_store = FAISS.load_local(hf_vectorstore, embeddings, allow_dangerous_deserialization=True)
+        else:
+            self.vectore_store = FAISS.load_local(self.vectorstore_path, embeddings, allow_dangerous_deserialization=True)
+
+
        logging.info("RAG loaded!")
        logging.info( self.vectore_store)
 
@@ -44,44 +52,52 @@ class RAG:
 
        tokenizer = AutoTokenizer.from_pretrained(rerank_model)
        model = AutoModelForSequenceClassification.from_pretrained(rerank_model)
+        logging.info("Rerank model loaded!")
 
        def get_score(query, passage):
            """Calculate the relevance score of a passage with respect to a query."""
 
-
            inputs = tokenizer(query, passage, return_tensors='pt', truncation=True, padding=True, max_length=512)
-
+            print("Inputs: ", inputs)
 
            with torch.no_grad():
                outputs = model(**inputs)
 
-
            logits = outputs.logits
-
-
            score = logits.view(-1, ).float()
 
+            print("Score: ", score)
 
            return score
 
        scores = [get_score(instruction, c[0].page_content) for c in contexts]
+
+        print("Scores: ", scores)
+
        combined = list(zip(contexts, scores))
        sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
        sorted_texts, _ = zip(*sorted_combined)
 
        return sorted_texts[:number_of_contexts]
 
-
+
+    def get_context(self, instruction, number_of_contexts=3):
        """Retrieve the most relevant contexts for a given instruction."""
+
        logging.info("RETRIEVE DOCUMENTS")
-
-
-
-
-
-
-
+        documents_retrieved = self.vectore_store.similarity_search_with_score(instruction, k=self.rerank_number_contexts)
+        logging.info(f"Documents retrieved: {len(documents_retrieved)}")
+
+        if self.rerank_model:
+            logging.info("RERANK DOCUMENTS")
+            documents_reranked = self.rerank_contexts(instruction, documents_retrieved, number_of_contexts=number_of_contexts)
+        else:
+            logging.info("NO RERANKING")
+            documents_reranked = documents_retrieved[:number_of_contexts]
+
+        return documents_reranked
 
+
    def predict_dolly(self, instruction, context, model_parameters):
 
        api_key = os.getenv("HF_TOKEN")
@@ -155,26 +171,35 @@ class RAG:
    def get_response(self, prompt: str, model_parameters: dict) -> str:
        try:
            docs = self.get_context(prompt, model_parameters["NUM_CHUNKS"])
-            text_context, full_context, source = self.beautiful_context(docs)
-            print("#"*100)
-            logging.info("text_context")
-            logging.info(text_context)
-
-            print("#"*100)
-            logging.info("full context")
-            logging.info(full_context)
-
-            print("#"*100)
-            logging.info("source")
-            logging.info(source)
-
-            del model_parameters["NUM_CHUNKS"]
-
-            response = self.predict_completion(prompt, text_context, model_parameters)
 
+            response = ""
+
+            for i, (doc, score) in enumerate(docs):
+
+                response += "\n\n" + "="*100
+                response += f"\nDocument {i+1}"
+                response += "\n" + "="*100
+                response += f"\nScore: {score:.5f}"
+                response += f"\nTitle: {doc.metadata['title']}"
+                response += f"\nURL: {doc.metadata['url']}"
+                response += f"\nID: {doc.metadata['id']}"
+                response += f"\nStart index: {doc.metadata['start_index']}"
+                # response += f"\nSource: {doc.metadata['src']}"
+                # response += f"\nRedirected: {doc.metadata['redirected']}"
+                # url = doc.metadata['url']
+                # response += f"\nRevision ID: {url}"
+                # response += f'\nURL: <a href="{url}" target="_blank">{url}</a><br>'
+                response += "\n" + "-"*100 + "\n"
+                response += f"\nContent:\n"
+                response += doc.page_content
+
+            full_context = ""
+            source = []
+
            if not response:
                return self.NO_ANSWER_MESSAGE
 
            return response, full_context, source
+
        except Exception as err:
            print(err)