acumplid committed
Commit f75ccae · 1 Parent(s): cdb6cea

initial commit

Files changed (7):
1. README.md +5 -5
2. app.py +246 -0
3. handler.py +14 -0
4. input_reader.py +22 -0
5. rag.py +180 -0
6. requirements.txt +14 -0
7. utils.py +33 -0
README.md CHANGED
@@ -1,10 +1,10 @@
 ---
-title: Wirag
-emoji: 🐠
-colorFrom: red
-colorTo: purple
+title: EADOP RAG
+emoji: 💻
+colorFrom: indigo
+colorTo: yellow
 sdk: gradio
-sdk_version: 5.27.0
+sdk_version: 4.24.0
 app_file: app.py
 pinned: false
 license: apache-2.0
app.py ADDED
@@ -0,0 +1,246 @@
+import os
+import gradio as gr
+from gradio.components import Textbox, Button, Slider, Checkbox
+from AinaTheme import theme
+from urllib.error import HTTPError
+
+from rag import RAG
+from utils import setup
+
+MAX_NEW_TOKENS = 700
+SHOW_MODEL_PARAMETERS_IN_UI = os.environ.get("SHOW_MODEL_PARAMETERS_IN_UI", default="True") == "True"
+
+setup()
+
+rag = RAG(
+    vs_hf_repo_path=os.getenv("VS_REPO_NAME"),
+    vectorstore_path=os.getenv("VECTORSTORE_PATH"),
+    hf_token=os.getenv("HF_TOKEN"),
+    embeddings_model=os.getenv("EMBEDDINGS"),
+    model_name=os.getenv("MODEL"),
+    rerank_model=os.getenv("RERANK_MODEL"),
+    rerank_number_contexts=int(os.getenv("RERANK_NUMBER_CONTEXTS")),
+)
+
+
+def generate(prompt, model_parameters):
+    try:
+        output, context, source = rag.get_response(prompt, model_parameters)
+        return output, context, source
+    except HTTPError as err:
+        if err.code == 400:
+            gr.Warning(
+                "The inference endpoint is only available Monday through Friday, from 08:00 to 20:00 CET."
+            )
+    except Exception:
+        gr.Warning(
+            "The inference endpoint is not available right now. Please try again later."
+        )
+    # Always hand back a 3-tuple so the caller can unpack safely.
+    return None, None, None
+
+
+def submit_input(input_, num_chunks, max_new_tokens, repetition_penalty, top_k, top_p, do_sample, temperature):
+    if input_.strip() == "":
+        gr.Warning("Cannot run inference on an empty input")
+        return None, None, None
+
+    model_parameters = {
+        "NUM_CHUNKS": num_chunks,
+        "max_new_tokens": max_new_tokens,
+        "repetition_penalty": repetition_penalty,
+        "top_k": top_k,
+        "top_p": top_p,
+        "do_sample": do_sample,
+        "temperature": temperature,
+    }
+
+    output, context, source = generate(input_, model_parameters)
+    sources_markup = ""
+
+    if source:
+        for url in source:
+            sources_markup += f'<a href="{url}" target="_blank">{url}</a><br>'
+
+    return output, sources_markup, context
+
+
+def change_interactive(text):
+    if len(text) == 0:
+        return gr.update(interactive=True), gr.update(interactive=False)
+    return gr.update(interactive=True), gr.update(interactive=True)
+
+
+def clear():
+    return (
+        None,
+        None,
+        None,
+        None,
+        gr.Slider(value=2.0),
+        gr.Slider(value=MAX_NEW_TOKENS),
+        gr.Slider(value=1.0),
+        gr.Slider(value=50),
+        gr.Slider(value=0.99),
+        gr.Checkbox(value=False),
+        gr.Slider(value=0.35),
+    )
+
+
+def gradio_app():
+    with gr.Blocks(theme=theme) as demo:
+        with gr.Row():
+            with gr.Column():
+                gr.Markdown(
+                    """# Retrieval-Augmented Generation demo for legal documents
+                    🔍 **Retrieval-Augmented Generation** (RAG) is an AI technique that lets you query a document repository with
+                    natural-language questions. It combines advanced information-retrieval techniques with generative models to
+                    draft an answer using only the information present in the repository's documents.
+
+                    🎯 **Goal:** This is a demonstrator built on the current legislation published in the Diari Oficial de la
+                    Generalitat de Catalunya, held in the EADOP (Entitat Autònoma del Diari Oficial i de Publicacions) repository.
+                    This version searches roughly 2,000 documents in Catalan and generates the answer with the
+                    Salamandra-7b-aligned-EADOP model, i.e. BSC-LT/salamandra-7b-instruct aligned with the
+                    alinia/EADOP-RAG-out-of-domain dataset.
+
+                    ⚠️ **Warnings:** This version is experimental. The content generated by this model is unsupervised and may be
+                    incorrect. Please keep that in mind when exploring this resource. The inference model behind this development
+                    demo does not run continuously. If you want to try it out, contact us at Langtech.
+
+                    👀 **More information in the reports on** [RAG](https://drive.google.com/file/d/11MgXQXAxfhkqbrx8syrKtmBrNP_6Qhx9/view?usp=sharing) and [Alignment](https://drive.google.com/file/d/1VUqHKO-gDmgMozK-Al83a2kh4Fr70pHh/view?usp=sharing) (PDF, in English).
+                    """
+                )
+        with gr.Row(equal_height=True):
+            with gr.Column(variant="panel"):
+                input_ = Textbox(
+                    lines=11,
+                    label="Input",
+                    placeholder="Quina és la finalitat del Servei Meteorològic de Catalunya?",
+                )
+                with gr.Row(variant="panel"):
+                    clear_btn = Button(
+                        "Clear",
+                    )
+                    submit_btn = Button("Submit", variant="primary", interactive=False)
+
+                with gr.Row(variant="panel"):
+                    with gr.Accordion("Model parameters", open=False, visible=SHOW_MODEL_PARAMETERS_IN_UI):
+                        num_chunks = Slider(
+                            minimum=1,
+                            maximum=6,
+                            step=1,
+                            value=2,
+                            label="Number of chunks",
+                        )
+                        max_new_tokens = Slider(
+                            minimum=50,
+                            maximum=2000,
+                            step=1,
+                            value=MAX_NEW_TOKENS,
+                            label="Max tokens",
+                        )
+                        repetition_penalty = Slider(
+                            minimum=0.1,
+                            maximum=2.0,
+                            step=0.1,
+                            value=1.0,
+                            label="Repetition penalty",
+                        )
+                        top_k = Slider(
+                            minimum=1,
+                            maximum=100,
+                            step=1,
+                            value=50,
+                            label="Top k",
+                        )
+                        top_p = Slider(
+                            minimum=0.01,
+                            maximum=0.99,
+                            value=0.99,
+                            label="Top p",
+                        )
+                        do_sample = Checkbox(
+                            value=False,
+                            label="Do sample",
+                        )
+                        temperature = Slider(
+                            minimum=0.1,
+                            maximum=1,
+                            value=0.35,
+                            label="Temperature",
+                        )
+
+                parameters_components = [num_chunks, max_new_tokens, repetition_penalty, top_k, top_p, do_sample, temperature]
+
+            with gr.Column(variant="panel"):
+                output = Textbox(
+                    lines=10,
+                    label="Output",
+                    interactive=False,
+                    show_copy_button=True,
+                )
+                with gr.Accordion("Sources and context:", open=False):
+                    source_context = gr.Markdown(
+                        label="Sources",
+                        show_label=False,
+                    )
+                with gr.Accordion("See full context evaluation:", open=False):
+                    context_evaluation = gr.Markdown(
+                        label="Full context",
+                        show_label=False,
+                    )
+
+        input_.change(
+            fn=change_interactive,
+            inputs=[input_],
+            outputs=[clear_btn, submit_btn],
+            api_name=False,
+        )
+
+        input_.change(
+            fn=None,
+            inputs=[input_],
+            api_name=False,
+            # Character counter: no element with id 'inputlenght' is created in this
+            # UI, so guard against a missing node instead of throwing on every keystroke.
+            js="""(i) => {
+                const el = document.getElementById('inputlenght');
+                if (!el) return;
+                el.textContent = i.length + ' ';
+            }""",
+        )
+
+        clear_btn.click(
+            fn=clear,
+            inputs=[],
+            outputs=[input_, output, source_context, context_evaluation] + parameters_components,
+            queue=False,
+            api_name=False,
+        )
+
+        submit_btn.click(
+            fn=submit_input,
+            inputs=[input_] + parameters_components,
+            outputs=[output, source_context, context_evaluation],
+            api_name="get-results",
+        )
+
+        with gr.Row():
+            with gr.Column(scale=0.5):
+                gr.Examples(
+                    examples=[
+                        ["""Qui va crear la guerra de les Galaxies ?"""],
+                    ],
+                    inputs=input_,
+                    outputs=[output, source_context, context_evaluation],
+                    fn=submit_input,
+                )
+
+    demo.launch(show_api=True)
+
+
+if __name__ == "__main__":
+    gradio_app()
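
Because submit_btn.click registers api_name="get-results", the Space can also be queried programmatically. A minimal sketch with gradio_client; the Space id is a hypothetical placeholder, and the positional arguments follow the inputs list above (input, then the seven model parameters):

from gradio_client import Client

client = Client("org/eadop-rag")  # hypothetical Space id
output, sources, context = client.predict(
    "Quina és la finalitat del Servei Meteorològic de Catalunya?",  # input_
    2,      # num_chunks
    700,    # max_new_tokens
    1.0,    # repetition_penalty
    50,     # top_k
    0.99,   # top_p
    False,  # do_sample
    0.35,   # temperature
    api_name="/get-results",
)
print(output)
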
handler.py ADDED
@@ -0,0 +1,14 @@
+import json
+
+
+class ContentHandler:
+    content_type = "application/json"
+    accepts = "application/json"
+
+    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
+        input_str = json.dumps({'inputs': prompt, 'parameters': model_kwargs})
+        return input_str.encode('utf-8')
+
+    # `output` is a file-like response body (it must support .read()), not raw bytes.
+    def transform_output(self, output) -> str:
+        response_json = json.loads(output.read().decode("utf-8"))
+        return response_json[0]["generated_text"]
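
A quick sanity check for the handler above. transform_output expects a file-like body, so this sketch simulates one with io.BytesIO; the prompt and response strings are made up:

import io
from handler import ContentHandler

handler = ContentHandler()
payload = handler.transform_input("Quina és la finalitat del Servei Meteorològic de Catalunya?", {"max_new_tokens": 50})
print(payload)  # b'{"inputs": "...", "parameters": {"max_new_tokens": 50}}'

body = io.BytesIO(b'[{"generated_text": "Resposta de mostra"}]')
print(handler.transform_output(body))  # Resposta de mostra
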
input_reader.py ADDED
@@ -0,0 +1,22 @@
+from typing import List
+
+from llama_index.core.constants import DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_SIZE
+from llama_index.core.readers import SimpleDirectoryReader
+from llama_index.core.schema import Document
+from llama_index.core import Settings
+
+
+class InputReader:
+    def __init__(self, input_dir: str) -> None:
+        self.reader = SimpleDirectoryReader(input_dir=input_dir)
+
+    def parse_documents(
+        self,
+        show_progress: bool = True,
+        chunk_size: int = DEFAULT_CHUNK_SIZE,
+        chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
+    ) -> List[Document]:
+        # The chunking settings are applied globally; load_data itself returns
+        # whole documents, and chunking happens later when nodes are parsed.
+        Settings.chunk_size = chunk_size
+        Settings.chunk_overlap = chunk_overlap
+        documents = self.reader.load_data(show_progress=show_progress)
+        return documents
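
InputReader is not wired into the app anywhere in this commit. A minimal usage sketch, assuming a hypothetical local document folder:

from input_reader import InputReader

reader = InputReader(input_dir="data/eadop_docs")  # hypothetical path
documents = reader.parse_documents(chunk_size=512, chunk_overlap=50)
print(f"{len(documents)} documents loaded")
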
rag.py ADDED
@@ -0,0 +1,180 @@
+import logging
+import os
+import requests
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+from openai import OpenAI
+from huggingface_hub import snapshot_download, InferenceClient
+
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+
+
+class RAG:
+    NO_ANSWER_MESSAGE: str = "Ho sento, no he pogut respondre la teva pregunta."
+
+    # vectorstore = "index-intfloat_multilingual-e5-small-500-100-CA-ES"  # mixed
+    # vectorstore = "vectorestore"  # CA only
+    # vectorstore = "index-BAAI_bge-m3-1500-200-recursive_splitter-CA_ES_UE"
+
+    def __init__(self, vs_hf_repo_path, vectorstore_path, hf_token, embeddings_model, model_name, rerank_model, rerank_number_contexts):
+        self.vs_hf_repo_path = vs_hf_repo_path
+        self.vectorstore_path = vectorstore_path
+        self.model_name = model_name
+        self.hf_token = hf_token
+        self.rerank_model = rerank_model
+        self.rerank_number_contexts = rerank_number_contexts
+
+        # Load the vector store from the Hugging Face Hub
+        hf_vectorstore = snapshot_download(repo_id=vs_hf_repo_path)
+
+        embeddings = HuggingFaceEmbeddings(model_name=embeddings_model, model_kwargs={'device': 'cpu'})
+        self.vector_store = FAISS.load_local(hf_vectorstore, embeddings, allow_dangerous_deserialization=True)
+        logging.info("RAG loaded!")
+        logging.info(self.vector_store)
+
+    def rerank_contexts(self, instruction, contexts, number_of_contexts=1):
+        """Rerank the contexts based on their relevance to the given instruction."""
+        # Note: the cross-encoder is reloaded on every call; caching it in
+        # __init__ would avoid the repeated initialisation cost.
+        tokenizer = AutoTokenizer.from_pretrained(self.rerank_model)
+        model = AutoModelForSequenceClassification.from_pretrained(self.rerank_model)
+
+        def get_score(query, passage):
+            """Calculate the relevance score of a passage with respect to a query."""
+            inputs = tokenizer(query, passage, return_tensors='pt', truncation=True, padding=True, max_length=512)
+            with torch.no_grad():
+                outputs = model(**inputs)
+            logits = outputs.logits
+            score = logits.view(-1, ).float()
+            return score
+
+        scores = [get_score(instruction, c[0].page_content) for c in contexts]
+        combined = list(zip(contexts, scores))
+        sorted_combined = sorted(combined, key=lambda x: x[1], reverse=True)
+        sorted_texts, _ = zip(*sorted_combined)
+
+        return sorted_texts[:number_of_contexts]
+
+    def get_context(self, instruction, number_of_contexts=2):
+        """Retrieve the most relevant contexts for a given instruction."""
+        logging.info("RETRIEVE DOCUMENTS")
+        documentos = self.vector_store.similarity_search_with_score(instruction, k=self.rerank_number_contexts)
+        logging.info("RERANK DOCUMENTS")
+        documentos = self.rerank_contexts(instruction, documentos, number_of_contexts=number_of_contexts)
+        logging.info("Reranked documents")
+        return documentos
+
+    def predict_dolly(self, instruction, context, model_parameters):
+        api_key = os.getenv("HF_TOKEN")
+
+        headers = {
+            "Accept": "application/json",
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json"
+        }
+
+        query = f"### Instruction\n{instruction}\n\n### Context\n{context}\n\n### Answer\n "
+
+        payload = {
+            "inputs": query,
+            "parameters": model_parameters
+        }
+
+        response = requests.post(self.model_name, headers=headers, json=payload)
+
+        # The endpoint echoes the prompt: keep only the segment after the final
+        # "###" marker and strip the echoed " Answer\n" header.
+        return response.json()[0]["generated_text"].split("###")[-1][8:]
+
+    def predict_completion(self, instruction, context, model_parameters):
+        client = OpenAI(
+            base_url=os.getenv("MODEL"),
+            api_key=os.getenv("HF_TOKEN")
+        )
+
+        query = f"Context:\n{context}\n\nQuestion:\n{instruction}"
+
+        chat_completion = client.chat.completions.create(
+            model="tgi",
+            messages=[
+                {"role": "user", "content": query}
+            ],
+            temperature=model_parameters["temperature"],
+            max_tokens=model_parameters["max_new_tokens"],
+            stream=False,
+            stop=["<|im_end|>"],
+            extra_body={
+                # Map the repetition_penalty slider ([0.1, 2.0]) onto the
+                # OpenAI-style presence_penalty range; sampling is hard-coded off.
+                "presence_penalty": model_parameters["repetition_penalty"] - 2,
+                "do_sample": False
+            }
+        )
+
+        response = chat_completion.choices[0].message.content
+
+        return response
+
+
+    def beautiful_context(self, docs):
+        text_context = ""
+        full_context = ""
+        source_context = []
+        for doc in docs:
+            text_context += doc[0].page_content
+            full_context += doc[0].page_content + "\n"
+            full_context += doc[0].metadata["title"] + "\n\n"
+            full_context += doc[0].metadata["url"] + "\n\n"
+            source_context.append(doc[0].metadata["url"])
+
+        return text_context, full_context, source_context
+
+    def get_response(self, prompt: str, model_parameters: dict):
+        try:
+            docs = self.get_context(prompt, model_parameters["NUM_CHUNKS"])
+            text_context, full_context, source = self.beautiful_context(docs)
+
+            logging.info("text_context")
+            logging.info(text_context)
+            logging.info("full context")
+            logging.info(full_context)
+            logging.info("source")
+            logging.info(source)
+
+            del model_parameters["NUM_CHUNKS"]
+
+            response = self.predict_completion(prompt, text_context, model_parameters)
+
+            if not response:
+                # Keep the 3-tuple shape the caller unpacks.
+                return self.NO_ANSWER_MESSAGE, full_context, source
+
+            return response, full_context, source
+        except Exception as err:
+            # Re-raise so the error handling in app.py can warn the user.
+            logging.error(err)
+            raise
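
A minimal sketch of driving the class above directly, mirroring the constructor call and UI defaults in app.py. The repo, endpoint, and model names are hypothetical placeholders:

import os
from rag import RAG

rag = RAG(
    vs_hf_repo_path="org/eadop-faiss-index",       # hypothetical HF repo with the FAISS index
    vectorstore_path=None,                         # unused when loading from the Hub
    hf_token=os.getenv("HF_TOKEN"),
    embeddings_model="intfloat/multilingual-e5-small",  # hypothetical embeddings model
    model_name="https://my-endpoint.example/v1",   # hypothetical TGI base URL
    rerank_model="BAAI/bge-reranker-base",         # hypothetical reranker
    rerank_number_contexts=6,
)
response, full_context, sources = rag.get_response(
    "Quina és la finalitat del Servei Meteorològic de Catalunya?",
    {"NUM_CHUNKS": 2, "max_new_tokens": 700, "repetition_penalty": 1.0,
     "top_k": 50, "top_p": 0.99, "do_sample": False, "temperature": 0.35},
)
print(response)
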
requirements.txt ADDED
@@ -0,0 +1,14 @@
+gradio
+huggingface-hub
+python-dotenv
+llama-index
+llama-index-embeddings-huggingface
+llama-index-llms-huggingface
+sentence-transformers
+langchain
+faiss-cpu
+aina-gradio-theme
+
+langchain-community
+langchain-core
+openai
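
The app reads its configuration from environment variables, loaded by utils.setup() via python-dotenv. A hypothetical .env sketch covering every variable referenced in app.py and rag.py; all values below are placeholders, not taken from this commit:

HF_TOKEN=hf_xxxxxxxxxxxx
VS_REPO_NAME=org/eadop-faiss-index
VECTORSTORE_PATH=index-intfloat_multilingual-e5-small-500-100-CA-ES
EMBEDDINGS=intfloat/multilingual-e5-small
MODEL=https://my-endpoint.example/v1
RERANK_MODEL=BAAI/bge-reranker-base
RERANK_NUMBER_CONTEXTS=6
SHOW_MODEL_PARAMETERS_IN_UI=True
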
utils.py ADDED
@@ -0,0 +1,33 @@
+import logging
+import warnings
+
+from dotenv import load_dotenv
+
+from rag import RAG
+
+USER_INPUT = 100
+
+
+def setup():
+    load_dotenv()
+    warnings.filterwarnings("ignore")
+
+    logging.addLevelName(USER_INPUT, "USER_INPUT")
+    logging.basicConfig(format="[%(levelname)s]: %(message)s", level=logging.INFO)
+
+
+def interactive(model: RAG):
+    logging.info("Write `exit` when you want to stop the model.")
+    print()
+
+    query = ""
+    while query.lower() != "exit":
+        logging.log(USER_INPUT, "Write the query or `exit`:")
+        query = input()
+
+        if query.lower() == "exit":
+            break
+
+        # RAG.get_response also requires a model_parameters dict; reuse the UI
+        # defaults from app.py so this console helper actually runs.
+        response, _, _ = model.get_response(query, {
+            "NUM_CHUNKS": 2, "max_new_tokens": 700, "repetition_penalty": 1.0,
+            "top_k": 50, "top_p": 0.99, "do_sample": False, "temperature": 0.35,
+        })
+        print(response, end="\n\n")
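
utils.interactive is never called in this commit. A sketch of a console driver for it, mirroring how app.py constructs RAG from the same environment variables:

import os
from rag import RAG
from utils import setup, interactive

setup()
rag = RAG(
    vs_hf_repo_path=os.getenv("VS_REPO_NAME"),
    vectorstore_path=os.getenv("VECTORSTORE_PATH"),
    hf_token=os.getenv("HF_TOKEN"),
    embeddings_model=os.getenv("EMBEDDINGS"),
    model_name=os.getenv("MODEL"),
    rerank_model=os.getenv("RERANK_MODEL"),
    rerank_number_contexts=int(os.getenv("RERANK_NUMBER_CONTEXTS")),
)
interactive(rag)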