|
import os |
|
import gradio as gr |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain_community.vectorstores import Chroma |
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
from langchain_community.document_loaders import PyPDFLoader |
|
import requests |
|
from rerankers import Reranker |
|
from langchain_huggingface import HuggingFacePipeline |
|
from langchain import hub |
|
from langchain_core.output_parsers import StrOutputParser |
|
|
|
|
|
# Identify this app through the USER_AGENT environment variable.
# NOTE(review): presumably consumed by the LangChain loaders imported above; if
# they read it at import time it must be set before those imports — confirm.
os.environ["USER_AGENT"] = "MyHFSpace/1.0 (HuggingFace Space for Nutrition Q&A)"

# Location of the clinical nutrition manual (PDF) used as the knowledge base.
URL = "https://gruposdetrabajo.sefh.es/gefp/images/stories/documentos/4-ATENCION-FARMACEUTICA/Nutricion/Manual_basico_N_clinica_y_Dietetica_Valencia_2012.pdf"

# Download the manual at start-up. The timeout keeps a stalled server from
# hanging the app indefinitely, and raise_for_status() prevents an HTTP error
# page from being written to disk and later parsed as if it were the PDF.
response = requests.get(URL, timeout=60)
response.raise_for_status()

# The file name must stay identical to the path handed to PyPDFLoader below.
with open("Manual_de_nutrici贸n_clinica.pdf", "wb") as f:
    f.write(response.content)
|
|
|
|
|
|
|
# Local GPT-2 text-generation model wrapped as a LangChain LLM.
llm = HuggingFacePipeline.from_model_id(
    model_id="gpt2",
    task="text-generation",
    pipeline_kwargs={
        # temperature / top_k / top_p are sampling settings: they only take
        # effect when sampling is switched on, otherwise decoding is greedy
        # and the values are ignored.
        "do_sample": True,
        "temperature": 0.7,
        "top_k": 50,
        "top_p": 0.95,
        # Budget the *generated* tokens only. max_length also counts the
        # prompt, so a long retrieved context could consume the whole budget
        # and leave no room for an answer.
        "max_new_tokens": 256,
    },
)

# Convenience chain that returns the raw model output as a plain string.
chain = llm | StrOutputParser()
|
|
|
|
|
# Parse the downloaded manual into LangChain documents.
loader = PyPDFLoader("Manual_de_nutrici贸n_clinica.pdf")
documents = loader.load()

# Break the documents into overlapping chunks for embedding and retrieval.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
all_splits = text_splitter.split_documents(documents)
|
|
|
|
|
# Multilingual sentence-embedding model used to index and query the manual.
model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Give every chunk a deterministic id. The store is persisted in "chroma_db",
# so with auto-generated ids each restart would insert the whole manual again
# and similarity search would start returning duplicated chunks. Stable ids
# make re-indexing overwrite the existing entries instead.
# NOTE(review): relies on the LangChain Chroma wrapper upserting by id — confirm
# for the installed version.
chunk_ids = [f"manual-chunk-{index}" for index in range(len(all_splits))]

vectordb = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="chroma_db",
)
|
|
|
|
|
# ColBERT re-ranker; test_rag_reranking uses it to order the retrieved chunks.
ranker = Reranker(
    "answerdotai/answerai-colbert-small-v1",
    model_type="colbert",
)
|
|
|
|
|
def format_docs(docs):
    """Join the text of retrieved documents into a single string.

    Args:
        docs: Iterable of indexable items whose first element exposes a
            ``page_content`` attribute (for example ``(document, score)``
            pairs).

    Returns:
        The ``page_content`` values, in order, separated by a blank line.
    """
    contents = []
    for entry in docs:
        # Only the first element of each entry is used; the rest is ignored.
        contents.append(entry[0].page_content)
    return "\n\n".join(contents)
|
|
|
# Reply shown when no usable context is available for a question.
_NO_ANSWER_MESSAGE = "No tengo información para responder a esta pregunta"

# Cached RAG prompt template; filled on first use by _get_rag_prompt().
_rag_prompt = None


def _get_rag_prompt():
    """Return the "rlm/rag-prompt" template, pulling it from the hub only once."""
    global _rag_prompt
    if _rag_prompt is None:
        _rag_prompt = hub.pull("rlm/rag-prompt")
    return _rag_prompt


def test_rag_reranking(query, ranker, max_score=7):
    """Answer ``query`` with retrieval, re-ranking and generation.

    The vector store is searched for ``query``, hits whose score is not below
    ``max_score`` are discarded, the remaining passages are re-ranked and the
    first-ranked passage is passed to the LLM as context.

    Args:
        query: The user's question.
        ranker: Object exposing ``rank(query=..., docs=...)`` whose result can
            be indexed and whose items expose ``.text``.
        max_score: Exclusive upper bound on the retrieval score of a passage
            for it to be kept (defaults to the previously hard-coded 7).

    Returns:
        The generated answer, or a fixed fallback message when the query is
        blank or no retrieved passage passes the score filter.
    """
    # A blank question cannot be answered; skip retrieval and generation.
    if not query or not query.strip():
        return _NO_ANSWER_MESSAGE

    scored_docs = vectordb.similarity_search_with_score(query)

    # Keep only passages scoring below the threshold.
    # NOTE(review): presumably the score is a distance (lower = closer) — confirm.
    context = [doc.page_content for doc, score in scored_docs if score < max_score]
    if not context:
        return _NO_ANSWER_MESSAGE

    ranking = ranker.rank(query=query, docs=context)
    # Assumes rank() returns results ordered best-first — TODO confirm.
    useful_context = ranking[0].text

    # Build the chain only when there is something to answer; the prompt is
    # cached so the hub is not contacted on every question.
    rag_chain = _get_rag_prompt() | llm | StrOutputParser()
    return rag_chain.invoke({"context": useful_context, "question": query})
|
|
|
|
|
def answer_query(query):
    """Answer a question from the UI using the module-level re-ranker.

    Args:
        query: The question text entered by the user.

    Returns:
        Whatever ``test_rag_reranking`` returns for ``query``.
    """
    answer = test_rag_reranking(query, ranker)
    return answer
|
|
|
# Gradio UI: one text box for the question, one for the generated answer.
# The user-facing Spanish strings were mis-encoded (UTF-8 bytes decoded with
# the wrong code page); they are restored here so accents render correctly.
interface = gr.Interface(
    fn=answer_query,
    inputs=gr.Textbox(label="Ingresa tu pregunta sobre nutrición:"),
    outputs=gr.Textbox(label="Respuesta:"),
    title="Respuesta a Preguntas sobre Nutrición",
    description=(
        "Haz preguntas sobre nutrición basadas en el Manual Básico de "
        "Nutrición Clínica y Dietética."
    ),
)

# Start the web app.
interface.launch()