import os
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import gradio as gr
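# Typical Space requirements for the imports above (assumed, adjust as needed):
# langchain, langchain-community, chromadb, sentence-transformers,
# llama-cpp-python, pypdf, gradio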

# Hugging Face API token (set it as a secret in the Space); guard against an
# unset variable, since assigning None to os.environ raises a TypeError
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_token:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

# Paths for PDFs and model (upload these to the Hugging Face Space)
PDF_DIR = "./Data"  # Replace with the path where you upload your PDFs
MODEL_PATH = "./BioMistral-7B.Q4_K_M.gguf"  # Replace with the model's path in the Space
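# If the GGUF file is too large to commit to the Space, it can be fetched at
# startup instead (repo id left as a placeholder; point it at wherever the
# quantized model is hosted):
#   from huggingface_hub import hf_hub_download
#   MODEL_PATH = hf_hub_download(repo_id="...", filename="BioMistral-7B.Q4_K_M.gguf")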

# Load and process PDF documents
loader = PyPDFDirectoryLoader(PDF_DIR)
docs = loader.load()

# Small overlapping chunks so several retrieved passages fit in the context window
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
chunks = text_splitter.split_documents(docs)

# Create embeddings and vector store
embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
vectorstore = Chroma.from_documents(chunks, embeddings)
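# NOTE: the store is rebuilt in memory on every startup; passing a
# persist_directory to Chroma.from_documents would cache embeddings between runs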

# Retriever for querying: k=5 chunks of ~300 characters each stays well within
# the model's 2048-token context window
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Initialize the LLM; llama.cpp's default context window (512 tokens) is too
# small for max_tokens=2048, so raise n_ctx explicitly
llm = LlamaCpp(
    model_path=MODEL_PATH,
    temperature=0.2,
    max_tokens=2048,
    top_p=1,
    n_ctx=2048,
)

# Define the prompt template; {context} receives the retrieved chunks, without
# which the model would never see the PDF content
template = """
<|context|>
You are a Medical Assistant that follows instructions and generates accurate responses based on the query and the context provided.
Please be truthful and give direct answers.
{context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

# Use a plain PromptTemplate: LlamaCpp is a completion-style LLM, and a
# ChatPromptTemplate would wrap the text in a "Human: ..." chat transcript,
# breaking the custom special-token format above
prompt = PromptTemplate.from_template(template)

# The retriever returns Document objects; join their text so the prompt gets a
# clean string rather than a stringified list
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Define the RAG chain
rag_chain = (
    {"context": retriever | format_docs, "query": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)
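# Quick sanity check (assumes the PDFs and model file are already in place):
#   print(rag_chain.invoke("What are the symptoms of diabetes?"))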

# Define a function for the Gradio UI
def chatbot_ui(user_query):
    if not user_query.strip():
        return "Please enter a valid query."
    try:
        result = rag_chain.invoke(user_query)
        return result
    except Exception as e:
        return f"Error: {str(e)}"

# Create the Gradio interface
interface = gr.Interface(
    fn=chatbot_ui,
    inputs=gr.Textbox(label="Enter your medical query:", placeholder="Ask a medical question here..."),
    outputs=gr.Textbox(label="Chatbot Response"),
    title="Medical Assistant Chatbot",
    description="A chatbot designed for heart patients, providing accurate and reliable medical information.",
    examples=[
        ["What are the symptoms of diabetes?"],
        ["Explain the risk factors of heart disease."],
        ["How can I reduce cholesterol levels naturally?"],
    ]
)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch()