# RAG_Chatbot / app.py
import gradio as gr
from pdfminer.high_level import extract_text
from sentence_transformers import SentenceTransformer
import faiss
from ctransformers import AutoModelForCausalLM
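
# The exact dependency list is an assumption (the Space's requirements.txt is
# not shown here); a minimal environment for this script would look roughly like:
#   pip install gradio pdfminer.six sentence-transformers faiss-cpu ctransformers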

# Embedding model used for both document chunks and queries
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Free, non-gated GGUF model, run on CPU via ctransformers
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    model_file="openhermes-2.5-mistral-7b.Q4_K_M.gguf",
    model_type="mistral",
    gpu_layers=0,
)

# In-memory document store and FAISS index, populated once a PDF is processed
doc_chunks, index = [], None


def extract_text_from_pdf(pdf_path):
    """Extract raw text from a PDF with pdfminer."""
    return extract_text(pdf_path)


def chunk_text(text, size=500, overlap=50):
    """Split text into overlapping windows of `size` words."""
    words = text.split()
    step = size - overlap
    return [" ".join(words[i:i + size]) for i in range(0, len(words), step)]


def create_faiss_index(chunks):
    """Embed the chunks and build an exact (brute-force) L2 FAISS index."""
    vectors = embedder.encode(chunks, convert_to_numpy=True).astype("float32")
    idx = faiss.IndexFlatL2(vectors.shape[1])
    idx.add(vectors)
    return idx
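# Note: IndexFlatL2 ranks neighbours by Euclidean distance. A common variant
# (an option, not what this app does) is cosine similarity: L2-normalize the
# embeddings with faiss.normalize_L2(vectors) and use faiss.IndexFlatIP,
# so the inner product equals the cosine between vectors.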


def retrieve_chunks(query, chunks, idx, k=3):
    """Return the k chunks whose embeddings are closest to the query."""
    q_vec = embedder.encode([query], convert_to_numpy=True).astype("float32")
    k = min(k, len(chunks))  # never ask FAISS for more neighbours than exist
    _, indices = idx.search(q_vec, k)
    return [chunks[i] for i in indices[0]]


def build_prompt(query, context_chunks):
    """Assemble a plain instruction prompt from the retrieved context."""
    context = "\n\n".join(context_chunks)
    return f"""You are a helpful assistant. Use the context below to answer the user's question.

Context:
{context}

Question:
{query}

Answer:"""
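# OpenHermes-2.5 was fine-tuned on ChatML-style prompts (<|im_start|> blocks),
# so wrapping the text above in that template may improve answers; the plain
# instruction format still works with the raw completion interface. This is a
# tuning suggestion, not something the original app does.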


def llm_answer(prompt):
    """Generate an answer from the local GGUF model."""
    return llm(prompt, max_new_tokens=256)
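# ctransformers' call interface also accepts sampling kwargs such as
# temperature, top_p and stop, e.g. llm(prompt, max_new_tokens=256,
# temperature=0.3, stop=["Question:"]); those values are illustrative,
# not tuned for this app.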


def process_pdf(file):
    global doc_chunks, index
    if file is None:
        return "❌ Please upload a PDF first."
    # gr.File may hand back a filepath string (Gradio 4.x) or a tempfile-like
    # object with a .name attribute (Gradio 3.x); handle both.
    pdf_path = file if isinstance(file, str) else file.name
    text = extract_text_from_pdf(pdf_path)
    doc_chunks = chunk_text(text)
    index = create_faiss_index(doc_chunks)
    return "✅ PDF processed. Ask me anything!"


def chat_with_pdf(message, history):
    """gr.ChatInterface callback; `history` is required by the API but unused."""
    if not doc_chunks:
        return "❌ Upload a PDF first."
    chunks = retrieve_chunks(message, doc_chunks, index)
    prompt = build_prompt(message, chunks)
    return llm_answer(prompt)


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Chat with your PDF (Open Source, No Login!)")
    file_input = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    status = gr.Markdown()  # shows the "PDF processed" / error message
    chatbot = gr.ChatInterface(fn=chat_with_pdf, textbox=gr.Textbox(placeholder="Ask something from your PDF..."))
    # Send the status string to its own component rather than the chat textbox
    upload_btn.click(fn=process_pdf, inputs=[file_input], outputs=[status])

demo.launch()
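
# To try this locally (assumption: outside a Hugging Face Space), run
# `python app.py` and open the URL that launch() prints; on a Space the
# script is executed for you and launch() serves the app.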