# Chat-with-PDF demo (Hugging Face Spaces app): upload a PDF, index it with
# FAISS + sentence-transformers, and answer questions with a local GGUF LLM.
import gradio as gr
import tempfile

from pdfminer.high_level import extract_text
from sentence_transformers import SentenceTransformer
import faiss
from ctransformers import AutoModelForCausalLM

# Sentence-embedding model, used both for indexing chunks and for queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# FREE, non-gated GGUF model; gpu_layers=0 keeps inference on CPU.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    model_file="openhermes-2.5-mistral-7b.Q4_K_M.gguf",
    model_type="mistral",
    gpu_layers=0,
)

# Module-level state shared between the upload handler and the chat handler.
doc_chunks, index = [], None
def extract_text_from_pdf(pdf_path):
    """Return the full plain text of the PDF at *pdf_path* via pdfminer."""
    return extract_text(pdf_path)
def chunk_text(text, size=500, overlap=50):
    """Split *text* into word-based chunks of at most *size* words.

    Consecutive chunks share *overlap* words so context is not lost at
    chunk boundaries.

    Args:
        text: arbitrary text; empty/whitespace-only input yields [].
        size: maximum words per chunk.
        overlap: words repeated between consecutive chunks.

    Raises:
        ValueError: if overlap >= size (the window would never advance).
    """
    if overlap >= size:
        raise ValueError("overlap must be smaller than size")
    words = text.split()
    step = size - overlap
    return [" ".join(words[i:i + size]) for i in range(0, len(words), step)]
def create_faiss_index(chunks):
    """Embed *chunks* and build a flat L2 FAISS index over the vectors.

    Args:
        chunks: list of text chunks to embed with the module-level embedder.

    Returns:
        A populated faiss.IndexFlatL2 whose row i corresponds to chunks[i].
    """
    vectors = embedder.encode(chunks)
    idx = faiss.IndexFlatL2(vectors.shape[1])
    idx.add(vectors)
    return idx
def retrieve_chunks(query, chunks, idx, k=3):
    """Return up to *k* chunks nearest to *query* in embedding space.

    FAISS pads its result with -1 when asked for more neighbours than the
    index contains; the original code then returned chunks[-1] (the last
    chunk) for those slots. We clamp k to the number of chunks and filter
    out any negative indices.
    """
    q_vec = embedder.encode([query])
    _, indices = idx.search(q_vec, min(k, len(chunks)))
    return [chunks[i] for i in indices[0] if i >= 0]
def build_prompt(query, context_chunks):
    """Assemble the RAG prompt: instruction, retrieved context, question.

    Args:
        query: the user's question.
        context_chunks: retrieved text chunks, joined by blank lines.
    """
    context = "\n\n".join(context_chunks)
    return f"""You are a helpful assistant. Use the context below to answer the user's question.
Context:
{context}
Question:
{query}
Answer:"""
def llm_answer(prompt):
    """Generate a completion for *prompt* (at most 256 new tokens)."""
    return llm(prompt, max_new_tokens=256)
def process_pdf(file):
    """Extract, chunk and index the uploaded PDF.

    Populates the module-level ``doc_chunks`` and ``index`` consumed by
    ``chat_with_pdf``. Depending on the Gradio version/config, ``gr.File``
    may deliver a file-like object with ``.read()``, an object exposing a
    ``.name`` path, or a plain path string — all three are handled.

    Returns:
        A status string shown in the UI.
    """
    import os

    global doc_chunks, index
    if hasattr(file, "read"):
        # In-memory upload: spool to a temp file so pdfminer can open a path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(file.read())
        try:
            text = extract_text_from_pdf(tmp.name)
        finally:
            os.unlink(tmp.name)  # original leaked the delete=False temp file
    else:
        # Newer Gradio hands over a path string or a NamedString with .name.
        text = extract_text_from_pdf(getattr(file, "name", file))
    doc_chunks = chunk_text(text)
    index = create_faiss_index(doc_chunks)
    # NOTE(review): "β" looks like mojibake for an emoji in the original —
    # string preserved byte-for-byte; confirm intended character.
    return "β PDF processed. Ask me anything!"
def chat_with_pdf(message, history=None):
    """ChatInterface callback: answer *message* from the indexed PDF.

    Bug fix: ``gr.ChatInterface`` invokes its fn as ``fn(message, history)``;
    the original one-parameter signature raised a TypeError on every chat
    turn. ``history`` is accepted (and ignored) with a default so direct
    one-argument callers keep working.
    """
    if not doc_chunks:
        return "β Upload a PDF first."
    chunks = retrieve_chunks(message, doc_chunks, index)
    prompt = build_prompt(message, chunks)
    return llm_answer(prompt)
# Gradio UI: upload + process button on top, chat interface below.
with gr.Blocks() as demo:
    # NOTE(review): "π§" in the heading looks like mojibake for an emoji —
    # preserved byte-for-byte; confirm intended character.
    gr.Markdown("## π§ Chat with your PDF (Open Source, No Login!)")
    file_input = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    chatbot = gr.ChatInterface(
        fn=chat_with_pdf,
        textbox=gr.Textbox(placeholder="Ask something from your PDF..."),
    )
    # The processing status string is written into the chat textbox.
    upload_btn.click(fn=process_pdf, inputs=[file_input], outputs=[chatbot.textbox])

demo.launch()