# Chat-with-PDF demo (Hugging Face Spaces app): upload a PDF, index it with
# FAISS + sentence-transformers, and answer questions with a local GGUF LLM.
import gradio as gr
import tempfile

from pdfminer.high_level import extract_text
from sentence_transformers import SentenceTransformer
import faiss
from ctransformers import AutoModelForCausalLM

# Sentence-embedding model, used both for indexing chunks and for queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# FREE, non-gated GGUF model; gpu_layers=0 keeps inference on CPU.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    model_file="openhermes-2.5-mistral-7b.Q4_K_M.gguf",
    model_type="mistral",
    gpu_layers=0,
)

# Module-level state shared between the upload handler and the chat handler.
doc_chunks, index = [], None
def extract_text_from_pdf(pdf_path):
    """Return the full plain text of the PDF at *pdf_path* via pdfminer."""
    return extract_text(pdf_path)
def chunk_text(text, size=500, overlap=50):
    """Split *text* into word-based chunks of at most *size* words.

    Consecutive chunks share *overlap* words so context is not lost at
    chunk boundaries.

    Args:
        text: arbitrary text; empty/whitespace-only input yields [].
        size: maximum words per chunk.
        overlap: words repeated between consecutive chunks.

    Raises:
        ValueError: if overlap >= size (the window would never advance).
    """
    if overlap >= size:
        raise ValueError("overlap must be smaller than size")
    words = text.split()
    step = size - overlap
    return [" ".join(words[i:i + size]) for i in range(0, len(words), step)]
def create_faiss_index(chunks):
    """Embed *chunks* and build a flat L2 FAISS index over the vectors.

    Args:
        chunks: list of text chunks to embed with the module-level embedder.

    Returns:
        A populated faiss.IndexFlatL2 whose row i corresponds to chunks[i].
    """
    vectors = embedder.encode(chunks)
    idx = faiss.IndexFlatL2(vectors.shape[1])
    idx.add(vectors)
    return idx
def retrieve_chunks(query, chunks, idx, k=3):
    """Return up to *k* chunks nearest to *query* in embedding space.

    FAISS pads its result with -1 when asked for more neighbours than the
    index contains; the original code then returned chunks[-1] (the last
    chunk) for those slots. We clamp k to the number of chunks and filter
    out any negative indices.
    """
    q_vec = embedder.encode([query])
    _, indices = idx.search(q_vec, min(k, len(chunks)))
    return [chunks[i] for i in indices[0] if i >= 0]
def build_prompt(query, context_chunks):
    """Assemble the RAG prompt: instruction, retrieved context, question.

    Args:
        query: the user's question.
        context_chunks: retrieved text chunks, joined by blank lines.
    """
    context = "\n\n".join(context_chunks)
    return f"""You are a helpful assistant. Use the context below to answer the user's question.
Context:
{context}
Question:
{query}
Answer:"""
def llm_answer(prompt):
    """Generate a completion for *prompt* (at most 256 new tokens)."""
    return llm(prompt, max_new_tokens=256)
def process_pdf(file):
    """Extract, chunk and index the uploaded PDF.

    Populates the module-level ``doc_chunks`` and ``index`` consumed by
    ``chat_with_pdf``. Depending on the Gradio version/config, ``gr.File``
    may deliver a file-like object with ``.read()``, an object exposing a
    ``.name`` path, or a plain path string — all three are handled.

    Returns:
        A status string shown in the UI.
    """
    import os

    global doc_chunks, index
    if hasattr(file, "read"):
        # In-memory upload: spool to a temp file so pdfminer can open a path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(file.read())
        try:
            text = extract_text_from_pdf(tmp.name)
        finally:
            os.unlink(tmp.name)  # original leaked the delete=False temp file
    else:
        # Newer Gradio hands over a path string or a NamedString with .name.
        text = extract_text_from_pdf(getattr(file, "name", file))
    doc_chunks = chunk_text(text)
    index = create_faiss_index(doc_chunks)
    # NOTE(review): "β" looks like mojibake for an emoji in the original —
    # string preserved byte-for-byte; confirm intended character.
    return "β PDF processed. Ask me anything!"
def chat_with_pdf(message, history=None):
    """ChatInterface callback: answer *message* from the indexed PDF.

    Bug fix: ``gr.ChatInterface`` invokes its fn as ``fn(message, history)``;
    the original one-parameter signature raised a TypeError on every chat
    turn. ``history`` is accepted (and ignored) with a default so direct
    one-argument callers keep working.
    """
    if not doc_chunks:
        return "β Upload a PDF first."
    chunks = retrieve_chunks(message, doc_chunks, index)
    prompt = build_prompt(message, chunks)
    return llm_answer(prompt)
# Gradio UI: upload + process button on top, chat interface below.
with gr.Blocks() as demo:
    # NOTE(review): "π§" in the heading looks like mojibake for an emoji —
    # preserved byte-for-byte; confirm intended character.
    gr.Markdown("## π§ Chat with your PDF (Open Source, No Login!)")
    file_input = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    chatbot = gr.ChatInterface(
        fn=chat_with_pdf,
        textbox=gr.Textbox(placeholder="Ask something from your PDF..."),
    )
    # The processing status string is written into the chat textbox.
    upload_btn.click(fn=process_pdf, inputs=[file_input], outputs=[chatbot.textbox])

demo.launch()