import gradio as gr
from pdfminer.high_level import extract_text
from sentence_transformers import SentenceTransformer
import faiss
from ctransformers import AutoModelForCausalLM

# Embedding model
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Free, non-gated GGUF model
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
    model_file="openhermes-2.5-mistral-7b.Q4_K_M.gguf",
    model_type="mistral",
    gpu_layers=0
)
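
# Note: the Q4_K_M GGUF weights are roughly 4 GB (assumed size) and are
# downloaded from the Hugging Face Hub on first start; with gpu_layers=0
# generation runs entirely on CPU.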

# Globals holding the current document's chunks and its FAISS index
doc_chunks, index = [], None

def extract_text_from_pdf(pdf_path):
    return extract_text(pdf_path)

def chunk_text(text, size=500, overlap=50):
    words = text.split()
    return [" ".join(words[i:i + size]) for i in range(0, len(words), size - overlap)]

def create_faiss_index(chunks):
    vectors = embedder.encode(chunks)  # float32 array of shape (n_chunks, dim)
    idx = faiss.IndexFlatL2(vectors.shape[1])
    idx.add(vectors)
    return idx

def retrieve_chunks(query, chunks, idx, k=3):
    q_vec = embedder.encode([query])
    _, indices = idx.search(q_vec, k)
    return [chunks[i] for i in indices[0]]

def build_prompt(query, context_chunks):
    context = "\n\n".join(context_chunks)
    return f"""You are a helpful assistant. Use the context below to answer the user's question.

Context:
{context}

Question:
{query}

Answer:"""

def llm_answer(prompt):
    return llm(prompt, max_new_tokens=256)

def process_pdf(file):
    global doc_chunks, index
    if file is None:
        return "❌ Please upload a PDF first."
    # gr.File returns a filepath string in newer Gradio versions, or a file-like
    # object whose .name attribute is the path to the upload in older ones.
    pdf_path = file if isinstance(file, str) else file.name
    text = extract_text_from_pdf(pdf_path)
    doc_chunks = chunk_text(text)
    index = create_faiss_index(doc_chunks)
    return "✅ PDF processed. Ask me anything!"

def chat_with_pdf(message, history):
    # gr.ChatInterface calls fn(message, history); history is unused here
    if not doc_chunks:
        return "❌ Upload a PDF first."
    chunks = retrieve_chunks(message, doc_chunks, index)
    prompt = build_prompt(message, chunks)
    return llm_answer(prompt)
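
# Illustrative usage without the UI ("example.pdf" is a hypothetical path):
#   process_pdf("example.pdf")
#   print(chat_with_pdf("What is the main topic of the document?", []))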

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## Chat with your PDF (Open Source, No Login!)")
    file_input = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status", interactive=False)
    chatbot = gr.ChatInterface(fn=chat_with_pdf, textbox=gr.Textbox(placeholder="Ask something from your PDF..."))
    upload_btn.click(fn=process_pdf, inputs=[file_input], outputs=[status])

demo.launch()
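
# To run locally (assumed dependency set for this script):
#   pip install gradio pdfminer.six sentence-transformers faiss-cpu ctransformers
#   python app.py          # "app.py" is an assumed filename
# On Hugging Face Spaces the app starts automatically from app.py, with the same
# packages listed in requirements.txt.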