Rohit1412 committed on
Commit 3a1d8c8 · verified · 1 Parent(s): 12e6360

Update app.py

Files changed (1)
  1. app.py +75 -144
app.py CHANGED
@@ -1,161 +1,92 @@
-import os
 import gradio as gr
-import faiss
-import numpy as np
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from sentence_transformers import SentenceTransformer
-
-# ---------------------------
-# Load Models (cached on first run)
-# ---------------------------
-def load_models():
-    hf_token = os.getenv("HF_TOKEN")  # Set this secret in your HF Space settings
-    embed_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')  # For embeddings
-    tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-4b-it", use_auth_token=hf_token)
-    model = AutoModelForCausalLM.from_pretrained(
-        "google/gemma-3-4b-it",
-        device_map="auto",
-        low_cpu_mem_usage=True,
-        use_auth_token=hf_token
-    )
-    return embed_model, tokenizer, model
-
-embed_model, tokenizer, model = load_models()

-# ---------------------------
-# Global state for FAISS index and document chunks.
-# Using a dictionary to hold state.
-state = {
-    "faiss_index": None,
-    "doc_chunks": []
-}

-# ---------------------------
-# Document Processing Function
-# ---------------------------
-def process_document(file, chunk_size, chunk_overlap):
     """
-    Reads the uploaded file (PDF or text), extracts text, splits into chunks,
-    computes embeddings, and builds a FAISS index.
     """
-    if file is None:
-        return "No file uploaded."
-
-    file_bytes = file.read()
-    file_name = file.name
-    text = ""
-
-    if file_name.lower().endswith(".pdf"):
-        try:
-            from PyPDF2 import PdfReader
-        except ImportError:
-            return "Error: PyPDF2 is required for PDF extraction."
-        # Save file to temporary path
-        temp_path = os.path.join("temp", file_name)
-        os.makedirs("temp", exist_ok=True)
-        with open(temp_path, "wb") as f:
-            f.write(file_bytes)
-        reader = PdfReader(temp_path)
-        for page in reader.pages:
-            text += page.extract_text() or ""
-    else:
-        # Assume it's a text file
-        text = file_bytes.decode("utf-8", errors="ignore")
-
-    if text.strip() == "":
-        return "No text found in the document."
-
-    # Split text into overlapping chunks
-    chunks = []
-    for start in range(0, len(text), chunk_size - chunk_overlap):
-        chunk_text = text[start: start + chunk_size]
-        chunks.append(chunk_text)
-
-    # Compute embeddings for each chunk using the embedding model.
-    embeddings = embed_model.encode(chunks, normalize_embeddings=True).astype('float32')
-    dim = embeddings.shape[1]
-
-    # Build FAISS index using cosine similarity (normalized vectors -> inner product)
-    index = faiss.IndexFlatIP(dim)
-    index.add(embeddings)
-
-    # Update global state
-    state["faiss_index"] = index
-    state["doc_chunks"] = chunks
-
-    # Return a preview (first 500 characters of the first chunk) and status.
-    preview = chunks[0][:500] if chunks else "No content"
-    return f"Indexed {len(chunks)} chunks.\n\n**Document Preview:**\n{preview}"

-# ---------------------------
-# Question Answering Function
-# ---------------------------
-def answer_question(query, top_k):
     """
-    Retrieves the top_k chunks most relevant to the query using the FAISS index,
-    builds a prompt with the retrieved context, and generates an answer using the Gemma model.
     """
-    index = state.get("faiss_index")
-    chunks = state.get("doc_chunks")
-    if index is None or len(chunks) == 0:
-        return "No document processed. Please upload a document first."
-
-    # Encode query using the same embedding model
-    query_vec = embed_model.encode([query], normalize_embeddings=True).astype('float32')
-    D, I = index.search(query_vec, top_k)

-    # Concatenate retrieved chunks as context
-    retrieved_text = ""
-    for idx in I[0]:
-        retrieved_text += chunks[idx] + "\n"

-    # Formulate the prompt for the generative model
-    prompt = f"Context:\n{retrieved_text}\nQuestion: {query}\nAnswer:"

-    # Tokenize and generate answer
-    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
-    output_ids = model.generate(input_ids, max_new_tokens=200, temperature=0.2)
-    answer = tokenizer.decode(output_ids[0][input_ids.size(1):], skip_special_tokens=True)
-    return answer.strip()

-# ---------------------------
-# Gradio Interface
-# ---------------------------
-with gr.Blocks(title="RAG System with Gemma‑3‑4B‑it") as demo:
     gr.Markdown(
-        """
-        # RAG System with Gemma‑3‑4B‑it
-        Upload a document (PDF or TXT) below. The system will extract text, split it into chunks,
-        build a vector index using FAISS, and then allow you to ask questions based on the document.
-        """
     )

-    with gr.Tab("Document Upload & Processing"):
-        with gr.Row():
-            file_input = gr.File(label="Upload Document (PDF or TXT)", file_count="single")
-        with gr.Row():
-            chunk_size_input = gr.Number(label="Chunk Size (characters)", value=1000, precision=0)
-            chunk_overlap_input = gr.Number(label="Chunk Overlap (characters)", value=100, precision=0)
-        process_btn = gr.Button("Process Document")
-        process_output = gr.Markdown()

-    with gr.Tab("Ask a Question"):
-        query_input = gr.Textbox(label="Enter your question", placeholder="Type your question here...")
-        top_k_input = gr.Number(label="Number of Chunks to Retrieve", value=3, precision=0)
-        answer_btn = gr.Button("Get Answer")
-        answer_output = gr.Markdown(label="Answer")

-    # Set up actions
-    process_btn.click(
-        fn=process_document,
-        inputs=[file_input, chunk_size_input, chunk_overlap_input],
-        outputs=process_output
-    )
-    answer_btn.click(
-        fn=answer_question,
-        inputs=[query_input, top_k_input],
-        outputs=answer_output
-    )
-
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import requests
+import os
+import PyPDF2

+# Set your Hugging Face API token.
+# Option 1: Set it as an environment variable named "HF_TOKEN".
+# Option 2: Replace "YOUR_HUGGINGFACE_API_TOKEN" with your token directly.
+API_TOKEN = os.environ.get("HF_TOKEN", "YOUR_HUGGINGFACE_API_TOKEN")
+headers = {"Authorization": f"Bearer {API_TOKEN}"}

+def extract_pdf_text(pdf_file):
     """
+    Extracts text from a PDF file using PyPDF2.
     """
+    pdf_text = ""
+    try:
+        with open(pdf_file, "rb") as f:
+            reader = PyPDF2.PdfReader(f)
+            for page in reader.pages:
+                text = page.extract_text()
+                if text:
+                    pdf_text += text + "\n"
+    except Exception as e:
+        print("Error reading PDF:", e)
+    return pdf_text

+def generate_response(query, pdf_file=None):
     """
+    If a PDF file is uploaded, extract its text and combine a limited part of it
+    with the user query to form a prompt. Then send the prompt to the Hugging Face
+    Inference API using the RAG model.
     """
+    pdf_text = ""
+    if pdf_file is not None:
+        pdf_text = extract_pdf_text(pdf_file)
+
+    # If PDF text is available, append its (truncated) content as context
+    if pdf_text:
+        # Limit the context to avoid token overflow; adjust as needed.
+        context = pdf_text[:2000]
+        full_input = "Context: " + context + "\n\nQuestion: " + query
+    else:
+        full_input = query
+
+    # Define the model and endpoint for the RAG model.
+    model_id = "facebook/rag-token-nq"
+    api_url = f"https://api-inference.huggingface.co/models/{model_id}"

+    payload = {"inputs": full_input}

+    response = requests.post(api_url, headers=headers, json=payload)
+    if response.status_code != 200:
+        return "Error: " + response.text

+    result = response.json()
+    # Extract the generated text if available.
+    if isinstance(result, list) and result and "generated_text" in result[0]:
+        return result[0]["generated_text"]
+    else:
+        return str(result)

+with gr.Blocks() as demo:
+    gr.Markdown("# Retrieval Augmented Generation (RAG) Chatbot with PDF Input")
     gr.Markdown(
+        "Powered by the Hugging Face Inference API. "
+        "Optionally upload a PDF file and ask a question related to its content. "
+        "If no PDF is uploaded, the model will answer based solely on the query."
     )

+    with gr.Row():
+        with gr.Column():
+            pdf_input = gr.File(label="Upload PDF (optional)", file_types=[".pdf"])
+            query_input = gr.Textbox(label="Your Question", placeholder="Type your question here...", lines=3)
+            submit_button = gr.Button("Submit")
+            gr.Examples(
+                examples=[
+                    ["What is the main argument in the document?"],
+                    ["Summarize the content of the PDF."],
+                    ["What conclusions can be drawn from the report?"],
+                ],
+                inputs=query_input,
+                label="Try one of these examples:"
+            )
+        with gr.Column():
+            response_output = gr.Textbox(label="Response", placeholder="The answer will appear here...", lines=10)

+    # Link the button click to the generate_response function.
+    submit_button.click(fn=generate_response, inputs=[query_input, pdf_input], outputs=response_output)

+# Launch the app locally
+demo.launch()
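
For quick testing outside the Gradio UI, a minimal standalone sketch of the same Inference API request the updated app.py sends. The endpoint, model ID ("facebook/rag-token-nq"), auth header, and payload shape are taken from the diff above; the example context and question are placeholders only, and the response handling mirrors the app's assumption that a successful call returns a list containing "generated_text".

import os
import requests

# Same token handling and endpoint as the updated app.py (sketch for local testing).
API_TOKEN = os.environ.get("HF_TOKEN", "YOUR_HUGGINGFACE_API_TOKEN")
headers = {"Authorization": f"Bearer {API_TOKEN}"}
api_url = "https://api-inference.huggingface.co/models/facebook/rag-token-nq"

# Placeholder prompt; in the app this is the truncated PDF text plus the user question.
payload = {"inputs": "Context: ...\n\nQuestion: What is the main argument in the document?"}

response = requests.post(api_url, headers=headers, json=payload, timeout=60)
if response.status_code != 200:
    print("Error:", response.text)
else:
    result = response.json()
    # The app expects a list of dicts with "generated_text"; fall back to printing raw JSON.
    if isinstance(result, list) and result and "generated_text" in result[0]:
        print(result[0]["generated_text"])
    else:
        print(result)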