Update app.py
app.py CHANGED
@@ -1,92 +1,133 @@
[Removed: the previous 92-line version of app.py, a simpler pipeline that read a single PDF with PyPDF2 inside a try/except and served a Gradio Blocks UI with a query textbox, a Submit button, a gr.Examples block ("What is the main argument in the document?", "Summarize the content of the PDF.", "What conclusions can be drawn from the report?") bound to query_input, a Response textbox, and demo.launch().]

import gradio as gr
import torch
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import PyPDF2
import os
import time
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load models
retriever_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
gen_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
gen_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

# Cache for document embeddings
embedding_cache = {}

def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF file, returning a list of page texts."""
    pages = []
    try:
        with open(pdf_file.name, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            for page in reader.pages:
                text = page.extract_text()
                if text:
                    pages.append(text.strip())
    except Exception as e:
        logger.error(f"Error reading PDF {pdf_file.name}: {str(e)}")
        pages.append(f"Error reading PDF: {str(e)}")
    return pages
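
# Note (an assumption about the Gradio version in use): gr.File may pass the
# callback either a tempfile-like object with a .name attribute or a plain
# filepath string, depending on the Gradio release and the component's type
# setting. open(pdf_file.name, "rb") above assumes the object form; with a
# string path, open(pdf_file, "rb") would be the equivalent call.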

def chunk_text(text, chunk_size=500):
    """Split text into chunks of approximately chunk_size characters."""
    words = text.split()
    chunks = []
    current_chunk = []
    current_length = 0
    for word in words:
        if current_length + len(word) > chunk_size and current_chunk:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(word)
        current_length += len(word) + 1  # +1 for space
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
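
# Illustrative example of chunk_text (worked out by hand, not executed here):
#   chunk_text("alpha beta gamma delta", chunk_size=12)
#   -> ["alpha beta", "gamma delta"]
# The word that would overflow a chunk starts the next one, so each chunk stays
# close to chunk_size characters (a single word longer than chunk_size still
# ends up as its own chunk).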

def get_document_embeddings(documents):
    """Compute embeddings for documents, using cache if available."""
    embeddings = []
    for doc in documents:
        if doc in embedding_cache:
            embeddings.append(embedding_cache[doc])
        else:
            emb = retriever_model.encode(doc, convert_to_tensor=True)
            embedding_cache[doc] = emb
            embeddings.append(emb)
    return embeddings
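
# The cache is keyed by the raw chunk text, so asking several questions about
# the same uploaded PDFs reuses stored embeddings instead of re-encoding them.
# Note that embedding_cache is never cleared, so it grows for as long as the
# process runs; that is usually fine for a small demo but worth keeping in mind
# for long sessions with many large PDFs.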

def rag_pipeline(question, pdf_files):
    """Optimized RAG pipeline with caching, chunking, and improved retrieval."""
    start_time = time.time()
    documents = []

    # Process PDFs if provided
    if pdf_files:
        for pdf in pdf_files:
            pages = extract_text_from_pdf(pdf)
            for page in pages:
                chunks = chunk_text(page)
                documents.extend(chunks)
    else:
        # Default documents if no PDFs
        documents = [
            "Paris is the capital of France and is known for its art, gastronomy, and culture.",
            "France is a country in Western Europe with diverse landscapes and a rich history.",
            "The Eiffel Tower is one of the most famous landmarks in Paris, France.",
            "Paris has a population of over 2 million people and is a major global city.",
        ]

    if not documents:
        return "No valid text could be extracted from the PDFs."

    # Compute embeddings with caching; stack the per-chunk tensors into one
    # (num_chunks, dim) matrix so the similarity can be computed in a single call
    doc_embeddings = torch.stack(get_document_embeddings(documents))

    # Embed the query
    query_embedding = retriever_model.encode(question, convert_to_tensor=True)

    # Retrieve top 3 chunks using cosine similarity
    cos_scores = util.pytorch_cos_sim(query_embedding, doc_embeddings)[0]
    top_results = torch.topk(cos_scores, k=min(3, len(documents)))
    retrieved_context = ""
    for score, idx in zip(top_results.values, top_results.indices):
        retrieved_context += f"Context: {documents[idx]}\n"

    # Optimized prompt for the generator
    prompt = f"Using the provided context, answer the following question:\n\nContext:\n{retrieved_context}\n\nQuestion: {question}\n\nAnswer:"

    # Generate answer
    inputs = gen_tokenizer(prompt, return_tensors="pt")
    outputs = gen_model.generate(**inputs, max_new_tokens=100)
    answer = gen_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Log processing time
    logger.info(f"Processing time: {time.time() - start_time:.2f} seconds")
    return answer
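
# Shape walk-through of the retrieval step above (all-MiniLM-L6-v2 produces
# 384-dimensional embeddings): query_embedding is (384,), the stacked document
# matrix is (num_chunks, 384), cos_scores is (num_chunks,), and torch.topk
# returns the best scores together with the indices used to look up the
# matching chunks in documents.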

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Improved Lightweight Local RAG Pipeline with PDF Input")
    gr.Markdown(
        "Upload one or more PDF files (or leave blank for default documents), enter your question, "
        "and get an answer generated using an optimized retrieval step (all-MiniLM-L6-v2) and a small "
        "generator model (flan-t5-small). Designed for 2 vCPUs and 16GB RAM."
    )
    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(label="Your Question", placeholder="Type your question here...", lines=3)
            pdf_input = gr.File(label="Upload PDF(s) (optional)", file_types=[".pdf"], file_count="multiple")
            submit_button = gr.Button("Submit")
        with gr.Column():
            response_output = gr.Textbox(label="Response", placeholder="The answer will appear here...", lines=10)

    submit_button.click(fn=rag_pipeline, inputs=[question_input, pdf_input], outputs=response_output)

demo.launch()
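
The retrieval step can also be tried on its own, outside the Space. The following is a minimal standalone sketch (not part of app.py); it assumes the same sentence-transformers model can be downloaded and reuses two of the default documents from rag_pipeline.

import torch
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
docs = [
    "Paris is the capital of France and is known for its art, gastronomy, and culture.",
    "The Eiffel Tower is one of the most famous landmarks in Paris, France.",
]
# Encode documents and query; all-MiniLM-L6-v2 yields 384-dimensional vectors
doc_emb = model.encode(docs, convert_to_tensor=True)        # shape (2, 384)
query_emb = model.encode("What is the capital of France?", convert_to_tensor=True)

# Rank documents by cosine similarity, exactly as rag_pipeline does
scores = util.pytorch_cos_sim(query_emb, doc_emb)[0]        # shape (2,)
top = torch.topk(scores, k=min(3, len(docs)))
for score, idx in zip(top.values, top.indices):
    print(f"{score:.3f}  {docs[idx]}")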