abdull4h committed · verified
Commit a8285e4 · 1 Parent(s): 1f683db

Update app.py

Files changed (1):
  1. app.py +341 -643

app.py CHANGED
@@ -1,42 +1,15 @@
  import os
  import re
  import json
- import torch
- import numpy as np
- import pandas as pd
  from tqdm import tqdm
  from pathlib import Path
  import spaces
-
- # PDF processing
- import PyPDF2
-
- # LLM and embeddings
- from transformers import AutoTokenizer, AutoModelForCausalLM
- from sentence_transformers import SentenceTransformer
-
- # RAG components
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain_community.vectorstores import FAISS
- from langchain.schema import Document
- from langchain.embeddings import HuggingFaceEmbeddings
-
- # Arabic text processing
- import arabic_reshaper
- from bidi.algorithm import get_display
-
- # Evaluation
- from rouge_score import rouge_scorer
- import sacrebleu
- from sklearn.metrics import accuracy_score, precision_recall_fscore_support
- import matplotlib.pyplot as plt
- import seaborn as sns
- from collections import defaultdict
-
- # Gradio for the interface
  import gradio as gr

- # Helper functions
  def safe_tokenize(text):
      """Pure regex tokenizer with no NLTK dependency"""
      if not text:
@@ -53,315 +26,6 @@ def detect_language(text):
      is_arabic = len(arabic_chars) > len(text) * 0.5
      return "arabic" if is_arabic else "english"

- # Evaluation metrics
- def calculate_bleu(prediction, reference):
-     """Calculate BLEU score without any NLTK dependency"""
-     # Tokenize texts using our own tokenizer
-     pred_tokens = safe_tokenize(prediction.lower())
-     ref_tokens = [safe_tokenize(reference.lower())]
-
-     # If either is empty, return 0
-     if not pred_tokens or not ref_tokens[0]:
-         return {"bleu_1": 0, "bleu_2": 0, "bleu_4": 0}
-
-     # Get n-grams function
-     def get_ngrams(tokens, n):
-         return [tuple(tokens[i:i+n]) for i in range(len(tokens) - n + 1)]
-
-     # Calculate precision for each n-gram level
-     precisions = []
-     for n in range(1, 5):  # 1-gram to 4-gram
-         if len(pred_tokens) < n:
-             precisions.append(0)
-             continue
-
-         pred_ngrams = get_ngrams(pred_tokens, n)
-         ref_ngrams = get_ngrams(ref_tokens[0], n)
-
-         # Count matches
-         matches = sum(1 for ng in pred_ngrams if ng in ref_ngrams)
-
-         # Calculate precision
-         if pred_ngrams:
-             precisions.append(matches / len(pred_ngrams))
-         else:
-             precisions.append(0)
-
-     # Return BLEU scores
-     return {
-         "bleu_1": precisions[0],
-         "bleu_2": (precisions[0] * precisions[1]) ** 0.5 if len(precisions) > 1 else 0,
-         "bleu_4": (precisions[0] * precisions[1] * precisions[2] * precisions[3]) ** 0.25 if len(precisions) > 3 else 0
-     }
-
- def calculate_meteor(prediction, reference):
-     """Simple word overlap metric as METEOR alternative"""
-     # Tokenize with our custom tokenizer
-     pred_tokens = set(safe_tokenize(prediction.lower()))
-     ref_tokens = set(safe_tokenize(reference.lower()))
-
-     # Calculate Jaccard similarity as METEOR alternative
-     if not pred_tokens or not ref_tokens:
-         return 0
-
-     intersection = len(pred_tokens.intersection(ref_tokens))
-     union = len(pred_tokens.union(ref_tokens))
-
-     return intersection / union if union > 0 else 0
-
- def calculate_f1_precision_recall(prediction, reference):
-     """Calculate word-level F1, precision, and recall with custom tokenizer"""
-     # Tokenize with our custom tokenizer
-     pred_tokens = set(safe_tokenize(prediction.lower()))
-     ref_tokens = set(safe_tokenize(reference.lower()))
-
-     # Calculate overlap
-     common = pred_tokens.intersection(ref_tokens)
-
-     # Calculate precision, recall, F1
-     precision = len(common) / len(pred_tokens) if pred_tokens else 0
-     recall = len(common) / len(ref_tokens) if ref_tokens else 0
-     f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0
-
-     return {'precision': precision, 'recall': recall, 'f1': f1}
-
- def evaluate_retrieval_quality(contexts, query, language):
-     """Evaluate the quality of retrieved contexts"""
-     # This is a placeholder implementation
-     return {
-         'language_match_ratio': 1.0,
-         'source_diversity': len(set([ctx.get('source', '') for ctx in contexts])) / max(1, len(contexts)),
-         'mrr': 1.0
-     }
-
- # PDF Processing and Vector Store
- def simple_process_pdfs(pdf_paths):
-     """Process PDF documents and return document objects"""
-     documents = []
-
-     print(f"Processing PDFs: {pdf_paths}")
-
-     for pdf_path in pdf_paths:
-         try:
-             if not os.path.exists(pdf_path):
-                 print(f"Warning: {pdf_path} does not exist")
-                 continue
-
-             print(f"Processing {pdf_path}...")
-             text = ""
-             with open(pdf_path, 'rb') as file:
-                 reader = PyPDF2.PdfReader(file)
-                 for page in reader.pages:
-                     page_text = page.extract_text()
-                     if page_text:  # If we got text from this page
-                         text += page_text + "\n\n"
-
-             if text.strip():  # If we got some text
-                 doc = Document(
-                     page_content=text,
-                     metadata={"source": pdf_path, "filename": os.path.basename(pdf_path)}
-                 )
-                 documents.append(doc)
-                 print(f"Successfully processed: {pdf_path}")
-             else:
-                 print(f"Warning: No text extracted from {pdf_path}")
-         except Exception as e:
-             print(f"Error processing {pdf_path}: {e}")
-             import traceback
-             traceback.print_exc()
-
-     print(f"Processed {len(documents)} PDF documents")
-     return documents
-
- def create_vector_store(documents):
-     """Split documents into chunks and create a FAISS vector store"""
-     # Text splitter for breaking documents into chunks
-     text_splitter = RecursiveCharacterTextSplitter(
-         chunk_size=500,
-         chunk_overlap=50,
-         separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
-     )
-
-     # Split documents into chunks
-     chunks = []
-     for doc in documents:
-         doc_chunks = text_splitter.split_text(doc.page_content)
-         # Preserve metadata for each chunk
-         chunks.extend([
-             Document(page_content=chunk, metadata=doc.metadata)
-             for chunk in doc_chunks
-         ])
-
-     print(f"Created {len(chunks)} chunks from {len(documents)} documents")
-
-     # Create a proper embedding function for LangChain
-     embedding_function = HuggingFaceEmbeddings(
-         model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
-     )
-
-     # Create FAISS index
-     vector_store = FAISS.from_documents(
-         chunks,
-         embedding_function
-     )
-
-     return vector_store
-
- # Model Loading and RAG System
- @spaces.GPU
- def load_model_and_tokenizer():
-     """Load the ALLaM-7B model and tokenizer with error handling"""
-     model_name = "ALLaM-AI/ALLaM-7B-Instruct-preview"
-     print(f"Loading model: {model_name}")
-
-     try:
-         # Load tokenizer with correct settings
-         tokenizer = AutoTokenizer.from_pretrained(
-             model_name,
-             trust_remote_code=True,
-             use_fast=False
-         )
-
-         # Load model with appropriate settings for ALLaM
-         model = AutoModelForCausalLM.from_pretrained(
-             model_name,
-             torch_dtype=torch.bfloat16,
-             trust_remote_code=True,
-             device_map="auto",
-         )
-
-         print("Model loaded successfully!")
-         return model, tokenizer
-
-     except Exception as e:
-         print(f"Error loading model: {e}")
-         import traceback
-         traceback.print_exc()
-         raise Exception(f"Failed to load model: {e}")
-
- def retrieve_context(query, vector_store, top_k=5):
-     """Retrieve most relevant document chunks for a given query"""
-     # Search the vector store using similarity search
-     results = vector_store.similarity_search_with_score(query, k=top_k)
-
-     # Format the retrieved contexts
-     contexts = []
-     for doc, score in results:
-         contexts.append({
-             "content": doc.page_content,
-             "source": doc.metadata.get("source", "Unknown"),
-             "relevance_score": score
-         })
-
-     return contexts
-
- @spaces.GPU
- def generate_response(query, contexts, model, tokenizer, language="auto"):
-     """Generate a response using retrieved contexts with ALLaM-specific formatting"""
-     # Auto-detect language if not specified
-     if language == "auto":
-         language = detect_language(query)
-
-     # Format the prompt based on language
-     if language == "arabic":
-         instruction = (
-             "أنت مساعد افتراضي يهتم برؤية السعودية 2030. استخدم المعلومات التالية للإجابة على السؤال. "
-             "إذا لم تعرف الإجابة، فقل بأمانة إنك لا تعرف."
-         )
-     else:  # english
-         instruction = (
-             "You are a virtual assistant for Saudi Vision 2030. Use the following information to answer the question. "
-             "If you don't know the answer, honestly say you don't know."
-         )
-
-     # Combine retrieved contexts
-     context_text = "\n\n".join([f"Document: {ctx['content']}" for ctx in contexts])
-
-     # Format the prompt for ALLaM instruction format
-     prompt = f"""<s>[INST] {instruction}
-
- Context:
- {context_text}
-
- Question: {query} [/INST]</s>"""
-
-     try:
-         # Generate response with appropriate parameters for ALLaM
-         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
-         # Generate with appropriate parameters
-         outputs = model.generate(
-             inputs.input_ids,
-             attention_mask=inputs.attention_mask,
-             max_new_tokens=512,
-             temperature=0.7,
-             top_p=0.9,
-             do_sample=True,
-             repetition_penalty=1.1
-         )
-
-         # Decode the response
-         full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-         # Extract just the answer part (after the instruction)
-         response = full_output.split("[/INST]")[-1].strip()
-
-         # If response is empty for some reason, return the full output
-         if not response:
-             response = full_output
-
-         return response
-
-     except Exception as e:
-         print(f"Error during generation: {e}")
-         # Fallback response
-         return "I apologize, but I encountered an error while generating a response."
-
- # Assistant Class
- class Vision2030Assistant:
-     def __init__(self, model, tokenizer, vector_store):
-         self.model = model
-         self.tokenizer = tokenizer
-         self.vector_store = vector_store
-         self.conversation_history = []
-
-     def answer(self, user_query):
-         """Process a user query and return a response with sources"""
-         # Detect language
-         language = detect_language(user_query)
-
-         # Add user query to conversation history
-         self.conversation_history.append({"role": "user", "content": user_query})
-
-         # Get the full conversation context
-         conversation_context = "\n".join([
-             f"{'User' if msg['role'] == 'user' else 'Assistant'}: {msg['content']}"
-             for msg in self.conversation_history[-6:]  # Keep last 3 turns (6 messages)
-         ])
-
-         # Enhance query with conversation context for better retrieval
-         enhanced_query = f"{conversation_context}\n{user_query}"
-
-         # Retrieve relevant contexts
-         contexts = retrieve_context(enhanced_query, self.vector_store, top_k=5)
-
-         # Generate response
-         response = generate_response(user_query, contexts, self.model, self.tokenizer, language)
-
-         # Add response to conversation history
-         self.conversation_history.append({"role": "assistant", "content": response})
-
-         # Also return sources for transparency
-         sources = [ctx.get("source", "Unknown") for ctx in contexts]
-         unique_sources = list(set(sources))
-
-         return response, unique_sources, contexts
-
-     def reset_conversation(self):
-         """Reset the conversation history"""
-         self.conversation_history = []
-         return "Conversation has been reset."
-
  # Comprehensive evaluation dataset
  comprehensive_evaluation_data = [
      # === Overview ===
@@ -407,352 +71,386 @@ comprehensive_evaluation_data = [
      }
  ]

- # Gradio Interface
- def initialize_system():
-     """Initialize the Vision 2030 Assistant system"""
-     # Define paths for PDF files in the root directory
-     pdf_files = ["saudi_vision203.pdf", "saudi_vision2030_ar.pdf"]
-
-     # Process PDFs and create vector store
-     vector_store_dir = "vector_stores"
-     os.makedirs(vector_store_dir, exist_ok=True)
-
-     if os.path.exists(os.path.join(vector_store_dir, "index.faiss")):
-         print("Loading existing vector store...")
-         embedding_function = HuggingFaceEmbeddings(
-             model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
-         )
-         vector_store = FAISS.load_local(vector_store_dir, embedding_function)
-     else:
-         print("Creating new vector store...")
-         documents = simple_process_pdfs(pdf_files)
-         if not documents:
-             raise ValueError("No documents were processed successfully. Cannot continue.")
-         vector_store = create_vector_store(documents)
-         vector_store.save_local(vector_store_dir)
-
-     # Load model and tokenizer
-     model, tokenizer = load_model_and_tokenizer()
-
-     # Initialize assistant
-     assistant = Vision2030Assistant(model, tokenizer, vector_store)
-
-     return assistant
-
- def evaluate_response(query, response, reference):
-     """Evaluate a single response against a reference"""
-     # Calculate metrics
-     rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
-     rouge_scores = rouge.score(response, reference)
-
-     bleu_scores = calculate_bleu(response, reference)
-     meteor = calculate_meteor(response, reference)
-     word_metrics = calculate_f1_precision_recall(response, reference)
-
-     # Format results
-     evaluation_results = {
-         "ROUGE-1": f"{rouge_scores['rouge1'].fmeasure:.4f}",
-         "ROUGE-2": f"{rouge_scores['rouge2'].fmeasure:.4f}",
-         "ROUGE-L": f"{rouge_scores['rougeL'].fmeasure:.4f}",
-         "BLEU-1": f"{bleu_scores['bleu_1']:.4f}",
-         "BLEU-4": f"{bleu_scores['bleu_4']:.4f}",
-         "METEOR": f"{meteor:.4f}",
-         "Word Precision": f"{word_metrics['precision']:.4f}",
-         "Word Recall": f"{word_metrics['recall']:.4f}",
-         "Word F1": f"{word_metrics['f1']:.4f}"
-     }
-
-     return evaluation_results
-
- @spaces.GPU
- def run_evaluation_on_sample(assistant, sample_index=0):
-     """Run evaluation on a selected sample from the evaluation dataset"""
-     if sample_index < 0 or sample_index >= len(comprehensive_evaluation_data):
-         return "Invalid sample index", "", "", {}
-
-     # Get the sample
-     sample = comprehensive_evaluation_data[sample_index]
-     query = sample["query"]
-     reference = sample["reference"]
-     category = sample["category"]
-     language = sample["language"]
-
-     # Reset conversation and get response
-     assistant.reset_conversation()
-     response, sources, contexts = assistant.answer(query)
-
-     # Evaluate response
-     evaluation_results = evaluate_response(query, response, reference)
-
-     return query, response, reference, evaluation_results, sources, category, language
-
- def qualitative_evaluation_interface(assistant=None):
-     """Create a Gradio interface for qualitative evaluation"""
-
-     # If assistant is None, create a simplified interface
-     if assistant is None:
-         with gr.Blocks(title="Vision 2030 Assistant - Initialization Error") as interface:
-             gr.Markdown("# Vision 2030 Assistant - Initialization Error")
-             gr.Markdown("There was an error initializing the assistant. Please check the logs for details.")
-             gr.Textbox(label="Status", value="System initialization failed")
-         return interface
-
-     sample_options = [f"{i+1}. {item['query'][:50]}..." for i, item in enumerate(comprehensive_evaluation_data)]
-
-     with gr.Blocks(title="Vision 2030 Assistant - Qualitative Evaluation") as interface:
-         gr.Markdown("# Vision 2030 Assistant - Qualitative Evaluation")
-         gr.Markdown("This interface allows you to evaluate the Vision 2030 Assistant on predefined samples or your own queries.")

-         with gr.Tab("Sample Evaluation"):
-             gr.Markdown("### Evaluate the assistant on predefined samples")
-
-             sample_dropdown = gr.Dropdown(
-                 choices=sample_options,
-                 label="Select a sample query",
-                 value=sample_options[0] if sample_options else None
-             )
-
-             eval_button = gr.Button("Evaluate Sample")
-
-             with gr.Row():
-                 with gr.Column():
-                     sample_query = gr.Textbox(label="Query")
-                     sample_category = gr.Textbox(label="Category")
-                     sample_language = gr.Textbox(label="Language")
-
-                 with gr.Column():
-                     sample_response = gr.Textbox(label="Assistant Response")
-                     sample_reference = gr.Textbox(label="Reference Answer")
-                     sample_sources = gr.Textbox(label="Sources Used")

-             with gr.Row():
-                 metrics_display = gr.JSON(label="Evaluation Metrics")
-
-         with gr.Tab("Custom Evaluation"):
-             gr.Markdown("### Evaluate the assistant on your own query")
-
-             custom_query = gr.Textbox(
-                 lines=3,
-                 placeholder="Enter your question about Saudi Vision 2030...",
-                 label="Your Query"
              )

-             custom_reference = gr.Textbox(
-                 lines=3,
-                 placeholder="Enter a reference answer (optional)...",
-                 label="Reference Answer (Optional)"
              )

-             custom_eval_button = gr.Button("Get Response and Evaluate")
-
-             custom_response = gr.Textbox(label="Assistant Response")
-             custom_sources = gr.Textbox(label="Sources Used")
-
-             custom_metrics = gr.JSON(
-                 label="Evaluation Metrics (if reference provided)",
-                 visible=True
-             )
-
-         with gr.Tab("Conversation Mode"):
-             gr.Markdown("### Have a conversation with the Vision 2030 Assistant")
-
-             chatbot = gr.Chatbot(label="Conversation")
-
-             conv_input = gr.Textbox(
-                 placeholder="Ask about Saudi Vision 2030...",
-                 label="Your message"
              )

-             with gr.Row():
-                 conv_button = gr.Button("Send")
-                 reset_button = gr.Button("Reset Conversation")

-             conv_sources = gr.Textbox(label="Sources Used")
-
-         # Sample evaluation event handlers
-         def handle_sample_selection(selection):
-             if not selection:
-                 return "", "", "", "", "", "", ""
-
-             # Extract index from the selection string
-             try:
-                 index = int(selection.split(".")[0]) - 1
-                 query, response, reference, metrics, sources, category, language = run_evaluation_on_sample(assistant, index)
-                 sources_str = ", ".join(sources)
-                 return query, response, reference, metrics, sources_str, category, language
-             except Exception as e:
-                 print(f"Error in handle_sample_selection: {e}")
-                 import traceback
-                 traceback.print_exc()
-                 return f"Error processing selection: {e}", "", "", {}, "", "", ""
-
-         eval_button.click(
-             handle_sample_selection,
-             inputs=[sample_dropdown],
-             outputs=[sample_query, sample_response, sample_reference, metrics_display,
-                      sample_sources, sample_category, sample_language]
-         )
-
-         sample_dropdown.change(
-             handle_sample_selection,
-             inputs=[sample_dropdown],
-             outputs=[sample_query, sample_response, sample_reference, metrics_display,
-                      sample_sources, sample_category, sample_language]
-         )

-         # Custom evaluation event handlers
-         @spaces.GPU
-         def handle_custom_evaluation(query, reference):
-             if not query:
-                 return "Please enter a query", "", {}

-             # Reset conversation to ensure clean state
-             assistant.reset_conversation()

-             # Get response
-             response, sources, _ = assistant.answer(query)
-             sources_str = ", ".join(sources)

-             # Evaluate if reference is provided
-             metrics = {}
-             if reference:
-                 metrics = evaluate_response(query, response, reference)

-             return response, sources_str, metrics
-
-         custom_eval_button.click(
-             handle_custom_evaluation,
-             inputs=[custom_query, custom_reference],
-             outputs=[custom_response, custom_sources, custom_metrics]
-         )
-
-         # Conversation mode event handlers
-         @spaces.GPU
-         def handle_conversation(message, history):
-             if not message:
-                 return history, "", ""

-             # Get response
-             response, sources, _ = assistant.answer(message)
-             sources_str = ", ".join(sources)

-             # Update history
-             history = history + [[message, response]]

-             return history, "", sources_str
-
-         def reset_conv():
-             result = assistant.reset_conversation()
-             return [], result, ""
-
-         conv_button.click(
-             handle_conversation,
-             inputs=[conv_input, chatbot],
-             outputs=[chatbot, conv_input, conv_sources]
-         )
-
-         reset_button.click(
-             reset_conv,
-             inputs=[],
-             outputs=[chatbot, conv_input, conv_sources]
-         )
-
-     return interface

- # Main function to run in Hugging Face Space
  def main():
-     # Start with a loading interface
-     with gr.Blocks(title="Vision 2030 Assistant - Loading") as loading_interface:
-         gr.Markdown("# Vision 2030 Assistant")
-         gr.Markdown("System is initializing. This may take a few minutes...")
-         loading_status = gr.Textbox(value="Loading system...", label="Status")

-     interface = loading_interface.queue()
-
-     # Initialize the system
-     try:
-         print("Starting system initialization...")
-         assistant = initialize_system()
-
-         print("Creating interface...")
-         full_interface = qualitative_evaluation_interface(assistant)
-
-         print("System ready!")
-         # Will replace the loading interface
-         return full_interface

-     except Exception as e:
-         print(f"Error during initialization: {e}")
-         import traceback
-         traceback.print_exc()

-         # Create a simple error interface
-         with gr.Blocks(title="Vision 2030 Assistant - Error") as error_interface:
-             gr.Markdown("# Vision 2030 Assistant - Initialization Error")
-             gr.Markdown("There was an error initializing the assistant.")
-
-             # Display error details
-             gr.Textbox(
-                 value=f"Error: {str(e)}",
-                 label="Error Details",
-                 lines=5
-             )

-             # Show potential solutions
-             gr.Markdown("## Potential Solutions")
-             gr.Markdown("""
-             1. Check that all dependencies are installed:
-                - sentencepiece
-                - accelerate
-                - transformers
-                - langchain and langchain-community
-
-             2. Verify PDF files are accessible and in the correct location
-
-             3. Check GPU memory is sufficient for loading the model
-             """)

-             # Add a button to check system
-             def check_system():
-                 results = []
-
-                 # Check dependencies
-                 for lib in ["torch", "transformers", "sentencepiece", "accelerate"]:
-                     try:
-                         module = __import__(lib)
-                         if hasattr(module, "__version__"):
-                             results.append(f"✓ {lib}: {module.__version__}")
-                         else:
-                             results.append(f"✓ {lib}: Installed")
-                     except ImportError:
-                         results.append(f"✗ {lib}: Not installed")
-
-                 # Check GPU
-                 try:
-                     import torch
-                     results.append(f"CUDA available: {torch.cuda.is_available()}")
-                     if torch.cuda.is_available():
-                         results.append(f"GPU: {torch.cuda.get_device_name(0)}")
-                         results.append(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
-                 except:
-                     results.append("Could not check GPU status")
-
-                 # Check PDFs
                  for pdf_file in ["saudi_vision203.pdf", "saudi_vision2030_ar.pdf"]:
                      if os.path.exists(pdf_file):
                          size = os.path.getsize(pdf_file) / (1024 * 1024)  # Size in MB
-                         results.append(f"{pdf_file}: Found ({size:.2f} MB)")
                      else:
-                         results.append(f"{pdf_file}: Not found")

-                 return "\n".join(results)

-             check_btn = gr.Button("Run System Check")
-             system_status = gr.Textbox(label="System Status", lines=15)
-             check_btn.click(check_system, inputs=[], outputs=[system_status])

-         return error_interface

  if __name__ == "__main__":
      demo = main()
      demo.launch()
  import os
  import re
  import json
  from tqdm import tqdm
  from pathlib import Path
  import spaces
  import gradio as gr

+ # WARNING: Don't import torch, CUDA, or other GPU-related modules at the top level.
+ # They must ONLY be imported inside functions decorated with @spaces.GPU.
+
+ # Helper functions that don't use GPU
  def safe_tokenize(text):
      """Pure regex tokenizer with no NLTK dependency"""
      if not text:

      is_arabic = len(arabic_chars) > len(text) * 0.5
      return "arabic" if is_arabic else "english"

  # Comprehensive evaluation dataset
  comprehensive_evaluation_data = [
      # === Overview ===

      }
  ]

+ # RAG Service class
+ class Vision2030Service:
+     def __init__(self):
+         self.initialized = False
+         self.model = None
+         self.tokenizer = None
+         self.vector_store = None
+         self.conversation_history = []
+
+     @spaces.GPU
+     def initialize(self):
+         """Initialize the system - ALL GPU operations must happen here"""
+         if self.initialized:
+             return True
+
+         try:
+             # Import all GPU-dependent libraries only inside this function
+             import torch
+             import PyPDF2
+             from transformers import AutoTokenizer, AutoModelForCausalLM
+             from sentence_transformers import SentenceTransformer
+             from langchain.text_splitter import RecursiveCharacterTextSplitter
+             from langchain_community.vectorstores import FAISS
+             from langchain.schema import Document
+             from langchain.embeddings import HuggingFaceEmbeddings
+
+             # Define paths for PDF files
+             pdf_files = ["saudi_vision203.pdf", "saudi_vision2030_ar.pdf"]
+
+             # Process PDFs and create vector store
+             vector_store_dir = "vector_stores"
+             os.makedirs(vector_store_dir, exist_ok=True)
+
+             if os.path.exists(os.path.join(vector_store_dir, "index.faiss")):
+                 print("Loading existing vector store...")
+                 embedding_function = HuggingFaceEmbeddings(
+                     model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
+                 )
+                 self.vector_store = FAISS.load_local(vector_store_dir, embedding_function)
+             else:
+                 print("Creating new vector store...")
+                 # Process PDFs
+                 documents = []
+                 for pdf_path in pdf_files:
+                     if not os.path.exists(pdf_path):
+                         print(f"Warning: {pdf_path} does not exist")
+                         continue
+
+                     print(f"Processing {pdf_path}...")
+                     text = ""
+                     with open(pdf_path, 'rb') as file:
+                         reader = PyPDF2.PdfReader(file)
+                         for page in reader.pages:
+                             page_text = page.extract_text()
+                             if page_text:
+                                 text += page_text + "\n\n"
+
+                     if text.strip():
+                         doc = Document(
+                             page_content=text,
+                             metadata={"source": pdf_path, "filename": os.path.basename(pdf_path)}
+                         )
+                         documents.append(doc)
+
+                 if not documents:
+                     raise ValueError("No documents were processed successfully.")
+
+                 # Split into chunks
+                 text_splitter = RecursiveCharacterTextSplitter(
+                     chunk_size=500,
+                     chunk_overlap=50,
+                     separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""]
+                 )
+
+                 chunks = []
+                 for doc in documents:
+                     doc_chunks = text_splitter.split_text(doc.page_content)
+                     chunks.extend([
+                         Document(page_content=chunk, metadata=doc.metadata)
+                         for chunk in doc_chunks
+                     ])
+
+                 # Create vector store
+                 embedding_function = HuggingFaceEmbeddings(
+                     model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
+                 )
+                 self.vector_store = FAISS.from_documents(chunks, embedding_function)
+                 self.vector_store.save_local(vector_store_dir)
+
+             # Load model
+             model_name = "ALLaM-AI/ALLaM-7B-Instruct-preview"
+             self.tokenizer = AutoTokenizer.from_pretrained(
+                 model_name,
+                 trust_remote_code=True,
+                 use_fast=False
              )

+             self.model = AutoModelForCausalLM.from_pretrained(
+                 model_name,
+                 torch_dtype=torch.bfloat16,
+                 trust_remote_code=True,
+                 device_map="auto",
              )

+             self.initialized = True
+             return True
+
+         except Exception as e:
+             import traceback
+             print(f"Initialization error: {e}")
+             print(traceback.format_exc())
+             return False
+
+     @spaces.GPU
+     def retrieve_context(self, query, top_k=5):
+         """Retrieve contexts from vector store"""
+         # GPU work stays inside this @spaces.GPU function to avoid CUDA init in the main process
+
+         if not self.initialized:
+             return []
+
+         try:
+             results = self.vector_store.similarity_search_with_score(query, k=top_k)

+             contexts = []
+             for doc, score in results:
+                 contexts.append({
+                     "content": doc.page_content,
+                     "source": doc.metadata.get("source", "Unknown"),
+                     "relevance_score": score
+                 })

+             return contexts
+         except Exception as e:
+             print(f"Error retrieving context: {e}")
+             return []
+
+     @spaces.GPU
+     def generate_response(self, query, contexts, language="auto"):
+         """Generate response using the model"""
+         # Import must be inside the function to avoid CUDA init in the main process
+         import torch
+
+         if not self.initialized or self.model is None or self.tokenizer is None:
+             return "I'm still initializing. Please try again in a moment."

+         try:
+             # Auto-detect language if not specified
+             if language == "auto":
+                 language = detect_language(query)
+
+             # Format the prompt based on language
+             if language == "arabic":
+                 instruction = (
+                     "أنت مساعد افتراضي يهتم برؤية السعودية 2030. استخدم المعلومات التالية للإجابة على السؤال. "
+                     "إذا لم تعرف الإجابة، فقل بأمانة إنك لا تعرف."
+                 )
+             else:  # english
+                 instruction = (
+                     "You are a virtual assistant for Saudi Vision 2030. Use the following information to answer the question. "
+                     "If you don't know the answer, honestly say you don't know."
+                 )
+
+             # Combine retrieved contexts
+             context_text = "\n\n".join([f"Document: {ctx['content']}" for ctx in contexts])
+
+             # Format the prompt for ALLaM instruction format
+             prompt = f"""<s>[INST] {instruction}
+
+ Context:
+ {context_text}
+
+ Question: {query} [/INST]</s>"""

+             # Generate response
+             inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

+             outputs = self.model.generate(
+                 inputs.input_ids,
+                 attention_mask=inputs.attention_mask,
+                 max_new_tokens=512,
+                 temperature=0.7,
+                 top_p=0.9,
+                 do_sample=True,
+                 repetition_penalty=1.1
              )

+             # Decode the response
+             full_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

+             # Extract just the answer part (after the instruction)
+             response = full_output.split("[/INST]")[-1].strip()
+
+             # If response is empty for some reason, return the full output
+             if not response:
+                 response = full_output
+
+             return response
+
+         except Exception as e:
+             import traceback
+             print(f"Error generating response: {e}")
+             print(traceback.format_exc())
+             return "Sorry, I encountered an error while generating a response."
+
+     @spaces.GPU
+     def answer_question(self, query):
+         """Process a user query and return a response with sources"""
+         if not self.initialized:
+             if not self.initialize():
+                 return "System initialization failed. Please check the logs.", []

+         try:
+             # Add user query to conversation history
+             self.conversation_history.append({"role": "user", "content": query})

+             # Get the full conversation context
+             conversation_context = "\n".join([
+                 f"{'User' if msg['role'] == 'user' else 'Assistant'}: {msg['content']}"
+                 for msg in self.conversation_history[-6:]  # Keep last 3 turns
+             ])

+             # Enhance query with conversation context
+             enhanced_query = f"{conversation_context}\n{query}"

+             # Retrieve relevant contexts
+             contexts = self.retrieve_context(enhanced_query, top_k=5)

+             # Generate response
+             response = self.generate_response(query, contexts)

+             # Add response to conversation history
+             self.conversation_history.append({"role": "assistant", "content": response})

+             # Get sources
+             sources = [ctx.get("source", "Unknown") for ctx in contexts]
+             unique_sources = list(set(sources))

+             return response, unique_sources
+         except Exception as e:
+             import traceback
+             print(f"Error answering question: {e}")
+             print(traceback.format_exc())
+             return f"Sorry, I encountered an error: {str(e)}", []
+
+     def reset_conversation(self):
+         """Reset the conversation history"""
+         self.conversation_history = []
+         return "Conversation has been reset."

+ # Main function with Gradio UI
  def main():
+     # Create the Vision 2030 service
+     service = Vision2030Service()

+     # Build the Gradio interface
+     with gr.Blocks(title="Vision 2030 Assistant") as demo:
+         gr.Markdown("# Vision 2030 Assistant")
+         gr.Markdown("Ask questions about Saudi Vision 2030 in English or Arabic")

+         with gr.Tab("Chat"):
+             chatbot = gr.Chatbot()
+             msg = gr.Textbox(label="Your question", placeholder="Ask about Vision 2030...")
+             clear = gr.Button("Clear History")
+
+             @spaces.GPU
+             def respond(message, history):
+                 if not message:
+                     return history, ""
+
+                 response, sources = service.answer_question(message)
+                 sources_text = ", ".join(sources) if sources else "No specific sources"
+
+                 # Format the response to include sources
+                 full_response = f"{response}\n\nSources: {sources_text}"
+
+                 return history + [[message, full_response]], ""
+
+             def reset_chat():
+                 service.reset_conversation()
+                 return [], "Conversation history has been reset."
+
+             msg.submit(respond, [msg, chatbot], [chatbot, msg])
+             clear.click(reset_chat, None, [chatbot, msg])

+         with gr.Tab("System Status"):
+             init_btn = gr.Button("Initialize System")
+             status_box = gr.Textbox(label="Status", value="System not initialized")

+             @spaces.GPU
+             def initialize_system():
+                 success = service.initialize()
+                 if success:
+                     return "System initialized successfully!"
+                 else:
+                     return "System initialization failed. Check logs for details."

+             init_btn.click(initialize_system, None, status_box)

+             # PDF check section
+             gr.Markdown("### PDF Status")
+             pdf_btn = gr.Button("Check PDF Files")
+             pdf_status = gr.Textbox(label="PDF Files")

+             def check_pdfs():
+                 result = []
                  for pdf_file in ["saudi_vision203.pdf", "saudi_vision2030_ar.pdf"]:
                      if os.path.exists(pdf_file):
                          size = os.path.getsize(pdf_file) / (1024 * 1024)  # Size in MB
+                         result.append(f"{pdf_file}: Found ({size:.2f} MB)")
                      else:
+                         result.append(f"{pdf_file}: Not found")
+                 return "\n".join(result)
+
+             pdf_btn.click(check_pdfs, None, pdf_status)
+
+             # System check section
+             gr.Markdown("### Dependencies")
+             sys_btn = gr.Button("Check Dependencies")
+             sys_status = gr.Textbox(label="Dependencies Status")
+
+             @spaces.GPU
+             def check_dependencies():
+                 result = []
+
+                 # Safe imports inside a GPU-decorated function
+                 try:
+                     import torch
+                     result.append(f"✓ PyTorch: {torch.__version__}")
+                 except ImportError:
+                     result.append("✗ PyTorch: Not installed")
+
+                 try:
+                     import transformers
+                     result.append(f"✓ Transformers: {transformers.__version__}")
+                 except ImportError:
+                     result.append("✗ Transformers: Not installed")

+                 try:
+                     import sentencepiece
+                     result.append("✓ SentencePiece: Installed")
+                 except ImportError:
+                     result.append("✗ SentencePiece: Not installed")
+
+                 try:
+                     import accelerate
+                     result.append(f"✓ Accelerate: {accelerate.__version__}")
+                 except ImportError:
+                     result.append("✗ Accelerate: Not installed")
+
+                 try:
+                     import langchain
+                     result.append(f"✓ LangChain: {langchain.__version__}")
+                 except ImportError:
+                     result.append("✗ LangChain: Not installed")
+
+                 try:
+                     import langchain_community
+                     result.append(f"✓ LangChain Community: {langchain_community.__version__}")
+                 except ImportError:
+                     result.append("✗ LangChain Community: Not installed")
+
+                 return "\n".join(result)

+             sys_btn.click(check_dependencies, None, sys_status)

+         with gr.Tab("Sample Questions"):
+             gr.Markdown("### Sample Questions to Try")
+
+             sample_questions = []
+             for item in comprehensive_evaluation_data:
+                 sample_questions.append(item["query"])
+
+             questions_md = "\n".join([f"- {q}" for q in sample_questions])
+             gr.Markdown(questions_md)
+
+     return demo

  if __name__ == "__main__":
      demo = main()
+     demo.queue()
      demo.launch()
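
The structural idea behind this commit: on ZeroGPU Spaces the main process must stay CUDA-free, so torch, transformers, and every model call are deferred into functions decorated with @spaces.GPU, and the heavyweight load happens lazily on the first GPU call instead of at import time. A minimal, self-contained sketch of that pattern follows; the model ID is taken from the diff above, while the variable names and generation settings are illustrative, not the app's exact code.

    # Sketch of the deferred-import ZeroGPU pattern (assumes the spaces,
    # gradio, transformers, and torch packages are installed on a GPU Space).
    import spaces
    import gradio as gr

    _model = None
    _tokenizer = None

    @spaces.GPU
    def generate(prompt: str) -> str:
        """All GPU work lives here; torch is imported lazily so the
        main (CPU-only) process never initializes CUDA."""
        global _model, _tokenizer
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM

        if _model is None:
            # Model ID from the diff above; loaded on first call, then cached.
            name = "ALLaM-AI/ALLaM-7B-Instruct-preview"
            _tokenizer = AutoTokenizer.from_pretrained(name, trust_remote_code=True, use_fast=False)
            _model = AutoModelForCausalLM.from_pretrained(
                name, torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto"
            )

        inputs = _tokenizer(prompt, return_tensors="pt").to(_model.device)
        outputs = _model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
        return _tokenizer.decode(outputs[0], skip_special_tokens=True)

    with gr.Blocks() as demo:
        box = gr.Textbox(label="Prompt")
        out = gr.Textbox(label="Response")
        box.submit(generate, box, out)

    demo.queue()
    demo.launch()

Keeping module scope import-light also makes the Space start quickly: the UI comes up immediately, and the model cost is paid only when a decorated callback first runs.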