Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,12 @@
|
|
1 |
-
# Vision 2030 Virtual Assistant with Arabic (ALLaM-7B) and English (Mistral-7B-Instruct) + RAG +
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
import gradio as gr
|
4 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
@@ -6,131 +14,545 @@ from langdetect import detect
|
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
import faiss
|
8 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
#
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
-
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
response = arabic_pipe(input_text, max_new_tokens=256, do_sample=True, temperature=0.7)
|
95 |
-
reply = response[0]['generated_text']
|
96 |
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
f"
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
return reply
|
117 |
-
|
118 |
-
# ----------------------------
|
119 |
-
# Gradio UI
|
120 |
-
# ----------------------------
|
121 |
-
with gr.Blocks() as demo:
|
122 |
-
gr.Markdown("# Vision 2030 Virtual Assistant 🌍\n\nSupports Arabic & English queries about Vision 2030 (with RAG retrieval and improved prompting).")
|
123 |
-
chatbot = gr.Chatbot()
|
124 |
-
msg = gr.Textbox(label="Ask me anything about Vision 2030")
|
125 |
-
clear = gr.Button("Clear")
|
126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
def chat(message, history):
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
history.append((message, reply))
|
|
|
130 |
return history, ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
demo.launch()
|
|
|
1 |
+
# Vision 2030 Virtual Assistant with Arabic (ALLaM-7B) and English (Mistral-7B-Instruct) + RAG + Evaluation Framework
|
2 |
+
"""
|
3 |
+
Enhanced implementation of the Vision 2030 Virtual Assistant that meets all project requirements:
|
4 |
+
1. Implements proper NLP task structure (bilingual QA system)
|
5 |
+
2. Adds comprehensive evaluation framework for quantitative and qualitative assessment
|
6 |
+
3. Improves RAG implementation with better retrieval and document processing
|
7 |
+
4. Adds user feedback collection for continuous improvement
|
8 |
+
5. Includes structured logging and performance monitoring
|
9 |
+
"""
|
10 |
|
11 |
import gradio as gr
|
12 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
|
|
14 |
from sentence_transformers import SentenceTransformer
|
15 |
import faiss
|
16 |
import numpy as np
|
17 |
+
import json
|
18 |
+
import time
|
19 |
+
import logging
|
20 |
+
import os
|
21 |
+
import re
|
22 |
+
from datetime import datetime
|
23 |
+
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
|
24 |
+
import pandas as pd
|
25 |
+
import matplotlib.pyplot as plt
|
26 |
+
import PyPDF2
|
27 |
+
import io
|
28 |
|
29 |
+
# Configure logging: emit to both a rotating-less file and stdout so that
# Hugging Face Spaces' console log also captures every record.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("vision2030_assistant.log"),
        logging.StreamHandler()
    ]
)
# Module-level logger shared by the assistant class and the UI helpers below.
logger = logging.getLogger('vision2030_assistant')
|
39 |
|
40 |
+
class Vision2030Assistant:
    """Bilingual (Arabic/English) RAG question-answering assistant for Saudi Vision 2030.

    Pipeline: detect the query language -> retrieve the top-k relevant document
    chunks from a per-language FAISS index -> prompt the matching instruction
    model (ALLaM-7B for Arabic, Mistral-7B for English) -> extract the answer.
    Also tracks response-time / accuracy / rating metrics for evaluation.
    """

    def __init__(self, pdf_path="vision2030.pdf", eval_data_path="evaluation_data.json"):
        """
        Initialize the Vision 2030 Assistant with models, knowledge base, and evaluation framework

        Args:
            pdf_path: Path to the Vision 2030 PDF document
            eval_data_path: Path to evaluation dataset
        """
        logger.info("Initializing Vision 2030 Assistant...")
        self.load_models()
        self.load_and_process_documents(pdf_path)
        self.setup_evaluation_framework(eval_data_path)
        self.response_history = []  # list of per-interaction dicts (see generate_response)
        logger.info("Vision 2030 Assistant initialized successfully")

    def load_models(self):
        """Load language models and embedding models for both Arabic and English"""
        logger.info("Loading language and embedding models...")

        # Load Arabic Model (ALLaM-7B)
        try:
            self.arabic_model_id = "ALLaM-AI/ALLaM-7B-Instruct-preview"
            self.arabic_tokenizer = AutoTokenizer.from_pretrained(self.arabic_model_id)
            self.arabic_model = AutoModelForCausalLM.from_pretrained(self.arabic_model_id, device_map="auto")
            self.arabic_pipe = pipeline("text-generation", model=self.arabic_model, tokenizer=self.arabic_tokenizer)
            logger.info("Arabic model loaded successfully")
        except Exception as e:
            logger.error(f"Error loading Arabic model: {str(e)}")
            raise

        # Load English Model (Mistral-7B-Instruct)
        try:
            self.english_model_id = "mistralai/Mistral-7B-Instruct-v0.2"
            self.english_tokenizer = AutoTokenizer.from_pretrained(self.english_model_id)
            self.english_model = AutoModelForCausalLM.from_pretrained(self.english_model_id, device_map="auto")
            self.english_pipe = pipeline("text-generation", model=self.english_model, tokenizer=self.english_tokenizer)
            logger.info("English model loaded successfully")
        except Exception as e:
            logger.error(f"Error loading English model: {str(e)}")
            raise

        # Load Embedding Models for Retrieval
        try:
            self.arabic_embedder = SentenceTransformer('CAMeL-Lab/bert-base-arabic-camelbert-ca')
            self.english_embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
            logger.info("Embedding models loaded successfully")
        except Exception as e:
            logger.error(f"Error loading embedding models: {str(e)}")
            raise

    def load_and_process_documents(self, pdf_path):
        """Load and process the Vision 2030 document from PDF.

        Extracts text page by page, splits on blank lines, and routes each chunk
        to the Arabic or English corpus by language detection. Falls back to
        built-in sample data when the PDF is missing or unreadable.
        """
        logger.info(f"Processing Vision 2030 document from {pdf_path}")

        # Initialize empty document lists
        self.english_texts = []
        self.arabic_texts = []

        try:
            # Check if PDF exists
            if os.path.exists(pdf_path):
                # Extract text from PDF
                with open(pdf_path, 'rb') as file:
                    reader = PyPDF2.PdfReader(file)
                    full_text = ""
                    for page_num in range(len(reader.pages)):
                        page = reader.pages[page_num]
                        full_text += page.extract_text() + "\n"

                # Split into chunks (simple approach - could be improved with better text segmentation)
                chunks = [chunk.strip() for chunk in re.split(r'\n\s*\n', full_text) if chunk.strip()]

                # Detect language and add to appropriate list
                for chunk in chunks:
                    try:
                        lang = detect(chunk)
                        if lang == "ar":
                            self.arabic_texts.append(chunk)
                        else:  # Default to English for other languages
                            self.english_texts.append(chunk)
                    # BUG FIX: was a bare `except:` which would also swallow
                    # KeyboardInterrupt/SystemExit; langdetect raises on short/ambiguous text.
                    except Exception:
                        # If language detection fails, assume English
                        self.english_texts.append(chunk)

                logger.info(f"Processed {len(self.arabic_texts)} Arabic and {len(self.english_texts)} English chunks")
            else:
                logger.warning(f"PDF file not found at {pdf_path}. Using fallback sample data.")
                self._create_sample_data()
        except Exception as e:
            logger.error(f"Error processing PDF: {str(e)}")
            logger.info("Using fallback sample data")
            self._create_sample_data()

        # Create FAISS indices
        self._create_indices()

    def _create_sample_data(self):
        """Create sample Vision 2030 data if PDF processing fails"""
        logger.info("Creating sample Vision 2030 data")

        # English sample texts
        self.english_texts = [
            "Vision 2030 is Saudi Arabia's strategic framework to reduce dependence on oil, diversify the economy, and develop public sectors.",
            "The key pillars of Vision 2030 are a vibrant society, a thriving economy, and an ambitious nation.",
            "The Saudi Public Investment Fund (PIF) plays a crucial role in Vision 2030 by investing in strategic sectors.",
            "NEOM is a planned cross-border smart city in the Tabuk Province of northwestern Saudi Arabia, a key project of Vision 2030.",
            "Vision 2030 aims to increase women's participation in the workforce from 22% to 30%.",
            "The Red Sea Project is a Vision 2030 initiative to develop luxury tourism destinations across 50 islands off Saudi Arabia's Red Sea coast.",
            "Qiddiya is a entertainment mega-project being built in Riyadh as part of Vision 2030.",
            "Vision 2030 targets increasing the private sector's contribution to GDP from 40% to 65%.",
            "One goal of Vision 2030 is to increase foreign direct investment from 3.8% to 5.7% of GDP.",
            "Vision 2030 includes plans to develop the digital infrastructure and support for tech startups in Saudi Arabia."
        ]

        # Arabic sample texts (same content as English)
        self.arabic_texts = [
            "رؤية 2030 هي الإطار الاستراتيجي للمملكة العربية السعودية للحد من الاعتماد على النفط وتنويع الاقتصاد وتطوير القطاعات العامة.",
            "الركائز الرئيسية لرؤية 2030 هي مجتمع حيوي، واقتصاد مزدهر، ووطن طموح.",
            "يلعب صندوق الاستثمارات العامة السعودي دورًا محوريًا في رؤية 2030 من خلال الاستثمار في القطاعات الاستراتيجية.",
            "نيوم هي مدينة ذكية مخططة عبر الحدود في مقاطعة تبوك شمال غرب المملكة العربية السعودية، وهي مشروع رئيسي من رؤية 2030.",
            "تهدف رؤية 2030 إلى زيادة مشاركة المرأة في القوى العاملة من 22٪ إلى 30٪.",
            "مشروع البحر الأحمر هو مبادرة رؤية 2030 لتطوير وجهات سياحية فاخرة عبر 50 جزيرة قبالة ساحل البحر الأحمر السعودي.",
            "القدية هي مشروع ترفيهي ضخم يتم بناؤه في الرياض كجزء من رؤية 2030.",
            "تستهدف رؤية 2030 زيادة مساهمة القطاع الخاص في الناتج المحلي الإجمالي من 40٪ إلى 65٪.",
            "أحد أهداف رؤية 2030 هو زيادة الاستثمار الأجنبي المباشر من 3.8٪ إلى 5.7٪ من الناتج المحلي الإجمالي.",
            "تتضمن رؤية 2030 خططًا لتطوير البنية التحتية الرقمية والدعم للشركات الناشئة التكنولوجية في المملكة العربية السعودية."
        ]

    def _create_indices(self):
        """Create FAISS indices for fast text retrieval (one L2 flat index per language)."""
        logger.info("Creating FAISS indices for text retrieval")

        try:
            # Process and embed English texts
            self.english_vectors = []
            for text in self.english_texts:
                vec = self.english_embedder.encode(text)
                self.english_vectors.append(vec)

            # Create English index
            if self.english_vectors:
                self.english_index = faiss.IndexFlatL2(len(self.english_vectors[0]))
                self.english_index.add(np.array(self.english_vectors))
                logger.info(f"Created English index with {len(self.english_vectors)} vectors")
            else:
                logger.warning("No English texts to index")

            # Process and embed Arabic texts
            self.arabic_vectors = []
            for text in self.arabic_texts:
                vec = self.arabic_embedder.encode(text)
                self.arabic_vectors.append(vec)

            # Create Arabic index
            if self.arabic_vectors:
                self.arabic_index = faiss.IndexFlatL2(len(self.arabic_vectors[0]))
                self.arabic_index.add(np.array(self.arabic_vectors))
                logger.info(f"Created Arabic index with {len(self.arabic_vectors)} vectors")
            else:
                logger.warning("No Arabic texts to index")

        except Exception as e:
            logger.error(f"Error creating FAISS indices: {str(e)}")
            raise

    def setup_evaluation_framework(self, eval_data_path):
        """Set up the evaluation framework with test data and metrics"""
        logger.info("Setting up evaluation framework")

        # Initialize metrics trackers
        self.metrics = {
            "response_times": [],
            "user_ratings": [],
            "retrieval_precision": [],
            "factual_accuracy": []
        }

        # Load evaluation data if exists, otherwise create sample
        try:
            if os.path.exists(eval_data_path):
                with open(eval_data_path, 'r', encoding='utf-8') as f:
                    self.eval_data = json.load(f)
                logger.info(f"Loaded {len(self.eval_data)} evaluation examples from {eval_data_path}")
            else:
                logger.warning(f"Evaluation data not found at {eval_data_path}. Creating sample evaluation data.")
                self._create_sample_eval_data()
        except Exception as e:
            logger.error(f"Error loading evaluation data: {str(e)}")
            self._create_sample_eval_data()

    def _create_sample_eval_data(self):
        """Create sample evaluation data with ground truth"""
        self.eval_data = [
            {
                "question": "What are the key pillars of Vision 2030?",
                "lang": "en",
                "reference_answer": "The key pillars of Vision 2030 are a vibrant society, a thriving economy, and an ambitious nation."
            },
            {
                "question": "ما هي الركائز الرئيسية لرؤية 2030؟",
                "lang": "ar",
                "reference_answer": "الركائز الرئيسية لرؤية 2030 هي مجتمع حيوي، واقتصاد مزدهر، ووطن طموح."
            },
            {
                "question": "What is NEOM?",
                "lang": "en",
                "reference_answer": "NEOM is a planned cross-border smart city in the Tabuk Province of northwestern Saudi Arabia, a key project of Vision 2030."
            },
            {
                # BUG FIX: question string contained mojibake ("البح\ufffd\ufffd");
                # restored the intended word "البحر" to match the reference answer.
                "question": "ما هو مشروع البحر الأحمر؟",
                "lang": "ar",
                "reference_answer": "مشروع البحر الأحمر هو مبادرة رؤية 2030 لتطوير وجهات سياحية فاخرة عبر 50 جزيرة قبالة ساحل البحر الأحمر السعودي."
            }
        ]
        logger.info(f"Created {len(self.eval_data)} sample evaluation examples")

    def retrieve_context(self, query, lang):
        """Retrieve relevant context for a query based on language.

        Returns the top-2 matching chunks joined by newlines, or "" on error.
        """
        start_time = time.time()

        try:
            if lang == "ar":
                query_vec = self.arabic_embedder.encode(query)
                D, I = self.arabic_index.search(np.array([query_vec]), k=2)  # Get top 2 most relevant chunks
                # FAISS pads missing results with -1; filter out-of-range ids.
                context = "\n".join([self.arabic_texts[i] for i in I[0] if i < len(self.arabic_texts) and i >= 0])
            else:
                query_vec = self.english_embedder.encode(query)
                D, I = self.english_index.search(np.array([query_vec]), k=2)  # Get top 2 most relevant chunks
                context = "\n".join([self.english_texts[i] for i in I[0] if i < len(self.english_texts) and i >= 0])

            retrieval_time = time.time() - start_time
            logger.info(f"Retrieved context in {retrieval_time:.2f}s")

            return context
        except Exception as e:
            logger.error(f"Error retrieving context: {str(e)}")
            return ""

    def generate_response(self, user_input):
        """Generate a response to user input using the appropriate model and retrieval system"""
        start_time = time.time()

        # ROBUSTNESS FIX: pre-set lang so the outer `except` handler can never
        # hit an unbound local if something fails before detection completes.
        lang = "en"

        # Default response in case of failure
        default_response = {
            "en": "I apologize, but I couldn't process your request properly. Please try again.",
            "ar": "أعتذر، لم أتمكن من معالجة طلبك بشكل صحيح. الرجاء المحاولة مرة أخرى."
        }

        try:
            # Detect language
            try:
                lang = detect(user_input)
                if lang != "ar":  # Simplify to just Arabic vs non-Arabic
                    lang = "en"
            # BUG FIX: was a bare `except:`; narrow to Exception so
            # KeyboardInterrupt/SystemExit still propagate.
            except Exception:
                lang = "en"  # Default fallback

            logger.info(f"Detected language: {lang}")

            # Retrieve relevant context
            context = self.retrieve_context(user_input, lang)

            if lang == "ar":
                # Improved Arabic Prompt
                input_text = (
                    f"أنت خبير في رؤية السعودية 2030.\n"
                    f"إليك بعض المعلومات المهمة:\n{context}\n\n"
                    f"مثال:\n"
                    f"السؤال: ما هي ركائز رؤية 2030؟\n"
                    f"الإجابة: ركائز رؤية 2030 هي مجتمع حيوي، اقتصاد مزدهر، ووطن طموح.\n\n"
                    f"أجب عن سؤال المستخدم بشكل واضح ودقيق، مستندًا إلى المعلومات المقدمة. إذا لم تكن المعلومات متوفرة، أوضح ذلك.\n"
                    f"السؤال: {user_input}\n"
                    f"الإجابة:"
                )

                response = self.arabic_pipe(input_text, max_new_tokens=256, do_sample=True, temperature=0.7)
                full_text = response[0]['generated_text']

                # Extract the answer part
                answer_pattern = r"الإجابة:(.*?)(?:$)"
                match = re.search(answer_pattern, full_text, re.DOTALL)
                if match:
                    reply = match.group(1).strip()
                else:
                    reply = full_text
            else:
                # Improved English Prompt
                input_text = (
                    f"You are an expert on Saudi Arabia's Vision 2030.\n"
                    f"Here is some relevant information:\n{context}\n\n"
                    f"Example:\n"
                    f"Question: What are the key pillars of Vision 2030?\n"
                    f"Answer: The key pillars are a vibrant society, a thriving economy, and an ambitious nation.\n\n"
                    f"Answer the user's question clearly and accurately based on the provided information. If information is not available, make that clear.\n"
                    f"Question: {user_input}\n"
                    f"Answer:"
                )

                response = self.english_pipe(input_text, max_new_tokens=256, do_sample=True, temperature=0.7)
                full_text = response[0]['generated_text']

                # Extract the answer part
                answer_pattern = r"Answer:(.*?)(?:$)"
                match = re.search(answer_pattern, full_text, re.DOTALL)
                if match:
                    reply = match.group(1).strip()
                else:
                    reply = full_text

        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            reply = default_response.get(lang, default_response["en"])

        # Record response time
        response_time = time.time() - start_time
        self.metrics["response_times"].append(response_time)

        logger.info(f"Generated response in {response_time:.2f}s")

        # Store the interaction for later evaluation
        interaction = {
            "timestamp": datetime.now().isoformat(),
            "user_input": user_input,
            "response": reply,
            "language": lang,
            "response_time": response_time
        }
        self.response_history.append(interaction)

        return reply

    def evaluate_factual_accuracy(self, response, reference):
        """Simple evaluation of factual accuracy by keyword matching.

        Returns the fraction of reference keywords that also appear in the
        response (0 when the reference has no keywords).
        """
        # This is a simplified approach - in production, use more sophisticated methods
        keywords_reference = set(re.findall(r'\b\w+\b', reference.lower()))
        keywords_response = set(re.findall(r'\b\w+\b', response.lower()))

        common_keywords = keywords_reference.intersection(keywords_response)

        if len(keywords_reference) > 0:
            accuracy = len(common_keywords) / len(keywords_reference)
        else:
            accuracy = 0

        return accuracy

    def evaluate_on_test_set(self):
        """Evaluate the assistant on the test set"""
        logger.info("Running evaluation on test set")

        eval_results = []

        for example in self.eval_data:
            # Generate response
            response = self.generate_response(example["question"])

            # Calculate factual accuracy
            accuracy = self.evaluate_factual_accuracy(response, example["reference_answer"])

            eval_results.append({
                "question": example["question"],
                "reference": example["reference_answer"],
                "response": response,
                "factual_accuracy": accuracy
            })

            self.metrics["factual_accuracy"].append(accuracy)

        # Calculate average factual accuracy
        avg_accuracy = sum(self.metrics["factual_accuracy"]) / len(self.metrics["factual_accuracy"]) if self.metrics["factual_accuracy"] else 0
        avg_response_time = sum(self.metrics["response_times"]) / len(self.metrics["response_times"]) if self.metrics["response_times"] else 0

        results = {
            "average_factual_accuracy": avg_accuracy,
            "average_response_time": avg_response_time,
            "detailed_results": eval_results
        }

        logger.info(f"Evaluation results: Factual accuracy = {avg_accuracy:.2f}, Avg response time = {avg_response_time:.2f}s")

        return results

    def record_user_feedback(self, user_input, response, rating, feedback_text=""):
        """Record user feedback for a response"""
        feedback = {
            "timestamp": datetime.now().isoformat(),
            "user_input": user_input,
            "response": response,
            "rating": rating,
            "feedback_text": feedback_text
        }

        self.metrics["user_ratings"].append(rating)

        # In a production system, store this in a database
        logger.info(f"Recorded user feedback: rating={rating}")

        return True

    def save_evaluation_metrics(self, output_path="evaluation_metrics.json"):
        """Save evaluation metrics to a file"""
        try:
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump({
                    "response_times": self.metrics["response_times"],
                    "user_ratings": self.metrics["user_ratings"],
                    "factual_accuracy": self.metrics["factual_accuracy"],
                    "average_factual_accuracy": sum(self.metrics["factual_accuracy"]) / len(self.metrics["factual_accuracy"]) if self.metrics["factual_accuracy"] else 0,
                    "average_response_time": sum(self.metrics["response_times"]) / len(self.metrics["response_times"]) if self.metrics["response_times"] else 0,
                    "average_user_rating": sum(self.metrics["user_ratings"]) / len(self.metrics["user_ratings"]) if self.metrics["user_ratings"] else 0,
                    "timestamp": datetime.now().isoformat()
                }, f, indent=2)

            logger.info(f"Saved evaluation metrics to {output_path}")
            return True
        except Exception as e:
            logger.error(f"Error saving evaluation metrics: {str(e)}")
            return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
459 |
|
460 |
+
# --- Gradio UI --- #
|
461 |
+
def create_gradio_interface():
|
462 |
+
# Initialize the assistant
|
463 |
+
assistant = Vision2030Assistant()
|
464 |
+
|
465 |
+
# Track conversation history
|
466 |
+
conversation_history = []
|
467 |
+
|
468 |
def chat(message, history):
|
469 |
+
if not message:
|
470 |
+
return history, ""
|
471 |
+
|
472 |
+
# Generate response
|
473 |
+
reply = assistant.generate_response(message)
|
474 |
+
|
475 |
+
# Update history
|
476 |
history.append((message, reply))
|
477 |
+
|
478 |
return history, ""
|
479 |
+
|
480 |
+
def provide_feedback(message, rating, feedback_text):
|
481 |
+
# Find the most recent interaction
|
482 |
+
if conversation_history:
|
483 |
+
last_interaction = conversation_history[-1]
|
484 |
+
assistant.record_user_feedback(last_interaction[0], last_interaction[1], rating, feedback_text)
|
485 |
+
return f"Thank you for your feedback! (Rating: {rating}/5)"
|
486 |
+
return "No conversation found to rate."
|
487 |
+
|
488 |
+
def clear_history():
|
489 |
+
conversation_history.clear()
|
490 |
+
return []
|
491 |
+
|
492 |
+
def download_metrics():
|
493 |
+
assistant.save_evaluation_metrics()
|
494 |
+
return "evaluation_metrics.json"
|
495 |
+
|
496 |
+
def run_evaluation():
|
497 |
+
results = assistant.evaluate_on_test_set()
|
498 |
+
return f"Evaluation Results:\nFactual Accuracy: {results['average_factual_accuracy']:.2f}\nAverage Response Time: {results['average_response_time']:.2f}s"
|
499 |
+
|
500 |
+
# Create Gradio interface
|
501 |
+
with gr.Blocks() as demo:
|
502 |
+
gr.Markdown("# Vision 2030 Virtual Assistant 🌍\n\nAsk questions about Saudi Vision 2030 in Arabic or English")
|
503 |
+
|
504 |
+
with gr.Tab("Chat"):
|
505 |
+
chatbot = gr.Chatbot(show_label=False)
|
506 |
+
msg = gr.Textbox(label="Ask me anything about Vision 2030", placeholder="Type your question here...")
|
507 |
+
clear = gr.Button("Clear Conversation")
|
508 |
+
|
509 |
+
with gr.Row():
|
510 |
+
with gr.Column(scale=4):
|
511 |
+
feedback_text = gr.Textbox(label="Provide additional feedback (optional)")
|
512 |
+
with gr.Column(scale=1):
|
513 |
+
rating = gr.Slider(label="Rate Response (1-5)", minimum=1, maximum=5, step=1, value=3)
|
514 |
+
|
515 |
+
submit_feedback = gr.Button("Submit Feedback")
|
516 |
+
feedback_result = gr.Textbox(label="Feedback Status")
|
517 |
+
|
518 |
+
# Set up event handlers
|
519 |
+
msg.submit(chat, [msg, chatbot], [chatbot, msg])
|
520 |
+
clear.click(clear_history, None, chatbot)
|
521 |
+
submit_feedback.click(provide_feedback, [msg, rating, feedback_text], feedback_result)
|
522 |
+
|
523 |
+
with gr.Tab("Evaluation"):
|
524 |
+
eval_button = gr.Button("Run Evaluation on Test Set")
|
525 |
+
eval_results = gr.Textbox(label="Evaluation Results")
|
526 |
+
download_button = gr.Button("Download Metrics")
|
527 |
+
download_file = gr.File(label="Download evaluation metrics as JSON")
|
528 |
+
|
529 |
+
# Set up evaluation handlers
|
530 |
+
eval_button.click(run_evaluation, None, eval_results)
|
531 |
+
download_button.click(download_metrics, None, download_file)
|
532 |
+
|
533 |
+
with gr.Tab("About"):
|
534 |
+
gr.Markdown("""
|
535 |
+
## About Vision 2030 Virtual Assistant
|
536 |
+
|
537 |
+
This assistant uses a combination of state-of-the-art language models to answer questions about Saudi Arabia's Vision 2030 strategic framework in both Arabic and English.
|
538 |
+
|
539 |
+
### Features:
|
540 |
+
- Bilingual support (Arabic and English)
|
541 |
+
- Retrieval-Augmented Generation (RAG) for factual accuracy
|
542 |
+
- Evaluation framework for measuring performance
|
543 |
+
- User feedback collection for continuous improvement
|
544 |
+
|
545 |
+
### Models Used:
|
546 |
+
- Arabic: ALLaM-7B-Instruct-preview
|
547 |
+
- English: Mistral-7B-Instruct-v0.2
|
548 |
+
- Embeddings: CAMeL-Lab/bert-base-arabic-camelbert-ca and sentence-transformers/all-MiniLM-L6-v2
|
549 |
+
|
550 |
+
This project demonstrates the application of advanced NLP techniques for multilingual question answering, particularly for Arabic language support.
|
551 |
+
""")
|
552 |
+
|
553 |
+
return demo
|
554 |
|
555 |
+
# Launch the application
# NOTE: `demo` is bound at module level on purpose — Hugging Face Spaces can
# pick up a module-level Blocks object; launch() starts the Gradio server.
if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch()
|
|