import gradio as gr
import torch
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from langdetect import detect
from typing import List, Dict, Tuple, Any
from datetime import datetime

class MultilingualNewsChatbot:
    """
    Retrieval-based news chatbot that answers in English and Arabic.

    Known questions are embedded with a multilingual sentence-embedding model
    and stored in a flat FAISS L2 index. A user query is answered with the
    stored answer of its nearest known question when that match scores at
    least ``similarity_threshold``; otherwise a fallback message in the
    detected language is returned.
    """

    # Language assumed when detection fails before a language could be determined.
    DEFAULT_LANGUAGE = 'en'

    def __init__(self, 
                 embedding_model_name: str = 'paraphrase-multilingual-MiniLM-L12-v2',
                 similarity_threshold: float = 0.65):
        """
        Load the embedding model, create an empty index and preload the
        built-in knowledge base.

        Args:
            embedding_model_name (str): Name of the sentence-transformers model
                used to embed questions and queries.
            similarity_threshold (float): Minimum ``similarity_score`` (see
                ``find_similar_question``) a match needs for its answer to be
                returned instead of the fallback message.
        """
        # Embedding model and a flat L2 index sized to the model's output.
        self.embedding_model = SentenceTransformer(embedding_model_name)
        self.embedding_dimension = self.embedding_model.get_sentence_embedding_dimension()
        self.index = faiss.IndexFlatL2(self.embedding_dimension)

        # knowledge_base[i] describes the vector stored at position i of the
        # index; the two are only ever appended to together.
        self.knowledge_base = []
        self.similarity_threshold = similarity_threshold

        # Messages returned when no stored answer is good enough, keyed by
        # language code.
        self.FALLBACK_RESPONSES = {
            'ar': "عذرًا، لم نتمكن من العثور على معلومات دقيقة حول استفسارك. هل يمكنك إعادة صياغة السؤال بشكل مختلف؟",
            'en': "We apologize, but we couldn't find precise information about your query. Could you rephrase your question?"
        }

        # One record per successfully answered query (see generate_response).
        self.conversation_history = []

        # Seed the index with the built-in question/answer pairs.
        self._preload_news_knowledge()

    def _preload_news_knowledge(self):
        """
        Load the built-in English and Arabic question/answer pairs.

        Each entry lists several phrasings of one question sharing a single
        answer; the entry is flattened to one (question, answer, language,
        category) row per phrasing and added in a single batch.
        """
        news_knowledge_pairs = [
            # English News Knowledge
            {
                'questions': [
                    "What is happening in the Middle East?",
                    "Tell me about current tensions in the region",
                    "Middle East conflict update"
                ],
                'answer': "The Middle East continues to experience complex geopolitical challenges. Recent developments include ongoing diplomatic efforts to reduce tensions, humanitarian concerns, and international diplomatic negotiations aimed at promoting stability in the region.",
                'language': 'en',
                'category': 'International Politics'
            },
            # Arabic News Knowledge
            {
                'questions': [
                    "ما هي آخر التطورات في الشرق الأوسط؟",
                    "حدثني عن الوضع الحالي في المنطقة",
                    "تحديث عن الأوضاع السياسية"
                ],
                'answer': "الشرق الأوسط يمر بتحديات جيوسياسية معقدة. تستمر الجهود الدبلوماسية للحد من التوترات، مع التركيز على القضايا الإنسانية والمفاوضات الدولية الهادفة إلى تعزيز الاستقرار في المنطقة.",
                'language': 'ar',
                'category': 'السياسة الدولية'
            },
        ]

        # Flatten: every phrasing gets its own row carrying the shared fields.
        all_questions = []
        all_answers = []
        all_languages = []
        all_categories = []

        for knowledge in news_knowledge_pairs:
            phrasings = knowledge['questions']
            count = len(phrasings)
            all_questions.extend(phrasings)
            all_answers.extend([knowledge['answer']] * count)
            all_languages.extend([knowledge['language']] * count)
            all_categories.extend([knowledge.get('category', 'General')] * count)

        self.add_knowledge_batch(all_questions, all_answers, all_languages, all_categories)

    def add_knowledge_batch(self, 
                             questions: List[str], 
                             answers: List[str], 
                             languages: List[str] = None, 
                             categories: List[str] = None):
        """
        Embed a batch of questions and add them, with their answers, to the
        index and knowledge base.

        Args:
            questions (List[str]): Questions to index.
            answers (List[str]): Answer for each question, same order.
            languages (List[str], optional): Language code for each question.
                When omitted or empty, each question's language is detected.
            categories (List[str], optional): Category for each question.
                When omitted or empty, every question gets ``'General'``.

        Raises:
            ValueError: If the provided lists differ in length.
        """
        expected = len(questions)

        # An omitted/empty optional list counts as matching.
        lengths_match = (
            len(answers) == expected
            and (len(languages) if languages else expected) == expected
            and (len(categories) if categories else expected) == expected
        )
        if not lengths_match:
            raise ValueError("Input lists must have matching lengths")

        # Nothing to embed or store; skip the model call entirely.
        if expected == 0:
            return

        # Detect languages if not provided
        if not languages:
            languages = [detect(q) for q in questions]

        # Default to 'General' if no categories provided
        if not categories:
            categories = ['General'] * expected

        # FAISS expects a contiguous float32 matrix, one row per question.
        question_embeddings = np.ascontiguousarray(
            self.embedding_model.encode(questions), dtype=np.float32
        )
        if question_embeddings.size == 0:
            return

        self.index.add(question_embeddings)

        # Appended in the same order as the vectors so that index position i
        # keeps corresponding to knowledge_base[i].
        self.knowledge_base.extend(
            {'question': q, 'answer': a, 'language': lang, 'category': cat}
            for q, a, lang, cat in zip(questions, answers, languages, categories)
        )

    def find_similar_question(self, query: str, top_k: int = 3) -> List[Dict]:
        """
        Find the stored questions closest to ``query``.

        Args:
            query (str): Text to match against the stored questions.
            top_k (int): Maximum number of matches to return.

        Returns:
            List[Dict]: Copies of the matching knowledge-base entries, each
            extended with ``'distance'`` (the value reported by the index) and
            ``'similarity_score'`` (``1 / (1 + distance)``), sorted by
            descending score. Empty when ``top_k`` is not positive or nothing
            has been indexed.
        """
        kb_size = len(self.knowledge_base)
        if top_k <= 0 or kb_size == 0:
            return []

        # Single-row float32 matrix, as required by the index.
        query_vector = np.ascontiguousarray(
            self.embedding_model.encode(query), dtype=np.float32
        ).reshape(1, -1)
        distances, indices = self.index.search(query_vector, top_k)

        results = []
        for dist, idx in zip(distances[0], indices[0]):
            position = int(idx)
            # FAISS pads missing results with label -1 when fewer than top_k
            # vectors exist; a plain `idx < size` test would let -1 through
            # and silently return the last entry.
            if not 0 <= position < kb_size:
                continue

            # Plain Python floats keep scores independent of NumPy's scalar
            # promotion rules and format cleanly.
            distance = float(dist)
            # NOTE(review): per the FAISS docs IndexFlatL2 reports *squared*
            # L2 distances and the embeddings are not normalised here, so this
            # score may be very strict relative to the default threshold -
            # confirm the threshold against real queries.
            match = dict(self.knowledge_base[position])
            match['similarity_score'] = 1 / (1 + distance)
            match['distance'] = distance
            results.append(match)

        results.sort(key=lambda item: item['similarity_score'], reverse=True)
        return results

    def generate_response(self, query: str, include_confidence: bool = False) -> str:
        """
        Answer ``query`` from the knowledge base or with a fallback message.

        Args:
            query (str): User's input query.
            include_confidence (bool): When True and a stored answer is used,
                append the match's similarity score to the response.

        Returns:
            str: The stored answer of the best match when its score reaches
            ``similarity_threshold``; otherwise the fallback message for the
            detected language (English if that language has no fallback, or
            if processing failed before a language was detected).
        """
        # Bound before the try block so the error handler below can always
        # read it, even when language detection itself is what failed.
        lang = self.DEFAULT_LANGUAGE
        try:
            # Detect input language
            lang = detect(query)

            # Only the single best match is needed.
            similar_results = self.find_similar_question(query, top_k=1)
            best = similar_results[0] if similar_results else None

            if best is not None and best['similarity_score'] >= self.similarity_threshold:
                response = best['answer']

                # Optionally add confidence score
                if include_confidence:
                    confidence = best['similarity_score']
                    source_label = 'معلومات المصدر:' if lang == 'ar' else 'Source Information:'
                    confidence_label = 'الثقة:' if lang == 'ar' else 'Confidence:'
                    response += f"\n\n{source_label} {confidence_label} {confidence:.2%}"
            else:
                # Fallback response
                response = self.FALLBACK_RESPONSES.get(lang, self.FALLBACK_RESPONSES['en'])

            # Log conversation
            self.conversation_history.append({
                'query': query,
                'response': response,
                'language': lang,
                'timestamp': datetime.now().isoformat()
            })

            return response

        except Exception as e:
            # Best-effort boundary: report the problem and still give the user
            # a fallback message in the best-known language.
            print(f"Error processing query: {str(e)}")
            return self.FALLBACK_RESPONSES.get(lang, self.FALLBACK_RESPONSES['en'])

def chat_interface(message, history):
    """
    Gradio chat callback: answer one user message.

    The shared chatbot is created lazily on the first call and kept in the
    module-level name ``news_chatbot`` for later calls.

    Args:
        message (str): User's input message
        history (list): Conversation history supplied by the UI (not used here)

    Returns:
        str: The chatbot's response (with confidence information), or a
        generic error message if anything goes wrong.
    """
    global news_chatbot
    try:
        # Build the chatbot only once; later calls reuse the same instance.
        module_scope = globals()
        if 'news_chatbot' not in module_scope:
            news_chatbot = MultilingualNewsChatbot()

        return news_chatbot.generate_response(message, include_confidence=True)
    except Exception as exc:
        # Last line of defence for the UI: log and return a friendly message.
        print(f"Interface error: {exc}")
        return "Sorry, an error occurred while processing your request."

# Build the Gradio chat UI; every user message is handled by chat_interface.
demo = gr.ChatInterface(
    fn=chat_interface,
    theme="soft",
    title="🌍 Multilingual News Chatbot",
    description="Get insights in multiple languages",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)