from datetime import datetime
from typing import List, Dict, Optional, Tuple, Any

# Third-party imports (original relative order preserved).
import gradio as gr
import torch
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from langdetect import detect


class MultilingualNewsChatbot:
    """Retrieval-based news chatbot answering in English and Arabic.

    Questions are embedded with a multilingual sentence-transformer model
    and stored in a FAISS ``IndexFlatL2`` index. A user query is answered
    with the stored answer of its nearest indexed question, provided the
    match is similar enough; otherwise a language-specific fallback message
    is returned.
    """

    # Language used for fallback messages when detection fails or the
    # detected language has no dedicated fallback text.
    DEFAULT_LANGUAGE = 'en'

    def __init__(self,
                 embedding_model_name: str = 'paraphrase-multilingual-MiniLM-L12-v2',
                 similarity_threshold: float = 0.65):
        """Load the embedding model, create the index and preload knowledge.

        Args:
            embedding_model_name (str): Multilingual sentence embedding model.
            similarity_threshold (float): Minimum similarity score (as
                computed by ``find_similar_question``) for a match to be
                used as the response.
        """
        # Initialize models
        self.embedding_model = SentenceTransformer(embedding_model_name)
        self.embedding_dimension = self.embedding_model.get_sentence_embedding_dimension()
        self.index = faiss.IndexFlatL2(self.embedding_dimension)

        # Knowledge management: entry i of knowledge_base corresponds to
        # vector i in the FAISS index (both are appended in lockstep).
        self.knowledge_base = []
        self.similarity_threshold = similarity_threshold

        # Multilingual response configurations
        self.FALLBACK_RESPONSES = {
            'ar': "عذرًا، لم نتمكن من العثور على معلومات دقيقة حول استفسارك. هل يمكنك إعادة صياغة السؤال بشكل مختلف؟",
            'en': "We apologize, but we couldn't find precise information about your query. Could you rephrase your question?"
        }

        # Conversation tracking
        self.conversation_history = []

        # Preload initial knowledge base
        self._preload_news_knowledge()

    def _preload_news_knowledge(self):
        """Seed the knowledge base with built-in English and Arabic entries.

        Each entry maps several question phrasings to a single answer; the
        entries are flattened to one (question, answer, language, category)
        row per phrasing and added in a single batch.
        """
        news_knowledge_pairs = [
            # English News Knowledge
            {
                'questions': [
                    "What is happening in the Middle East?",
                    "Tell me about current tensions in the region",
                    "Middle East conflict update"
                ],
                'answer': (
                    "The Middle East continues to experience complex geopolitical challenges. "
                    "Recent developments include ongoing diplomatic efforts to reduce tensions, "
                    "humanitarian concerns, and international diplomatic negotiations aimed at "
                    "promoting stability in the region."
                ),
                'language': 'en',
                'category': 'International Politics'
            },
            # Arabic News Knowledge
            {
                'questions': [
                    "ما هي آخر التطورات في الشرق الأوسط؟",
                    "حدثني عن الوضع الحالي في المنطقة",
                    "تحديث عن الأوضاع السياسية"
                ],
                'answer': "الشرق الأوسط يمر بتحديات جيوسياسية معقدة. تستمر الجهود الدبلوماسية للحد من التوترات، مع التركيز على القضايا الإنسانية والمفاوضات الدولية الهادفة إلى تعزيز الاستقرار في المنطقة.",
                'language': 'ar',
                'category': 'السياسة الدولية'
            },
        ]

        # Batch processing of knowledge
        all_questions = []
        all_answers = []
        all_languages = []
        all_categories = []

        for knowledge in news_knowledge_pairs:
            phrasing_count = len(knowledge['questions'])
            all_questions.extend(knowledge['questions'])
            all_answers.extend([knowledge['answer']] * phrasing_count)
            all_languages.extend([knowledge['language']] * phrasing_count)
            all_categories.extend([knowledge.get('category', 'General')] * phrasing_count)

        self.add_knowledge_batch(all_questions, all_answers, all_languages, all_categories)

    def add_knowledge_batch(self,
                            questions: List[str],
                            answers: List[str],
                            languages: Optional[List[str]] = None,
                            categories: Optional[List[str]] = None):
        """Embed a batch of questions and store them with their answers.

        Args:
            questions (List[str]): List of input questions.
            answers (List[str]): Corresponding answers.
            languages (List[str], optional): Languages of the questions;
                detected per question with ``langdetect`` when omitted
                or empty.
            categories (List[str], optional): Content categories; defaults
                to ``'General'`` for every question when omitted or empty.

        Raises:
            ValueError: If the provided lists differ in length.
        """
        # Validate input: every list that was actually supplied must have
        # one element per question.
        expected = len(questions)
        if (len(answers) != expected
                or (languages and len(languages) != expected)
                or (categories and len(categories) != expected)):
            raise ValueError("Input lists must have matching lengths")

        # Nothing to embed or store.
        if expected == 0:
            return

        # Detect languages if not provided
        if not languages:
            languages = [detect(q) for q in questions]

        # Default to 'General' if no categories provided
        if not categories:
            categories = ['General'] * expected

        # Batch embedding; FAISS expects float32 vectors.
        question_embeddings = np.asarray(
            self.embedding_model.encode(questions), dtype=np.float32
        )

        # Add to FAISS index
        if question_embeddings.size > 0:
            self.index.add(question_embeddings)

        # Store in knowledge base
        for q, a, lang, cat in zip(questions, answers, languages, categories):
            self.knowledge_base.append({
                'question': q,
                'answer': a,
                'language': lang,
                'category': cat
            })

    def find_similar_question(self, query: str, top_k: int = 3) -> List[Dict]:
        """Return the indexed questions closest to ``query``.

        Args:
            query (str): Input query to match.
            top_k (int): Number of nearest neighbours to request.

        Returns:
            List[Dict]: Copies of the matching knowledge-base entries, each
            extended with ``'similarity_score'`` (``1 / (1 + distance)``)
            and ``'distance'``, sorted by descending similarity. May hold
            fewer than ``top_k`` items when the index is smaller.
        """
        query_embedding = self.embedding_model.encode(query)
        query_matrix = np.asarray([query_embedding], dtype=np.float32)
        distances, indices = self.index.search(query_matrix, top_k)

        entry_count = len(self.knowledge_base)
        results = []
        for dist, idx in zip(distances[0], indices[0]):
            # FAISS pads missing neighbours with index -1; without the lower
            # bound check that would silently select knowledge_base[-1].
            if not 0 <= idx < entry_count:
                continue

            distance = float(dist)
            # Convert distance to similarity score in (0, 1].
            similarity = 1 / (1 + distance)

            result = self.knowledge_base[idx].copy()
            result.update({
                'similarity_score': similarity,
                'distance': distance
            })
            results.append(result)

        return sorted(results, key=lambda x: x['similarity_score'], reverse=True)

    def generate_response(self, query: str, include_confidence: bool = False) -> str:
        """Answer ``query`` from the knowledge base or with a fallback.

        Args:
            query (str): User's input query.
            include_confidence (bool): Whether to append the similarity
                score of the matched entry to the response.

        Returns:
            str: The matched answer (optionally with a confidence suffix),
            or the fallback message for the detected language. On any
            processing error the fallback message is returned as well.
        """
        # Bound before the try block so the except handler can always read
        # it, even when language detection itself is what failed.
        lang = self.DEFAULT_LANGUAGE
        try:
            # Detect input language
            lang = detect(query)

            # Find most similar question
            similar_results = self.find_similar_question(query, top_k=1)
            best_match = similar_results[0] if similar_results else None

            # Construct response
            if (best_match is not None
                    and best_match['similarity_score'] >= self.similarity_threshold):
                response = best_match['answer']

                # Optionally add confidence score
                if include_confidence:
                    confidence = best_match['similarity_score']
                    source_info = f"\n\n{'معلومات المصدر:' if lang == 'ar' else 'Source Information:'} " \
                                  f"{'الثقة:' if lang == 'ar' else 'Confidence:'} {confidence:.2%}"
                    response += source_info
            else:
                # Fallback response
                response = self.FALLBACK_RESPONSES.get(
                    lang, self.FALLBACK_RESPONSES[self.DEFAULT_LANGUAGE]
                )

            # Log conversation
            self.conversation_history.append({
                'query': query,
                'response': response,
                'language': lang,
                'timestamp': datetime.now().isoformat()
            })

            return response

        except Exception as e:
            # Top-level boundary for the chat UI: report the error and
            # degrade to the fallback message instead of propagating.
            print(f"Error processing query: {str(e)}")
            return self.FALLBACK_RESPONSES.get(
                lang, self.FALLBACK_RESPONSES[self.DEFAULT_LANGUAGE]
            )


def chat_interface(message, history):
    """Gradio chat callback that delegates to a shared chatbot instance.

    The chatbot is created lazily on the first call and kept in the
    module-level global ``news_chatbot``.

    Args:
        message (str): User's input message.
        history (list): Conversation history supplied by Gradio (unused).

    Returns:
        str: Generated response, or a generic error message on failure.
    """
    global news_chatbot
    try:
        # Initialize chatbot if not already done
        if 'news_chatbot' not in globals():
            news_chatbot = MultilingualNewsChatbot()

        # Generate response
        return news_chatbot.generate_response(message, include_confidence=True)

    except Exception as e:
        print(f"Interface error: {e}")
        return "Sorry, an error occurred while processing your request."


# Create Gradio interface
demo = gr.ChatInterface(
    fn=chat_interface,
    title="🌍 Multilingual News Chatbot",
    description="Get insights in multiple languages",
    theme="soft"
)

if __name__ == "__main__":
    demo.launch(debug=True)