# app.py
import gradio as gr
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import json
import pickle
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
import contractions
from huggingface_hub import hf_hub_download
import warnings
from sklearn.exceptions import InconsistentVersionWarning

# Silence scikit-learn's InconsistentVersionWarning (presumably triggered by
# the pickled label encoder loaded further down -- confirm if it still fires).
warnings.filterwarnings("ignore", category=InconsistentVersionWarning)

# Fetch the NLTK data packages this script asks for; quiet=True suppresses
# the download log output.
for _resource in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(_resource, quiet=True)

# Shared lemmatizer instance used by clean_text().
lemmatizer = WordNetLemmatizer()

# Define LuongAttention (matches training)
class LuongAttention(tf.keras.layers.Layer):
    """Attention pooling layer that reduces its input along axis 1.

    Holds two trainable weights sized from the last input dimension ``d``:
    ``W`` with shape ``(d, d)`` and ``b`` with shape ``(d,)``. ``call``
    computes ``tanh(inputs . W + b)``, applies a softmax over axis 1, uses
    the result to weight ``inputs`` element-wise, and sums over axis 1.

    NOTE(review): assumes inputs are laid out as (batch, steps, features),
    so that axis 1 is the sequence axis -- confirm against the training code.
    The weight names and the computation must stay as they are so that the
    saved checkpoint keeps loading into this layer.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create ``W`` and ``b`` from the size of the last input dimension."""
        feature_dim = input_shape[-1]
        self.W = self.add_weight(
            name='attention_weight',
            shape=(feature_dim, feature_dim),
            initializer='glorot_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(feature_dim,),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return the softmax-weighted sum of ``inputs`` over axis 1."""
        backend = tf.keras.backend
        scores = backend.tanh(backend.dot(inputs, self.W) + self.b)
        # Softmax over axis 1, so the weights for each feature sum to 1
        # along that axis.
        weights = backend.softmax(scores, axis=1)
        return backend.sum(inputs * weights, axis=1)

    def get_config(self):
        """Return the base layer config; this layer adds no options of its own."""
        return super().get_config()

# Load model, tokenizer, label encoder from Hugging Face Hub
# Single source of truth for the repository that hosts all three artifacts.
HF_REPO_ID = "logasanjeev/sentiment-analysis-bilstm-luong"

model_path = hf_hub_download(repo_id=HF_REPO_ID, filename="sentiment_model.h5")
tokenizer_path = hf_hub_download(repo_id=HF_REPO_ID, filename="tokenizer.pkl")
encoder_path = hf_hub_download(repo_id=HF_REPO_ID, filename="label_encoder.pkl")

# compile=False: this app only calls model.predict(), so the saved training
# configuration (optimizer / loss) is not restored. That keeps the dummy loss
# below from ever being attached to the model, where it would silently give
# wrong results if someone later called evaluate() or fit().
model = load_model(
    model_path,
    custom_objects={
        "LuongAttention": LuongAttention,
        # Placeholder for the custom training loss. Not used while
        # compile=False; kept so the file still loads if compilation is
        # switched back on. It is NOT a real loss implementation.
        "focal_loss_fn": lambda y_true, y_pred: y_true,
    },
    compile=False,
)

# SECURITY: pickle.load() can execute arbitrary code contained in the file.
# These pickles are downloaded from the Hub repository above, so only point
# HF_REPO_ID at a repository you trust.
with open(tokenizer_path, "rb") as f:
    tokenizer = pickle.load(f)
with open(encoder_path, "rb") as f:
    label_encoder = pickle.load(f)

# Optimal threshold from training: predict_sentiment() maps probabilities
# >= this value to label index 1 and everything below it to index 0.
OPTIMAL_THRESHOLD = 0.5173

# Text cleaning function (matches training, except that newlines become spaces)
def clean_text(text) -> str:
    """Normalise raw text into the space-separated form fed to the tokenizer.

    Steps, in order: expand contractions, lowercase, strip URLs, strip
    @mentions and #hashtags, strip HTML tags, turn newlines into spaces,
    drop every word that contains a digit, drop remaining punctuation,
    collapse whitespace, then tokenize and lemmatize each token as a verb.

    Args:
        text: Input text. Non-string values are converted with ``str()``.

    Returns:
        The cleaned tokens joined by single spaces; may be an empty string.
    """
    if not isinstance(text, str):
        text = str(text)
    # Expand contractions first, while the apostrophes are still present.
    text = contractions.fix(text)
    text = text.lower()
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    # Remove usernames and hashtags
    text = re.sub(r'@\w+|#\w+', '', text)
    # Remove HTML tags
    text = re.sub(r'<.*?>+', '', text)
    # Replace newlines with a space rather than deleting them: deleting fused
    # the last word of one line with the first word of the next
    # ("great\nmovie" -> "greatmovie"), and the digit filter below could then
    # discard a real word glued to a number ("abc\n123" -> "abc123" -> "").
    text = text.replace('\n', ' ')
    # Remove any word that contains a digit
    text = re.sub(r'\w*\d\w*', '', text)
    # Remove special characters
    text = re.sub(r'[^\w\s]', '', text)
    # Collapse runs of whitespace (including the spaces introduced above)
    text = ' '.join(text.split())
    # Tokenize and lemmatize, treating every token as a verb
    tokens = [lemmatizer.lemmatize(token, pos='v') for token in word_tokenize(text)]
    return ' '.join(tokens).strip()

# Prediction function
def predict_sentiment(text):
    """Classify *text* and format the result for the three Gradio outputs.

    Args:
        text: Raw user input from the textbox.

    Returns:
        A 3-tuple ``(message, probabilities, cleaned_text)``:

        * ``message`` - Markdown string with the predicted sentiment, or an
          error message when the input is rejected.
        * ``probabilities`` - ``{"Negative": float, "Positive": float}``, or
          ``None`` when the input is rejected.
        * ``cleaned_text`` - output of ``clean_text``, or ``None`` when the
          input is rejected.
    """
    # Reject empty, non-string, or very short (< 3 non-blank chars) input
    # before touching the model.
    if not text or not isinstance(text, str) or len(text.strip()) < 3:
        return "Please enter a valid sentence.", None, None

    # Clean and preprocess
    cleaned = clean_text(text)
    seq = tokenizer.texts_to_sequences([cleaned])
    # Require at least one token index above 1.
    # NOTE(review): index 1 is presumably the tokenizer's out-of-vocabulary
    # index -- confirm against the training setup.
    if not seq or not any(x > 1 for x in seq[0]):
        return "Text too short or invalid.", None, None

    # Pad / truncate at the end to the fixed length used here.
    max_len = 60
    pad = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')

    # Predict on CPU. Convert the NumPy scalar to a built-in float so that
    # everything handed to the UI below is a plain Python type.
    with tf.device('/CPU:0'):
        prob = float(model.predict(pad, verbose=0)[0][0])

    # Apply threshold: probabilities >= OPTIMAL_THRESHOLD map to index 1.
    label_idx = int(prob >= OPTIMAL_THRESHOLD)
    sentiment = label_encoder.inverse_transform([label_idx])[0].lower()

    # Format output
    emoji = {"negative": "😣", "positive": "😊"}
    probs_dict = {
        "Negative": 1 - prob,
        "Positive": prob,
    }

    return (
        # .get(): fall back to no emoji instead of raising KeyError if the
        # encoder ever yields a label other than the two listed above.
        f"**Sentiment**: {sentiment.capitalize()} {emoji.get(sentiment, '')}",
        probs_dict,
        cleaned,
    )

# Custom CSS for sleek UI
# Passed to gr.Blocks(css=...) below. Of the custom class selectors, only
# `.footer` is referenced by markup in this file (the footer <div>). No
# component here sets elem_classes, so `.textbox`, `.output-text` and
# `.prob-bar` only apply if Gradio itself emits those class names -- TODO
# confirm, or attach them via elem_classes.
css = """
body { font-family: 'Arial', sans-serif; }
.gradio-container { max-width: 800px; margin: auto; }
h1 { color: #1a73e8; text-align: center; }
.textbox { border-radius: 8px; }
.output-text { font-size: 1.2em; font-weight: bold; }
.footer { text-align: center; color: #666; }
.prob-bar { margin-top: 10px; }
button { border-radius: 6px; }
"""

# Gradio interface
# Sample sentences offered through gr.Examples and wired to the input textbox.
EXAMPLE_SENTENCES = [
    "Not bad at all.",
    "Just what I needed today — a flat tire and a rainstorm. Living the dream!",
    "The movie was visually stunning, but the story was painfully slow.",
    "I wouldn’t recommend it to someone I like.",
    "For once, he didn’t mess it up.",
]

with gr.Blocks(theme="soft", css=css) as demo:
    # Page title and short description.
    gr.Markdown(
        """
        # Sentiment Analysis App
        Predict the sentiment of your text (Negative or Positive) using a BiLSTM model with Luong attention. Optimized threshold (0.5173) for 86.58% accuracy. Try it out!
        """
    )

    # Input area: the text box with the trigger button underneath it.
    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(label="Your Text", placeholder="e.g., I wouldn't recommend it to anyone", lines=2)
            predict_btn = gr.Button("Analyze Sentiment", variant="primary")

    # Result widgets, declared in the same order as the values returned by
    # predict_sentiment: message, probabilities, cleaned text.
    output_text = gr.Markdown()
    prob_plot = gr.Label(label="Probability Distribution")
    cleaned_text = gr.Textbox(label="Cleaned Text", interactive=False)

    examples = gr.Examples(examples=EXAMPLE_SENTENCES, inputs=text_input)

    # Clicking the button runs the prediction and fills all three outputs.
    predict_btn.click(
        fn=predict_sentiment,
        inputs=text_input,
        outputs=[output_text, prob_plot, cleaned_text],
    )

    # Footer; the `footer` class is styled by the css string defined above.
    gr.Markdown(
        """
        <div class='footer'>
            Created by logasanjeev | Powered by Hugging Face & Gradio
        </div>
        """
    )

# Start the web server for the app.
demo.launch()