# app.py
import json
import pickle
import re
import warnings

import gradio as gr
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import contractions
from huggingface_hub import hf_hub_download
from sklearn.exceptions import InconsistentVersionWarning

# Suppress scikit-learn warning
warnings.filterwarnings("ignore", category=InconsistentVersionWarning)

# Download NLTK resources (quietly) before the tokenizer/lemmatizer are used
for _resource in ("punkt", "punkt_tab", "wordnet", "omw-1.4"):
    nltk.download(_resource, quiet=True)

# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()


# Define LuongAttention (matches training)
class LuongAttention(tf.keras.layers.Layer):
    """Attention pooling layer used by the saved sentiment model.

    The layer owns a square weight matrix ``W`` and a bias vector ``b`` sized
    from the last input dimension. ``call`` scores the input with
    ``tanh(inputs . W + b)``, normalises the scores with a softmax over
    axis 1, and returns the input weighted by those scores summed over axis 1.

    NOTE(review): axis 1 is presumably the time-step axis of a
    (batch, steps, features) tensor - confirm against the training code.
    The weight names and creation order must stay as they are so the saved
    weights keep loading.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create ``W`` (features x features) and ``b`` (features,)."""
        feature_dim = input_shape[-1]
        self.W = self.add_weight(
            name='attention_weight',
            shape=(feature_dim, feature_dim),
            initializer='glorot_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(feature_dim,),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return the attention-weighted sum of ``inputs`` over axis 1."""
        K = tf.keras.backend
        scores = K.tanh(K.dot(inputs, self.W) + self.b)
        weights = K.softmax(scores, axis=1)
        return K.sum(inputs * weights, axis=1)

    def get_config(self):
        """Return the base layer config; the layer adds no extra arguments."""
        return super().get_config()


# Load model, tokenizer, label encoder from Hugging Face Hub
REPO_ID = "logasanjeev/sentiment-analysis-bilstm-luong"
model_path = hf_hub_download(repo_id=REPO_ID, filename="sentiment_model.h5")
tokenizer_path = hf_hub_download(repo_id=REPO_ID, filename="tokenizer.pkl")
encoder_path = hf_hub_download(repo_id=REPO_ID, filename="label_encoder.pkl")
# The app only runs inference, so the model is loaded without compiling it;
# this skips restoring the training loss/optimizer. The placeholder entry for
# the custom loss is kept so deserialisation still resolves the name if an
# older loader looks it up.
model = load_model(
    model_path,
    custom_objects={
        "LuongAttention": LuongAttention,
        "focal_loss_fn": lambda y_true, y_pred: y_true,  # Placeholder for custom loss
    },
    compile=False,
)

# NOTE(security): pickle.load executes arbitrary code embedded in the file.
# These artifacts come from a remote Hub repository - only load them from a
# repository you trust.
with open(tokenizer_path, "rb") as f:
    tokenizer = pickle.load(f)
with open(encoder_path, "rb") as f:
    label_encoder = pickle.load(f)

# Optimal threshold from training
OPTIMAL_THRESHOLD = 0.5173

# Sequence length the inputs are padded/truncated to before prediction.
MAX_SEQUENCE_LENGTH = 60

# Emoji shown next to each predicted label.
SENTIMENT_EMOJI = {"negative": "😣", "positive": "😊"}

# Substitutions applied in order by clean_text; every match is deleted.
_CLEANING_PATTERNS = (
    re.compile(r'http\S+|www\S+|https\S+', flags=re.MULTILINE),  # URLs
    re.compile(r'@\w+|#\w+'),                                    # usernames and hashtags
    re.compile(r'<.*?>+'),                                       # HTML tags
    re.compile(r'\n'),                                           # newlines
    re.compile(r'\w*\d\w*'),                                     # tokens containing digits
    re.compile(r'[^\w\s]'),                                      # special characters
)


# Text cleaning function (matches training)
def clean_text(text):
    """Normalise raw text into the space-joined token string fed to the tokenizer.

    Steps, in order: coerce to ``str``, expand contractions, lowercase, delete
    every match of ``_CLEANING_PATTERNS``, collapse whitespace, tokenize, and
    lemmatize each token as a verb.

    Args:
        text: Input text; non-string values are converted with ``str()``.

    Returns:
        The cleaned tokens joined by single spaces.
    """
    if not isinstance(text, str):
        text = str(text)

    text = contractions.fix(text).lower()
    for pattern in _CLEANING_PATTERNS:
        text = pattern.sub('', text)
    # Remove extra spaces
    text = ' '.join(text.split())

    # Tokenize and lemmatize
    tokens = [lemmatizer.lemmatize(token, pos='v') for token in word_tokenize(text)]
    return ' '.join(tokens).strip()


# Prediction function
def predict_sentiment(text):
    """Classify ``text`` and build the three values shown in the UI.

    Args:
        text: Raw user input from the textbox.

    Returns:
        A 3-tuple ``(markdown_message, probabilities, cleaned_text)``. On
        invalid input the message explains the problem and the other two
        items are ``None``. Otherwise ``probabilities`` is a dict with the
        keys ``"Negative"`` and ``"Positive"`` holding plain Python floats.
    """
    if not text or not isinstance(text, str) or len(text.strip()) < 3:
        return "Please enter a valid sentence.", None, None

    # Clean and preprocess
    cleaned = clean_text(text)
    seq = tokenizer.texts_to_sequences([cleaned])
    # Require at least one token index above 1.
    # NOTE(review): index 1 is presumably the out-of-vocabulary token - confirm
    # against the tokenizer used in training.
    if not seq or not any(x > 1 for x in seq[0]):
        return "Text too short or invalid.", None, None

    # Pad sequence
    pad = pad_sequences(
        seq, maxlen=MAX_SEQUENCE_LENGTH, padding='post', truncating='post'
    )

    # Predict; convert the NumPy scalar to a built-in float so the values
    # handed to the UI are plain Python numbers.
    with tf.device('/CPU:0'):
        prob = float(model.predict(pad, verbose=0)[0][0])

    # Apply threshold
    label_idx = int(prob >= OPTIMAL_THRESHOLD)
    sentiment = str(label_encoder.inverse_transform([label_idx])[0]).lower()

    # Format output; fall back to no emoji if the encoder yields an
    # unexpected class name instead of raising KeyError.
    icon = SENTIMENT_EMOJI.get(sentiment, '')
    headline = f"**Sentiment**: {sentiment.capitalize()} {icon}".rstrip()
    probs_dict = {
        "Negative": 1.0 - prob,
        "Positive": prob,
    }
    return headline, probs_dict, cleaned


# Custom CSS for sleek UI
css = """
body { font-family: 'Arial', sans-serif; }
.gradio-container { max-width: 800px; margin: auto; }
h1 { color: #1a73e8; text-align: center; }
.textbox { border-radius: 8px; }
.output-text { font-size: 1.2em; font-weight: bold; }
.footer { text-align: center; color: #666; }
.prob-bar { margin-top: 10px; }
button { border-radius: 6px; }
"""

# Header text; the threshold is interpolated so it cannot drift from the
# constant actually used for prediction.
HEADER_MARKDOWN = (
    "# Sentiment Analysis App\n"
    "\n"
    "Predict the sentiment of your text (Negative or Positive) using a BiLSTM "
    "model with Luong attention.\n"
    f"Optimized threshold ({OPTIMAL_THRESHOLD}) for 86.58% accuracy. Try it out!\n"
)

# Gradio interface
# NOTE(review): the original indentation was lost, so the exact nesting of the
# widgets inside the Row/Column is a reconstruction - confirm the layout.
with gr.Blocks(theme="soft", css=css) as demo:
    gr.Markdown(HEADER_MARKDOWN)
    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                label="Your Text",
                placeholder="e.g., I wouldn't recommend it to anyone",
                lines=2,
            )
            predict_btn = gr.Button("Analyze Sentiment", variant="primary")
    output_text = gr.Markdown()
    prob_plot = gr.Label(label="Probability Distribution")
    cleaned_text = gr.Textbox(label="Cleaned Text", interactive=False)
    examples = gr.Examples(
        examples=[
            "Not bad at all.",
            "Just what I needed today — a flat tire and a rainstorm. Living the dream!",
            "The movie was visually stunning, but the story was painfully slow.",
            "I wouldn’t recommend it to someone I like.",
            "For once, he didn’t mess it up.",
        ],
        inputs=text_input,
    )
    # Bind predict function
    predict_btn.click(
        fn=predict_sentiment,
        inputs=text_input,
        outputs=[output_text, prob_plot, cleaned_text],
    )
    # Footer placeholder; it contains only whitespace in the source.
    gr.Markdown(""" """)

# Launch app
demo.launch()