Spaces:

logasanjeev
/

sentiment-analysis-bilstm-luong-demo

Sleeping

File size: 6,198 Bytes

# app.py
import gradio as gr
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import json
import pickle
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
import string
from huggingface_hub import hf_hub_download
import warnings
from sklearn.exceptions import InconsistentVersionWarning

# Silence scikit-learn's InconsistentVersionWarning only; every other warning
# category still surfaces.
# NOTE(review): presumably triggered when the pickled label encoder was saved
# with a different scikit-learn release than the one installed — confirm.
warnings.filterwarnings("ignore", category=InconsistentVersionWarning)

# Download the NLTK data needed by clean_text(). 'punkt_tab' is requested
# alongside the legacy 'punkt' package because NLTK 3.9 and later make
# word_tokenize() load 'punkt_tab' and raise LookupError when only 'punkt' is
# installed. With its default arguments nltk.download() signals a failed
# download through its return value instead of raising, so an unavailable
# package does not abort start-up.
for _nltk_resource in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(_nltk_resource, quiet=True)

# Single lemmatizer instance shared by every clean_text() call.
lemmatizer = WordNetLemmatizer()

# Custom attention layer (registered via custom_objects when the model is loaded)
class LuongAttention(tf.keras.layers.Layer):
    """Attention-style pooling that collapses axis 1 of its input.

    Scores are derived from the input alone, ``tanh(inputs @ W + b)``; no
    separate query or decoder state takes part. The scores are normalised
    with a softmax along axis 1 and used to weight the input element-wise
    before it is summed along that same axis.

    NOTE(review): presumably the input is a (batch, timesteps, features)
    recurrent output, which would make axis 1 the time axis — confirm
    against the model definition.
    """

    def __init__(self, **kwargs):
        # No layer-specific options; everything is forwarded to the base Layer.
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the square projection matrix and the bias vector.

        Both are sized from the last dimension of ``input_shape``. The weight
        names and creation order are kept unchanged.
        NOTE(review): saved weights may be matched by name/order — confirm
        before renaming or reordering.
        """
        feature_dim = input_shape[-1]
        self.W = self.add_weight(
            name='attention_weight',
            shape=(feature_dim, feature_dim),
            initializer='glorot_uniform',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(feature_dim,),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return ``(context, attention_weights)`` for ``inputs``.

        ``attention_weights`` has the same shape as ``inputs`` (softmax over
        axis 1); ``context`` is the weighted input summed over axis 1.
        """
        projected = tf.tanh(tf.matmul(inputs, self.W) + self.b)
        attention_weights = tf.nn.softmax(projected, axis=1)
        context = tf.reduce_sum(inputs * attention_weights, axis=1)
        return context, attention_weights

    def get_config(self):
        """Return the base layer config; this layer adds no options of its own."""
        return super().get_config()

# Load model, tokenizer, label encoder from Hugging Face Hub.
# All three artifacts are fetched from the same model repository.
HF_MODEL_REPO = "logasanjeev/sentiment-analysis-bilstm-luong"

model_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename="sentiment_model.h5")
tokenizer_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename="tokenizer.json")
encoder_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename="label_encoder.pkl")

# The custom layer is registered under its class name so load_model() can
# resolve it while rebuilding the network.
model = load_model(model_path, custom_objects={"LuongAttention": LuongAttention})

# Read the tokenizer file as UTF-8 explicitly: open() otherwise falls back to
# the host's locale encoding, which is not guaranteed to be UTF-8.
# NOTE(review): the value returned by json.load() is handed straight to
# tokenizer_from_json(); presumably the file stores the tokenizer config as a
# JSON-encoded string — confirm against how tokenizer.json was written.
with open(tokenizer_path, "r", encoding="utf-8") as f:
    tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(json.load(f))

# SECURITY: pickle.load() can execute arbitrary code embedded in the file.
# Only acceptable while the Hub repository above is fully trusted.
with open(encoder_path, "rb") as f:
    label_encoder = pickle.load(f)

# Text cleaning: patterns and translation table are built once at import time
_URL_PATTERN = re.compile(r'http\S+|www\S+|https\S+', flags=re.MULTILINE)
_MENTION_HASHTAG_PATTERN = re.compile(r'@\w+|\#\w+')
_DIGIT_RUN_PATTERN = re.compile(r'\d+')
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean_text(text):
    """Normalise raw text into the space-joined, lemmatised form fed to the tokenizer.

    Steps, in order: coerce non-strings with ``str()``, lowercase, drop
    URL-like tokens, drop ``@mentions`` and ``#hashtags``, delete every
    character in ``string.punctuation``, delete digit runs, tokenize with
    ``word_tokenize`` and lemmatize each token.

    Args:
        text: Input text; any non-``str`` value is converted with ``str()``.

    Returns:
        The lemmatised tokens joined by single spaces, stripped at both ends.
    """
    raw = text if isinstance(text, str) else str(text)

    lowered = raw.lower()
    without_urls = _URL_PATTERN.sub('', lowered)
    without_tags = _MENTION_HASHTAG_PATTERN.sub('', without_urls)
    # Only ASCII punctuation is removed; other symbols pass through unchanged.
    without_punct = without_tags.translate(_PUNCTUATION_TABLE)
    letters_only = _DIGIT_RUN_PATTERN.sub('', without_punct)

    lemmas = (lemmatizer.lemmatize(word) for word in word_tokenize(letters_only))
    return ' '.join(lemmas).strip()

# Prediction function
def predict_sentiment(text):
    """Predict the sentiment of ``text`` and format the result for the UI.

    Args:
        text: Raw user input from the textbox.

    Returns:
        A 3-tuple ``(message, probabilities, cleaned_text)``:

        * ``message`` - Markdown string with the predicted label, or a
          validation message when the input is rejected.
        * ``probabilities`` - dict mapping each capitalised class name to its
          predicted probability, or ``None`` when the input is rejected.
        * ``cleaned_text`` - the preprocessed text given to the tokenizer, or
          ``None`` when the input is rejected.
    """
    # Reject empty, non-string, or shorter-than-3-character input up front.
    if not text or not isinstance(text, str) or len(text.strip()) < 3:
        return "Please enter a valid sentence.", None, None

    # Clean and preprocess
    cleaned = clean_text(text)
    seq = tokenizer.texts_to_sequences([cleaned])
    # Require at least one token index above 1.
    # NOTE(review): presumably index 1 is the tokenizer's out-of-vocabulary
    # token, so this rejects text with no known words — confirm.
    if not seq or not any(x > 1 for x in seq[0]):
        return "Text too short or invalid.", None, None

    # Pad/truncate at the end to a fixed length.
    # NOTE(review): 35 presumably matches the training sequence length — confirm.
    max_len = 35
    pad = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')

    # Predict (pinned to CPU)
    with tf.device('/CPU:0'):
        pred = model.predict(pad, verbose=0)[0]
    probs = pred.tolist()

    # The label encoder maps output indices to class names; use it for both
    # the winning label and the probability labels so they cannot disagree.
    sentiment = str(label_encoder.inverse_transform([np.argmax(pred)])[0])
    class_names = [str(name) for name in label_encoder.classes_]
    probs_dict = {
        name.capitalize(): prob for name, prob in zip(class_names, probs)
    }

    # Format output; an unknown label simply gets no emoji instead of raising.
    emoji = {"negative": "😣", "neutral": "😐", "positive": "😊"}
    icon = emoji.get(sentiment.lower(), "")
    message = f"**Sentiment**: {sentiment.capitalize()} {icon}".rstrip()

    return message, probs_dict, cleaned

# Custom CSS for the UI, passed to gr.Blocks(css=css) below.
# `.footer` matches the <div class='footer'> rendered at the bottom of the page.
# NOTE(review): `.textbox`, `.output-text` and `.prob-bar` are not attached to
# any component in this file (no elem_classes=...), so those rules may never
# match — confirm, or assign the classes to the intended components.
css = """
body { font-family: 'Arial', sans-serif; }
.gradio-container { max-width: 800px; margin: auto; }
h1 { color: #1a73e8; text-align: center; }
.textbox { border-radius: 8px; }
.output-text { font-size: 1.2em; font-weight: bold; }
.footer { text-align: center; color: #666; }
.prob-bar { margin-top: 10px; }
button { border-radius: 6px; }
"""

# Gradio interface

# Browser-side handler for the "Toggle Theme" button. The theme object is
# given to gr.Blocks() when the page is built and cannot be replaced from a
# Python callback, so light/dark is switched by toggling the `dark` class on
# <body> in the browser instead.
_TOGGLE_DARK_MODE_JS = "() => { document.body.classList.toggle('dark'); }"

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    gr.Markdown(
        """
        # Sentiment Analysis App
        Predict the sentiment of your text (negative, neutral, positive) using a Bi-LSTM model with Luong attention. Try it out!
        """
    )

    # Input row: text box + submit button on the left, theme toggle on the right.
    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                label="Your Text",
                placeholder="e.g., The food service is not good at all",
                lines=2
            )
            predict_btn = gr.Button("Analyze Sentiment", variant="primary")
        with gr.Column(scale=1):
            theme_toggle = gr.Button("Toggle Theme")

    # Output widgets, filled in this order by predict_sentiment().
    output_text = gr.Markdown()
    prob_plot = gr.Label(label="Probability Distribution")
    cleaned_text = gr.Textbox(label="Cleaned Text", interactive=False)

    # Clickable sample sentences that populate the input box.
    examples = gr.Examples(
        examples=[
            "the food service is not good at all",
            "this is not recommended at all",
            "This place sucks!",
            "I’m so happy with this!",
            "It’s alright, I guess."
        ],
        inputs=text_input
    )

    # Bind functions
    predict_btn.click(
        fn=predict_sentiment,
        inputs=text_input,
        outputs=[output_text, prob_plot, cleaned_text]
    )
    # Pure client-side event: no Python function, inputs or outputs.
    # NOTE: `js=` is the keyword used by Gradio 4 and later; Gradio 3.x
    # spells it `_js=`.
    theme_toggle.click(
        fn=None,
        inputs=None,
        outputs=None,
        js=_TOGGLE_DARK_MODE_JS
    )

    gr.Markdown(
        """
        <div class='footer'>
            Created by logasanjeev | Powered by Hugging Face & Gradio
        </div>
        """
    )

# Launch app
demo.launch()