# Source: Hugging Face Space file by logasanjeev
# Commit c55e8f8 (verified): "Update app.py"
# app.py
import gradio as gr
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import json
import pickle
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
import contractions
from huggingface_hub import hf_hub_download
import warnings
from sklearn.exceptions import InconsistentVersionWarning
# Suppress scikit-learn's InconsistentVersionWarning
warnings.filterwarnings("ignore", category=InconsistentVersionWarning)

# Download NLTK resources quietly, one after another in a fixed order
for _nltk_resource in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(_nltk_resource, quiet=True)

# Module-wide lemmatizer instance used by the text cleaning step
lemmatizer = WordNetLemmatizer()
# Define LuongAttention (matches training)
class LuongAttention(tf.keras.layers.Layer):
    """Attention pooling layer that reduces its input along axis 1.

    Scores are ``tanh(inputs . W + b)``, normalised with a softmax over
    axis 1, multiplied element-wise with the inputs and summed over axis 1.

    NOTE(review): presumably inputs are (batch, timesteps, features) so the
    output is (batch, features) -- confirm against the training code.
    The weight names and their creation order are kept unchanged because the
    saved model is loaded with this class.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create a square weight matrix and a bias sized by the last input dim."""
        feature_dim = input_shape[-1]
        self.W = self.add_weight(
            name='attention_weight',
            shape=(feature_dim, feature_dim),
            initializer='glorot_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(feature_dim,),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return the attention-weighted sum of ``inputs`` over axis 1."""
        backend = tf.keras.backend
        scores = backend.tanh(backend.dot(inputs, self.W) + self.b)
        weights = backend.softmax(scores, axis=1)
        return backend.sum(inputs * weights, axis=1)

    def get_config(self):
        """Return the base layer config; this layer adds no options of its own."""
        return super().get_config()
# Load model, tokenizer, label encoder from Hugging Face Hub
_HUB_REPO_ID = "logasanjeev/sentiment-analysis-bilstm-luong"

# The three artifacts are fetched in this order: model, tokenizer, encoder.
model_path, tokenizer_path, encoder_path = (
    hf_hub_download(repo_id=_HUB_REPO_ID, filename=artifact_name)
    for artifact_name in ("sentiment_model.h5", "tokenizer.pkl", "label_encoder.pkl")
)

# Names the saved model refers to that Keras cannot resolve on its own.
_custom_objects = {
    "LuongAttention": LuongAttention,
    "focal_loss_fn": lambda y_true, y_pred: y_true  # Placeholder for custom loss
}
model = load_model(model_path, custom_objects=_custom_objects)

# NOTE(review): pickle.load executes arbitrary code from the file it reads;
# these files come from the Hub repo above, so that repo must stay trusted.
with open(tokenizer_path, "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)
with open(encoder_path, "rb") as encoder_file:
    label_encoder = pickle.load(encoder_file)

# Optimal threshold from training
OPTIMAL_THRESHOLD = 0.5173
# Text cleaning function (matches training)
def clean_text(text):
    """Normalise raw text into a space-joined string of lemmatized tokens.

    Non-string input is converted with ``str()``. Contractions are expanded,
    the text is lowercased, a fixed sequence of patterns is deleted,
    whitespace is collapsed, and each token is lemmatized as a verb.
    """
    raw = text if isinstance(text, str) else str(text)

    # Expand contractions before lowercasing
    normalized = contractions.fix(raw).lower()

    # Patterns deleted outright, applied strictly in this order.
    # NOTE(review): newlines are deleted rather than replaced by a space, so
    # words on adjacent lines fuse together; left as-is to match training.
    removal_steps = (
        (r'http\S+|www\S+|https\S+', re.MULTILINE),  # URLs
        (r'@\w+|#\w+', 0),                           # usernames and hashtags
        (r'<.*?>+', 0),                              # HTML tags
        (r'\n', 0),                                  # newlines
        (r'\w*\d\w*', 0),                            # any word containing a digit
        (r'[^\w\s]', 0),                             # special characters
    )
    for pattern, regex_flags in removal_steps:
        normalized = re.sub(pattern, '', normalized, flags=regex_flags)

    # Collapse runs of whitespace into single spaces
    collapsed = ' '.join(normalized.split())

    # Tokenize and lemmatize (verb part of speech)
    lemmas = (lemmatizer.lemmatize(word, pos='v') for word in word_tokenize(collapsed))
    return ' '.join(lemmas).strip()
# Prediction function
def predict_sentiment(text):
    """Predict the sentiment of ``text`` and format the result for the UI.

    Args:
        text: Raw user input. Anything that is not a string, is empty, or has
            fewer than 3 characters after stripping is rejected.

    Returns:
        A 3-tuple ``(message, probabilities, cleaned_text)``:
        - ``message``: Markdown string with the predicted label, or an error
          message when the input is rejected.
        - ``probabilities``: dict with ``"Negative"`` and ``"Positive"`` keys
          mapped to native Python floats, or ``None`` on rejection.
        - ``cleaned_text``: the output of ``clean_text``, or ``None`` on
          rejection.
    """
    if not text or not isinstance(text, str) or len(text.strip()) < 3:
        return "Please enter a valid sentence.", None, None

    # Clean and preprocess
    cleaned = clean_text(text)
    seq = tokenizer.texts_to_sequences([cleaned])
    # Require at least one token id above 1.
    # NOTE(review): presumably 0 is padding and 1 the out-of-vocabulary id --
    # verify against the fitted tokenizer.
    if not seq or not any(x > 1 for x in seq[0]):
        return "Text too short or invalid.", None, None

    # Pad sequence
    max_len = 60
    pad = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')

    # Predict
    with tf.device('/CPU:0'):
        # Cast to a native float so no NumPy scalar reaches the UI payload.
        prob = float(model.predict(pad, verbose=0)[0][0])

    # Apply threshold
    label_idx = int(prob >= OPTIMAL_THRESHOLD)
    sentiment = label_encoder.inverse_transform([label_idx])[0].lower()

    # Format output; an unexpected label simply gets no emoji instead of
    # raising KeyError.
    emoji = {"negative": "😣", "positive": "😊"}
    probs_dict = {
        "Negative": 1 - prob,
        "Positive": prob,
    }
    return (
        f"**Sentiment**: {sentiment.capitalize()} {emoji.get(sentiment, '')}",
        probs_dict,
        cleaned,
    )
# Custom CSS for sleek UI.
# Passed to gr.Blocks(css=...) when the interface is built; it centres and
# width-limits the container and styles the heading, textbox, output text,
# footer and buttons.
css = """
body { font-family: 'Arial', sans-serif; }
.gradio-container { max-width: 800px; margin: auto; }
h1 { color: #1a73e8; text-align: center; }
.textbox { border-radius: 8px; }
.output-text { font-size: 1.2em; font-weight: bold; }
.footer { text-align: center; color: #666; }
.prob-bar { margin-top: 10px; }
button { border-radius: 6px; }
"""
# Gradio interface
# NOTE(review): the original indentation was lost, so the nesting of the
# components below (what sits inside the Row/Column) is reconstructed --
# confirm against the deployed layout.
_HEADER_MARKDOWN = """
# Sentiment Analysis App
Predict the sentiment of your text (Negative or Positive) using a BiLSTM model with Luong attention. Optimized threshold (0.5173) for 86.58% accuracy. Try it out!
"""

_FOOTER_MARKDOWN = """
<div class='footer'>
Created by logasanjeev | Powered by Hugging Face & Gradio
</div>
"""

_EXAMPLE_TEXTS = [
    "Not bad at all.",
    "Just what I needed today — a flat tire and a rainstorm. Living the dream!",
    "The movie was visually stunning, but the story was painfully slow.",
    "I wouldn’t recommend it to someone I like.",
    "For once, he didn’t mess it up."
]

with gr.Blocks(theme="soft", css=css) as demo:
    gr.Markdown(_HEADER_MARKDOWN)

    # Input area: text box and trigger button
    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                label="Your Text",
                placeholder="e.g., I wouldn't recommend it to anyone",
                lines=2,
            )
            predict_btn = gr.Button("Analyze Sentiment", variant="primary")

    # Output area: verdict, class probabilities, and the cleaned text
    output_text = gr.Markdown()
    prob_plot = gr.Label(label="Probability Distribution")
    cleaned_text = gr.Textbox(label="Cleaned Text", interactive=False)

    examples = gr.Examples(examples=_EXAMPLE_TEXTS, inputs=text_input)

    # Bind predict function
    _prediction_outputs = [output_text, prob_plot, cleaned_text]
    predict_btn.click(
        fn=predict_sentiment,
        inputs=text_input,
        outputs=_prediction_outputs,
    )

    gr.Markdown(_FOOTER_MARKDOWN)

# Launch app
demo.launch()