# Scraped file-view header (not part of the program): author logasanjeev,
# commit cb99758 "Update app.py" (verified), file size 6.2 kB.
# app.py
import gradio as gr
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import json
import pickle
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
import string
from huggingface_hub import hf_hub_download
import warnings
from sklearn.exceptions import InconsistentVersionWarning
# Suppress the scikit-learn warning category raised for version mismatches
# (relevant here because a pickled label encoder is loaded later in this file).
warnings.filterwarnings("ignore", category=InconsistentVersionWarning)

# Download the NLTK resources used by word_tokenize and WordNetLemmatizer.
# Recent NLTK releases load the tokenizer from "punkt_tab" instead of the
# pickled "punkt" package, so both are requested to work on old and new
# versions alike; an id unknown to the installed version simply fails to
# download without raising.
for _resource in ("punkt", "punkt_tab", "wordnet", "omw-1.4"):
    nltk.download(_resource, quiet=True)

# Shared lemmatizer instance, reused by every clean_text() call.
lemmatizer = WordNetLemmatizer()
# Custom attention layer; must be importable by name when the saved model is loaded.
class LuongAttention(tf.keras.layers.Layer):
    """Attention-style pooling that collapses axis 1 of its input.

    A learned square projection (``W``) plus bias (``b``) is applied to the
    last axis, squashed with ``tanh`` and normalized with a softmax taken over
    axis 1. The input is then weighted element-wise and summed over axis 1.

    NOTE(review): presumably the input is ``(batch, timesteps, features)``
    coming from a recurrent layer — confirm against the model definition.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the projection matrix and bias sized from the last input axis."""
        feature_dim = input_shape[-1]
        # Weight names and creation order are kept as-is: saved weights are
        # matched against them when the model file is loaded.
        self.W = self.add_weight(
            name='attention_weight',
            shape=(feature_dim, feature_dim),
            initializer='glorot_uniform',
            trainable=True,
        )
        self.b = self.add_weight(
            name='attention_bias',
            shape=(feature_dim,),
            initializer='zeros',
            trainable=True,
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return ``(context, attention_weights)`` for ``inputs``.

        ``attention_weights`` has the same shape as ``inputs``; ``context`` is
        ``inputs * attention_weights`` summed over axis 1.
        """
        projected = tf.tanh(tf.matmul(inputs, self.W) + self.b)
        attention_weights = tf.nn.softmax(projected, axis=1)
        context = tf.reduce_sum(inputs * attention_weights, axis=1)
        return context, attention_weights

    def get_config(self):
        """Return the base layer config (this layer adds no extra options)."""
        return super().get_config()
# Load model, tokenizer, label encoder from Hugging Face Hub.
# The repository id is defined once so the three downloads cannot drift apart.
REPO_ID = "logasanjeev/sentiment-analysis-bilstm-luong"

model_path = hf_hub_download(repo_id=REPO_ID, filename="sentiment_model.h5")
tokenizer_path = hf_hub_download(repo_id=REPO_ID, filename="tokenizer.json")
encoder_path = hf_hub_download(repo_id=REPO_ID, filename="label_encoder.pkl")

# The custom layer is passed by name so the saved model can be rebuilt.
model = load_model(model_path, custom_objects={"LuongAttention": LuongAttention})

# Explicit encoding so the read does not depend on the host's locale.
# NOTE(review): tokenizer_from_json takes a JSON *string*, so this presumably
# relies on tokenizer.json containing a JSON-encoded string (e.g. written with
# json.dump(tokenizer.to_json(), f)) — confirm against how the file was saved.
with open(tokenizer_path, "r", encoding="utf-8") as f:
    tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(json.load(f))

# NOTE(security): pickle.load can execute arbitrary code from the file it
# reads. This is only acceptable while the Hub repository above is trusted;
# pin a revision or switch to a non-executable format if that changes.
with open(encoder_path, "rb") as f:
    label_encoder = pickle.load(f)
# Text cleaning function
def clean_text(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    Non-string input is converted with ``str()``. The text is lower-cased,
    then URLs, @mentions, #hashtags, ASCII punctuation and digit runs are
    removed (in that order) before tokenizing and lemmatizing each token.

    Returns:
        The cleaned tokens joined by single spaces, stripped at both ends.
    """
    normalized = (text if isinstance(text, str) else str(text)).lower()

    # URLs go first so their punctuation does not survive as stray fragments.
    normalized = re.sub(r'http\S+|www\S+|https\S+', '', normalized, flags=re.MULTILINE)
    normalized = re.sub(r'@\w+|\#\w+', '', normalized)

    punctuation_table = str.maketrans('', '', string.punctuation)
    normalized = normalized.translate(punctuation_table)
    normalized = re.sub(r'\d+', '', normalized)

    lemmas = map(lemmatizer.lemmatize, word_tokenize(normalized))
    return ' '.join(lemmas).strip()
# Prediction function
def predict_sentiment(text):
    """Classify the sentiment of ``text`` and format the result for the UI.

    Args:
        text: Raw user input. Anything that is not a string, is empty, or has
            fewer than 3 non-whitespace-trimmed characters is rejected.

    Returns:
        A 3-tuple ``(message, probabilities, cleaned_text)``:
        ``message`` is a Markdown string with the predicted label (or an error
        message); ``probabilities`` maps each capitalized class label to its
        predicted probability; ``cleaned_text`` is the output of
        ``clean_text``. The last two are ``None`` when the input is rejected.
    """
    if not text or not isinstance(text, str) or len(text.strip()) < 3:
        return "Please enter a valid sentence.", None, None

    # Clean and preprocess
    cleaned = clean_text(text)
    seq = tokenizer.texts_to_sequences([cleaned])
    # Require at least one token id above 1.
    # NOTE(review): presumably id 1 is the tokenizer's out-of-vocabulary
    # token, so this rejects text with no known words — confirm.
    if not seq or not any(x > 1 for x in seq[0]):
        return "Text too short or invalid.", None, None

    # Pad sequence
    # NOTE(review): 35 presumably matches the sequence length used in training.
    max_len = 35
    pad = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')

    # Predict
    with tf.device('/CPU:0'):
        pred = model.predict(pad, verbose=0)[0]

    sentiment = str(label_encoder.inverse_transform([np.argmax(pred)])[0])

    # Format output
    # Labels come from the encoder itself, not from hard-coded positions, so
    # the probability display always agrees with inverse_transform above.
    probs_dict = {
        str(label).capitalize(): float(prob)
        for label, prob in zip(label_encoder.classes_, pred)
    }
    emoji = {"negative": "😣", "neutral": "😐", "positive": "😊"}
    # Case-insensitive lookup with an empty fallback: an unexpected label
    # loses its emoji and no longer raises KeyError.
    icon = emoji.get(sentiment.lower(), "")
    return (
        f"**Sentiment**: {sentiment.capitalize()} {icon}".rstrip(),
        probs_dict,
        cleaned
    )
# Custom CSS for slick UI
# Stylesheet string handed to gr.Blocks through its css= argument.
# The .footer rule styles the <div class='footer'> rendered at the bottom of
# the page.
# NOTE(review): no component in this file sets elem_classes, so the .textbox,
# .output-text and .prob-bar selectors may not match anything — confirm.
css = """
body { font-family: 'Arial', sans-serif; }
.gradio-container { max-width: 800px; margin: auto; }
h1 { color: #1a73e8; text-align: center; }
.textbox { border-radius: 8px; }
.output-text { font-size: 1.2em; font-weight: bold; }
.footer { text-align: center; color: #666; }
.prob-bar { margin-top: 10px; }
button { border-radius: 6px; }
"""
# Gradio interface
# NOTE(review): the layout nesting below (which components sit inside the row
# and columns) is reconstructed — confirm it matches the intended design.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    # Page title and short description.
    gr.Markdown(
        "\n"
        "# Sentiment Analysis App\n"
        "Predict the sentiment of your text (negative, neutral, positive) using a Bi-LSTM model with Luong attention. Try it out!\n"
    )

    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(
                label="Your Text",
                placeholder="e.g., The food service is not good at all",
                lines=2
            )
            predict_btn = gr.Button("Analyze Sentiment", variant="primary")
        with gr.Column(scale=1):
            theme_toggle = gr.Button("Toggle Theme")

    # Outputs, in the same order as predict_sentiment's return tuple.
    output_text = gr.Markdown()
    prob_plot = gr.Label(label="Probability Distribution")
    cleaned_text = gr.Textbox(label="Cleaned Text", interactive=False)

    examples = gr.Examples(
        examples=[
            "the food service is not good at all",
            "this is not recommended at all",
            "This place sucks!",
            "I’m so happy with this!",
            "It’s alright, I guess."
        ],
        inputs=text_input
    )

    # Bind functions
    predict_btn.click(
        fn=predict_sentiment,
        inputs=text_input,
        outputs=[output_text, prob_plot, cleaned_text]
    )

    # Theme toggle logic.
    # A theme is fixed when the Blocks app is built and cannot be swapped from
    # a Python callback (there is also no gr.themes.Dark, and a Blocks object
    # is not a valid event output). Dark mode is therefore toggled client-side
    # by flipping the "dark" class on <body>.
    # The `js` argument is the Gradio 4+ spelling; Gradio 3.x calls it `_js`.
    theme_toggle.click(
        fn=None,
        inputs=None,
        outputs=None,
        js="() => { document.body.classList.toggle('dark'); }"
    )

    # Footer, styled by the .footer rule in the css string.
    gr.Markdown(
        "\n"
        "<div class='footer'>\n"
        "Created by logasanjeev | Powered by Hugging Face & Gradio\n"
        "</div>\n"
    )
# Launch app
# Guarded so the server only starts when this file is executed as a script
# (e.g. `python app.py`), not when the module is imported.
if __name__ == "__main__":
    demo.launch()