Spaces:

logasanjeev
/

sentiment-analysis-bilstm-luong-demo

Running

App Files Files Community

sentiment-analysis-bilstm-luong-demo / app.py

logasanjeev

Update app.py

cb99758 verified about 1 month ago

raw

history blame

6.2 kB

	# app.py
	import gradio as gr
	import tensorflow as tf
	from tensorflow.keras.models import load_model
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	import numpy as np
	import json
	import pickle
	import nltk
	from nltk.tokenize import word_tokenize
	from nltk.stem import WordNetLemmatizer
	import re
	import string
	from huggingface_hub import hf_hub_download
	import warnings
	from sklearn.exceptions import InconsistentVersionWarning

	# Silence scikit-learn's InconsistentVersionWarning, which would otherwise be
	# emitted when the pickled label encoder is loaded under a different
	# scikit-learn version than the one that saved it.
	warnings.filterwarnings("ignore", category=InconsistentVersionWarning)

	# Download the NLTK resources needed by word_tokenize and WordNetLemmatizer.
	# 'punkt_tab' is what recent NLTK releases (>= 3.8.2) load for tokenization;
	# 'punkt' is kept for older releases. A resource unknown to the installed
	# NLTK version simply fails to download without raising.
	for _resource in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
	    nltk.download(_resource, quiet=True)

	# Shared lemmatizer instance used by clean_text().
	lemmatizer = WordNetLemmatizer()

	# Define LuongAttention
	class LuongAttention(tf.keras.layers.Layer):
	    """Attention pooling layer registered as a custom object for model loading.

	    The layer projects its input with a learned square matrix and bias,
	    squashes the result with tanh, normalises it with a softmax over axis 1
	    and returns the attention-weighted sum of the input along that axis.
	    Presumably the input is (batch, timesteps, features) -- TODO confirm.
	    """

	    def __init__(self, **kwargs):
	        """Forward all keyword arguments to the Keras ``Layer`` base class."""
	        super().__init__(**kwargs)

	    def build(self, input_shape):
	        """Create the projection matrix and bias sized by the last input dim."""
	        feature_dim = input_shape[-1]
	        # Weight names and creation order are kept as in the saved model so
	        # that stored weights continue to map onto this layer.
	        self.W = self.add_weight(
	            name='attention_weight',
	            shape=(feature_dim, feature_dim),
	            initializer='glorot_uniform',
	            trainable=True,
	        )
	        self.b = self.add_weight(
	            name='attention_bias',
	            shape=(feature_dim,),
	            initializer='zeros',
	            trainable=True,
	        )
	        super().build(input_shape)

	    def call(self, inputs):
	        """Return ``(context, attention_weights)`` for the given input tensor."""
	        projected = tf.matmul(inputs, self.W) + self.b
	        attention_weights = tf.nn.softmax(tf.tanh(projected), axis=1)
	        # Weight the input element-wise, then collapse axis 1.
	        context = tf.reduce_sum(inputs * attention_weights, axis=1)
	        return context, attention_weights

	    def get_config(self):
	        """Return the base layer config; this layer adds no extra options."""
	        return super().get_config()

	# Load model, tokenizer, label encoder from Hugging Face Hub.
	# All three artifacts live in the same model repository.
	REPO_ID = "logasanjeev/sentiment-analysis-bilstm-luong"
	model_path = hf_hub_download(repo_id=REPO_ID, filename="sentiment_model.h5")
	tokenizer_path = hf_hub_download(repo_id=REPO_ID, filename="tokenizer.json")
	encoder_path = hf_hub_download(repo_id=REPO_ID, filename="label_encoder.pkl")

	# The custom attention layer must be registered for deserialisation.
	model = load_model(model_path, custom_objects={"LuongAttention": LuongAttention})

	# Read the tokenizer with an explicit encoding so the result does not depend
	# on the platform's default locale.
	# NOTE(review): json.load() is passed straight to tokenizer_from_json, so the
	# file presumably stores the tokenizer config as a JSON-encoded string --
	# confirm against how tokenizer.json was written.
	with open(tokenizer_path, "r", encoding="utf-8") as f:
	    tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(json.load(f))

	# NOTE(security): pickle.load can execute arbitrary code from the file. This
	# is only acceptable because the artifact comes from a repository the app
	# author controls; do not point REPO_ID at untrusted sources.
	with open(encoder_path, "rb") as f:
	    label_encoder = pickle.load(f)

	# Text cleaning function
	def clean_text(text):
	    """Normalise raw text into the space-joined token string fed to the tokenizer.

	    Steps, in order: coerce to ``str``, lowercase, strip URLs, strip
	    @mentions and #hashtags, remove ASCII punctuation, remove digits,
	    tokenize with NLTK ``word_tokenize`` and lemmatize every token.

	    Args:
	        text: Input text; non-string values are converted with ``str()``.

	    Returns:
	        The cleaned, lemmatized tokens joined by single spaces.
	    """
	    if not isinstance(text, str):
	        text = str(text)
	    text = text.lower()
	    # Alternation must be a bare '|'; an escaped '\|' only matches a literal
	    # pipe character, which left URLs, mentions and hashtags in the text.
	    text = re.sub(r'https?\S+|www\S+', '', text)
	    text = re.sub(r'[@#]\w+', '', text)
	    # Punctuation is removed after the URL/mention pass so that '@', '#',
	    # ':' and '/' are still present for the patterns above to match.
	    text = text.translate(str.maketrans('', '', string.punctuation))
	    text = re.sub(r'\d+', '', text)
	    tokens = [lemmatizer.lemmatize(token) for token in word_tokenize(text)]
	    return ' '.join(tokens).strip()

	# Prediction function
	def predict_sentiment(text):
	    """Classify the sentiment of ``text`` and format the result for the UI.

	    Args:
	        text: Raw user input from the textbox.

	    Returns:
	        A 3-tuple ``(message, probabilities, cleaned_text)``. ``message`` is a
	        display string. ``probabilities`` maps each capitalised class label
	        to its predicted probability. ``cleaned_text`` is the preprocessed
	        input. For rejected input the last two items are ``None`` and
	        ``message`` explains why.
	    """
	    if not text or not isinstance(text, str) or len(text.strip()) < 3:
	        return "Please enter a valid sentence.", None, None

	    # Clean and preprocess
	    cleaned = clean_text(text)
	    seq = tokenizer.texts_to_sequences([cleaned])
	    # Require at least one token id above 1; presumably index 1 is the
	    # tokenizer's out-of-vocabulary token -- TODO confirm.
	    if not seq or not any(x > 1 for x in seq[0]):
	        return "Text too short or invalid.", None, None

	    # Pad sequence
	    max_len = 35
	    pad = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')

	    # Predict
	    with tf.device('/CPU:0'):
	        pred = model.predict(pad, verbose=0)[0]

	    # Take the label order from the encoder itself instead of assuming that
	    # output index 0/1/2 is negative/neutral/positive.
	    labels = [str(label) for label in label_encoder.classes_]
	    sentiment = labels[int(np.argmax(pred))]
	    probs_dict = {
	        label.capitalize(): float(prob) for label, prob in zip(labels, pred)
	    }

	    # Format output; an unknown label just gets no emoji, not a KeyError.
	    emoji = {"negative": "😣", "neutral": "😐", "positive": "😊"}
	    headline = f"Sentiment: {sentiment.capitalize()} {emoji.get(sentiment.lower(), '')}"

	    return headline.rstrip(), probs_dict, cleaned

	# Custom CSS passed to gr.Blocks(css=...) below: sets the page font, caps and
	# centres the container width, and styles the heading, text boxes, output
	# text, footer, probability bar and buttons.
	css = """
	body { font-family: 'Arial', sans-serif; }
	.gradio-container { max-width: 800px; margin: auto; }
	h1 { color: #1a73e8; text-align: center; }
	.textbox { border-radius: 8px; }
	.output-text { font-size: 1.2em; font-weight: bold; }
	.footer { text-align: center; color: #666; }
	.prob-bar { margin-top: 10px; }
	button { border-radius: 6px; }
	"""

	# Gradio interface: one text input, an analyse button, and three outputs
	# (headline, per-class probabilities, cleaned text).
	with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
	    gr.Markdown(
	        """
	        # Sentiment Analysis App
	        Predict the sentiment of your text (negative, neutral, positive) using a Bi-LSTM model with Luong attention. Try it out!
	        """
	    )

	    with gr.Row():
	        with gr.Column(scale=3):
	            text_input = gr.Textbox(
	                label="Your Text",
	                placeholder="e.g., The food service is not good at all",
	                lines=2
	            )
	            predict_btn = gr.Button("Analyze Sentiment", variant="primary")
	        with gr.Column(scale=1):
	            theme_toggle = gr.Button("Toggle Theme")

	    output_text = gr.Markdown()
	    prob_plot = gr.Label(label="Probability Distribution")
	    cleaned_text = gr.Textbox(label="Cleaned Text", interactive=False)

	    # Clickable sample inputs that populate the textbox.
	    gr.Examples(
	        examples=[
	            "the food service is not good at all",
	            "this is not recommended at all",
	            "This place sucks!",
	            "I’m so happy with this!",
	            "It’s alright, I guess."
	        ],
	        inputs=text_input
	    )

	    # Bind functions
	    predict_btn.click(
	        fn=predict_sentiment,
	        inputs=text_input,
	        outputs=[output_text, prob_plot, cleaned_text]
	    )

	    # Theme toggle logic. A running app cannot swap its Python theme object,
	    # and a Blocks instance is not a valid event output, so the toggle is
	    # done client-side by flipping the 'dark' class on <body>.
	    # NOTE(review): the `js` keyword is the Gradio 4+ spelling (it was `_js`
	    # in Gradio 3) -- confirm against the pinned Gradio version.
	    theme_toggle.click(
	        fn=None,
	        inputs=None,
	        outputs=None,
	        js="() => { document.body.classList.toggle('dark'); }"
	    )

	    # Footer; a plain '|' avoids the invalid '\|' escape sequence in a
	    # non-raw Python string and renders the same.
	    gr.Markdown(
	        """
	        <div class='footer'>
	        Created by logasanjeev | Powered by Hugging Face & Gradio
	        </div>
	        """
	    )

	# Launch app: start serving the `demo` Blocks interface defined above.
	demo.launch()