logasanjeev committed on
Commit
adfdacf
·
verified ·
1 Parent(s): 697d739

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -48
app.py CHANGED
@@ -10,7 +10,7 @@ import nltk
10
  from nltk.tokenize import word_tokenize
11
  from nltk.stem import WordNetLemmatizer
12
  import re
13
- import string
14
  from huggingface_hub import hf_hub_download
15
  import warnings
16
  from sklearn.exceptions import InconsistentVersionWarning
@@ -27,60 +27,71 @@ nltk.download('omw-1.4', quiet=True)
27
  # Initialize lemmatizer
28
  lemmatizer = WordNetLemmatizer()
29
 
30
- # Define LuongAttention
31
  class LuongAttention(tf.keras.layers.Layer):
32
  def __init__(self, **kwargs):
33
  super(LuongAttention, self).__init__(**kwargs)
34
-
35
  def build(self, input_shape):
36
- self.W = self.add_weight(
37
- name='attention_weight',
38
- shape=(input_shape[-1], input_shape[-1]),
39
- initializer='glorot_uniform',
40
- trainable=True
41
- )
42
- self.b = self.add_weight(
43
- name='attention_bias',
44
- shape=(input_shape[-1],),
45
- initializer='zeros',
46
- trainable=True
47
- )
48
  super(LuongAttention, self).build(input_shape)
49
-
50
  def call(self, inputs):
51
- lstm_output = inputs
52
- score = tf.matmul(lstm_output, self.W) + self.b
53
- score = tf.tanh(score)
54
- attention_weights = tf.nn.softmax(score, axis=1)
55
- context = lstm_output * attention_weights
56
- context = tf.reduce_sum(context, axis=1)
57
- return context, attention_weights
58
-
59
  def get_config(self):
60
  config = super(LuongAttention, self).get_config()
61
  return config
62
 
63
  # Load model, tokenizer, label encoder from Hugging Face Hub
64
  model_path = hf_hub_download(repo_id="logasanjeev/sentiment-analysis-bilstm-luong", filename="sentiment_model.h5")
65
- tokenizer_path = hf_hub_download(repo_id="logasanjeev/sentiment-analysis-bilstm-luong", filename="tokenizer.json")
66
  encoder_path = hf_hub_download(repo_id="logasanjeev/sentiment-analysis-bilstm-luong", filename="label_encoder.pkl")
67
  model = load_model(model_path, custom_objects={"LuongAttention": LuongAttention})
68
- with open(tokenizer_path, "r") as f:
69
- tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(json.load(f))
70
  with open(encoder_path, "rb") as f:
71
  label_encoder = pickle.load(f)
72
 
73
- # Text cleaning function
 
 
 
74
  def clean_text(text):
75
  if not isinstance(text, str):
76
  text = str(text)
 
 
 
77
  text = text.lower()
 
78
  text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
79
- text = re.sub(r'@\w+|\#\w+', '', text)
80
- text = text.translate(str.maketrans('', '', string.punctuation))
81
- text = re.sub(r'\d+', '', text)
 
 
 
 
 
 
 
 
 
 
82
  tokens = word_tokenize(text)
83
- tokens = [lemmatizer.lemmatize(token) for token in tokens]
84
  return ' '.join(tokens).strip()
85
 
86
  # Prediction function
@@ -95,21 +106,23 @@ def predict_sentiment(text):
95
  return "Text too short or invalid.", None, None
96
 
97
  # Pad sequence
98
- max_len = 35
99
  pad = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')
100
 
101
  # Predict
102
  with tf.device('/CPU:0'):
103
- pred = model.predict(pad, verbose=0)[0]
104
- sentiment = label_encoder.inverse_transform([np.argmax(pred)])[0]
105
- probs = pred.tolist()
 
 
 
106
 
107
  # Format output
108
- emoji = {"negative": "😣", "neutral": "😐", "positive": "😊"}
109
  probs_dict = {
110
- "Negative": probs[0],
111
- "Neutral": probs[1],
112
- "Positive": probs[2]
113
  }
114
 
115
  return (
@@ -135,7 +148,7 @@ with gr.Blocks(theme="soft", css=css) as demo:
135
  gr.Markdown(
136
  """
137
  # Sentiment Analysis App
138
- Predict the sentiment of your text (negative, neutral, positive) using a Bi-LSTM model with Luong attention. Try it out!
139
  """
140
  )
141
 
@@ -143,7 +156,7 @@ with gr.Blocks(theme="soft", css=css) as demo:
143
  with gr.Column(scale=3):
144
  text_input = gr.Textbox(
145
  label="Your Text",
146
- placeholder="e.g., The food service is not good at all",
147
  lines=2
148
  )
149
  predict_btn = gr.Button("Analyze Sentiment", variant="primary")
@@ -154,11 +167,11 @@ with gr.Blocks(theme="soft", css=css) as demo:
154
 
155
  examples = gr.Examples(
156
  examples=[
157
- "the food service is not good at all",
158
- "this is not recommended at all",
159
- "This place sucks!",
160
- "I’m so happy with this!",
161
- "It’s alright, I guess."
162
  ],
163
  inputs=text_input
164
  )
@@ -179,4 +192,4 @@ with gr.Blocks(theme="soft", css=css) as demo:
179
  )
180
 
181
  # Launch app
182
- demo.launch()
 
10
  from nltk.tokenize import word_tokenize
11
  from nltk.stem import WordNetLemmatizer
12
  import re
13
+ import contractions
14
  from huggingface_hub import hf_hub_download
15
  import warnings
16
  from sklearn.exceptions import InconsistentVersionWarning
 
27
  # Initialize lemmatizer
28
  lemmatizer = WordNetLemmatizer()
29
 
30
+ # Define LuongAttention (matches training)
31
  class LuongAttention(tf.keras.layers.Layer):
32
  def __init__(self, **kwargs):
33
  super(LuongAttention, self).__init__(**kwargs)
34
+
35
  def build(self, input_shape):
36
+ self.W = self.add_weight(name='attention_weight',
37
+ shape=(input_shape[-1], input_shape[-1]),
38
+ initializer='glorot_normal',
39
+ trainable=True)
40
+ self.b = self.add_weight(name='attention_bias',
41
+ shape=(input_shape[-1],),
42
+ initializer='zeros',
43
+ trainable=True)
 
 
 
 
44
  super(LuongAttention, self).build(input_shape)
45
+
46
  def call(self, inputs):
47
+ e = tf.keras.backend.tanh(tf.keras.backend.dot(inputs, self.W) + self.b)
48
+ alpha = tf.keras.backend.softmax(e, axis=1)
49
+ context = inputs * alpha
50
+ context = tf.keras.backend.sum(context, axis=1)
51
+ return context
52
+
 
 
53
  def get_config(self):
54
  config = super(LuongAttention, self).get_config()
55
  return config
56
 
57
  # Load model, tokenizer, label encoder from Hugging Face Hub
58
  model_path = hf_hub_download(repo_id="logasanjeev/sentiment-analysis-bilstm-luong", filename="sentiment_model.h5")
59
+ tokenizer_path = hf_hub_download(repo_id="logasanjeev/sentiment-analysis-bilstm-luong", filename="tokenizer.pkl")
60
  encoder_path = hf_hub_download(repo_id="logasanjeev/sentiment-analysis-bilstm-luong", filename="label_encoder.pkl")
61
  model = load_model(model_path, custom_objects={"LuongAttention": LuongAttention})
62
+ with open(tokenizer_path, "rb") as f:
63
+ tokenizer = pickle.load(f)
64
  with open(encoder_path, "rb") as f:
65
  label_encoder = pickle.load(f)
66
 
67
+ # Optimal threshold from training
68
+ OPTIMAL_THRESHOLD = 0.5173
69
+
70
+ # Text cleaning function (matches training)
71
  def clean_text(text):
72
  if not isinstance(text, str):
73
  text = str(text)
74
+ # Expand contractions
75
+ text = contractions.fix(text)
76
+ # Convert to lowercase
77
  text = text.lower()
78
+ # Remove URLs
79
  text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
80
+ # Remove usernames and hashtags
81
+ text = re.sub(r'@\w+|#\w+', '', text)
82
+ # Remove HTML tags
83
+ text = re.sub(r'<.*?>+', '', text)
84
+ # Remove newlines
85
+ text = re.sub(r'\n', '', text)
86
+ # Remove numbers
87
+ text = re.sub(r'\w*\d\w*', '', text)
88
+ # Remove special characters
89
+ text = re.sub(r'[^\w\s]', '', text)
90
+ # Remove extra spaces
91
+ text = ' '.join(text.split())
92
+ # Tokenize and lemmatize
93
  tokens = word_tokenize(text)
94
+ tokens = [lemmatizer.lemmatize(token, pos='v') for token in tokens]
95
  return ' '.join(tokens).strip()
96
 
97
  # Prediction function
 
106
  return "Text too short or invalid.", None, None
107
 
108
  # Pad sequence
109
+ max_len = 60
110
  pad = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')
111
 
112
  # Predict
113
  with tf.device('/CPU:0'):
114
+ prob = model.predict(pad, verbose=0)[0][0]
115
+
116
+ # Apply threshold
117
+ label_idx = (prob >= OPTIMAL_THRESHOLD).astype(int)
118
+ sentiment = label_encoder.inverse_transform([label_idx])[0].lower()
119
+ confidence = prob if sentiment == 'positive' else 1 - prob
120
 
121
  # Format output
122
+ emoji = {"negative": "😣", "positive": "😊"}
123
  probs_dict = {
124
+ "Negative": 1 - prob,
125
+ "Positive": prob
 
126
  }
127
 
128
  return (
 
148
  gr.Markdown(
149
  """
150
  # Sentiment Analysis App
151
+ Predict the sentiment of your text (Negative or Positive) using a BiLSTM model with Luong attention. Optimized threshold (0.5173) for 86.58% accuracy. Try it out!
152
  """
153
  )
154
 
 
156
  with gr.Column(scale=3):
157
  text_input = gr.Textbox(
158
  label="Your Text",
159
+ placeholder="e.g., I wouldn't recommend it to anyone",
160
  lines=2
161
  )
162
  predict_btn = gr.Button("Analyze Sentiment", variant="primary")
 
167
 
168
  examples = gr.Examples(
169
  examples=[
170
+ "I wouldn't recommend it to anyone",
171
+ "The food service is not good at all",
172
+ "This place is awesome!",
173
+ "I'm so happy with this product!",
174
+ "Why does everything go wrong today?"
175
  ],
176
  inputs=text_input
177
  )
 
192
  )
193
 
194
  # Launch app
195
+ demo.launch()