RishabA committed on
Commit
6addb3e
·
verified ·
1 Parent(s): ff5e38b

Upload 5 files

Sentiment_Analysis_in_PyTorch.ipynb ADDED
@@ -0,0 +1,772 @@
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "machine_shape": "hm",
8
+ "gpuType": "A100"
9
+ },
10
+ "kernelspec": {
11
+ "name": "python3",
12
+ "display_name": "Python 3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ },
17
+ "accelerator": "GPU"
18
+ },
19
+ "cells": [
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": null,
23
+ "metadata": {
24
+ "id": "9gYFoxi68eer"
25
+ },
26
+ "outputs": [],
27
+ "source": [
28
+ "!pip install datasets transformers"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "source": [
34
+ "import pandas as pd\n",
35
+ "import numpy as np\n",
36
+ "import matplotlib.pyplot as plt\n",
37
+ "import os\n",
38
+ "import math\n",
39
+ "import time\n",
40
+ "from tqdm.notebook import trange, tqdm\n",
41
+ "\n",
42
+ "import torch\n",
43
+ "import torch.nn as nn\n",
44
+ "from torch import optim\n",
45
+ "from torch.utils.data import DataLoader\n",
46
+ "from torch import Tensor\n",
47
+ "from torch.utils.data.dataset import Dataset\n",
48
+ "import torch.nn.functional as F\n",
49
+ "from torch.distributions import Categorical\n",
50
+ "from torch.cuda.amp import autocast, GradScaler\n",
51
+ "\n",
52
+ "from datasets import load_dataset\n",
53
+ "from transformers import AutoTokenizer\n",
54
+ "\n",
55
+ "torch.backends.cuda.matmul.allow_tf32 = True\n",
56
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
57
+ "device"
58
+ ],
59
+ "metadata": {
60
+ "id": "rhkTsyBn8j_m"
61
+ },
62
+ "execution_count": null,
63
+ "outputs": []
64
+ },
65
+ {
66
+ "cell_type": "code",
67
+ "source": [
68
+ "train_dataset = load_dataset(\"sst5\", split=\"train\")\n",
69
+ "test_dataset = load_dataset(\"sst5\", split=\"test\")\n",
70
+ "\n",
71
+ "print(f\"Length of train dataset: {len(train_dataset)}\")\n",
72
+ "print(f\"Length of test dataset: {len(test_dataset)}\")"
73
+ ],
74
+ "metadata": {
75
+ "id": "c0wKEehd8lfH"
76
+ },
77
+ "execution_count": null,
78
+ "outputs": []
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "source": [
83
+ "train_dataset[1][\"text\"], train_dataset[1][\"label\"]"
84
+ ],
85
+ "metadata": {
86
+ "id": "Oj6qWm8H8uYK"
87
+ },
88
+ "execution_count": null,
89
+ "outputs": []
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "source": [
94
+ "tokenizer = AutoTokenizer.from_pretrained(\"bert-base-uncased\")"
95
+ ],
96
+ "metadata": {
97
+ "id": "7wbogVwT8ulJ"
98
+ },
99
+ "execution_count": null,
100
+ "outputs": []
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "source": [
105
+ "len(tokenizer.vocab)"
106
+ ],
107
+ "metadata": {
108
+ "id": "tPDFZ3xK8wZb"
109
+ },
110
+ "execution_count": null,
111
+ "outputs": []
112
+ },
113
+ {
114
+ "cell_type": "code",
115
+ "source": [
116
+ "tokenizer.vocab_size"
117
+ ],
118
+ "metadata": {
119
+ "id": "EY2TbtGZ8xdA"
120
+ },
121
+ "execution_count": null,
122
+ "outputs": []
123
+ },
124
+ {
125
+ "cell_type": "code",
126
+ "source": [
127
+ "print(\"[PAD] token id:\", tokenizer.pad_token_id) # 0\n",
128
+ "print(\"[CLS] token id:\", tokenizer.cls_token_id) # 101\n",
129
+ "print(\"[SEP] token id:\", tokenizer.sep_token_id) # 102"
130
+ ],
131
+ "metadata": {
132
+ "id": "1_Wq4KEj81lb"
133
+ },
134
+ "execution_count": null,
135
+ "outputs": []
136
+ },
137
+ {
138
+ "cell_type": "code",
139
+ "source": [
140
+ "class SST5Dataset(Dataset):\n",
141
+ " def __init__(self, dataset, tokenizer, max_length=128):\n",
142
+ " self.dataset = dataset\n",
143
+ " self.tokenizer = tokenizer\n",
144
+ " self.max_length = max_length\n",
145
+ "\n",
146
+ " def __len__(self):\n",
147
+ " return len(self.dataset)\n",
148
+ "\n",
149
+ " def __getitem__(self, idx):\n",
150
+ " sample = self.dataset[idx]\n",
151
+ " text = sample[\"text\"]\n",
152
+ " label = torch.tensor(sample[\"label\"])\n",
153
+ "\n",
154
+ " encoded_text = self.tokenizer(\n",
155
+ " text,\n",
156
+ " truncation=True,\n",
157
+ " padding=\"max_length\",\n",
158
+ " max_length=self.max_length,\n",
159
+ " return_tensors=\"pt\"\n",
160
+ " )\n",
161
+ "\n",
162
+ " # Remove the extra batch dimension for each item in the encoded dictionary.\n",
163
+ " encoded_text = {key: val.squeeze(dim=0) for key, val in encoded_text.items()}\n",
164
+ "\n",
165
+ " return {\n",
166
+ " \"text\": encoded_text,\n",
167
+ " \"label\": label\n",
168
+ " }\n",
169
+ "\n",
170
+ "train_dataset = SST5Dataset(dataset=train_dataset,\n",
171
+ " tokenizer=tokenizer,\n",
172
+ " max_length=32)\n",
173
+ "\n",
174
+ "test_dataset = SST5Dataset(dataset=test_dataset,\n",
175
+ " tokenizer=tokenizer,\n",
176
+ " max_length=32)"
177
+ ],
178
+ "metadata": {
179
+ "id": "jQY8xfZa-ilL"
180
+ },
181
+ "execution_count": null,
182
+ "outputs": []
183
+ },
184
+ {
185
+ "cell_type": "code",
186
+ "source": [
187
+ "batch_size = 128\n",
188
+ "num_workers = os.cpu_count()\n",
189
+ "\n",
190
+ "train_dataloader = DataLoader(train_dataset,\n",
191
+ " batch_size=batch_size,\n",
192
+ " shuffle=True,\n",
193
+ " num_workers=num_workers,\n",
194
+ " pin_memory=True)\n",
195
+ "\n",
196
+ "test_dataloader = DataLoader(test_dataset,\n",
197
+ " batch_size=batch_size,\n",
198
+ " shuffle=False,\n",
199
+ " num_workers=num_workers,\n",
200
+ " pin_memory=True)"
201
+ ],
202
+ "metadata": {
203
+ "id": "ItktnvlfApqz"
204
+ },
205
+ "execution_count": null,
206
+ "outputs": []
207
+ },
208
+ {
209
+ "cell_type": "code",
210
+ "source": [
211
+ "test_items = next(iter(train_dataloader))\n",
212
+ "print(tokenizer.decode(test_items[\"text\"][\"input_ids\"][0]))"
213
+ ],
214
+ "metadata": {
215
+ "id": "KrroXe5aAtzs"
216
+ },
217
+ "execution_count": null,
218
+ "outputs": []
219
+ },
220
+ {
221
+ "cell_type": "code",
222
+ "source": [
223
+ "class EmbeddingLayer(nn.Module):\n",
224
+ " def __init__(self,\n",
225
+ " vocab_size: int,\n",
226
+ " d_model: int = 768):\n",
227
+ " super().__init__()\n",
228
+ "\n",
229
+ " self.d_model = d_model\n",
230
+ "\n",
231
+ " self.lut = nn.Embedding(num_embeddings=vocab_size, embedding_dim=d_model) # (vocab_size, d_model)\n",
232
+ "\n",
233
+ " def forward(self, x):\n",
234
+ " # x shape: (batch_size, seq_len)\n",
235
+ " return self.lut(x) * math.sqrt(self.d_model) # (batch_size, seq_len, d_model)"
236
+ ],
237
+ "metadata": {
238
+ "id": "el4Tnb37AvO7"
239
+ },
240
+ "execution_count": null,
241
+ "outputs": []
242
+ },
243
+ {
244
+ "cell_type": "code",
245
+ "source": [
246
+ "class PositionalEncoding(nn.Module):\n",
247
+ " def __init__(self,\n",
248
+ " d_model: int = 768,\n",
249
+ " dropout: float = 0.1,\n",
250
+ " max_length: int = 128):\n",
251
+ " super().__init__()\n",
252
+ "\n",
253
+ " self.dropout = nn.Dropout(p=dropout)\n",
254
+ "\n",
255
+ " pe = torch.zeros(max_length, d_model) # (max_length, d_model)\n",
256
+ " # Create position column\n",
257
+ " k = torch.arange(0, max_length).unsqueeze(dim=1) # (max_length, 1)\n",
258
+ "\n",
259
+ " # Use the log version of the function for positional encodings\n",
260
+ " div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model)) # (d_model / 2)\n",
261
+ "\n",
262
+ " # Use sine for the even indices and cosine for the odd indices\n",
263
+ " pe[:, 0::2] = torch.sin(k * div_term)\n",
264
+ " pe[:, 1::2] = torch.cos(k * div_term)\n",
265
+ "\n",
266
+ " pe = pe.unsqueeze(dim=0) # Add the batch dimension(1, max_length, d_model)\n",
267
+ "\n",
268
+ " # We use a buffer because the positional encoding is fixed and not a model paramter that we want to be updated during backpropagation.\n",
269
+ " self.register_buffer(\"pe\", pe) # Buffers are saved with the model state and are moved to the correct device\n",
270
+ "\n",
271
+ " def forward(self, x):\n",
272
+ " # x shape: (batch_size, seq_length, d_model)\n",
273
+ " x += self.pe[:, :x.size(1)]\n",
274
+ " return self.dropout(x)"
275
+ ],
276
+ "metadata": {
277
+ "id": "Qk0sNjc7A6sZ"
278
+ },
279
+ "execution_count": null,
280
+ "outputs": []
281
+ },
282
+ {
283
+ "cell_type": "code",
284
+ "source": [
285
+ "class MultiHeadAttention(nn.Module):\n",
286
+ " def __init__(self,\n",
287
+ " d_model: int = 768,\n",
288
+ " n_heads: int = 8,\n",
289
+ " dropout: float = 0.1):\n",
290
+ " super().__init__()\n",
291
+ " assert d_model % n_heads == 0\n",
292
+ "\n",
293
+ " self.d_model = d_model\n",
294
+ " self.n_heads = n_heads\n",
295
+ " self.d_key = d_model // n_heads\n",
296
+ "\n",
297
+ " self.Wq = nn.Linear(in_features=d_model, out_features=d_model)\n",
298
+ " self.Wk = nn.Linear(in_features=d_model, out_features=d_model)\n",
299
+ " self.Wv = nn.Linear(in_features=d_model, out_features=d_model)\n",
300
+ " self.Wo = nn.Linear(in_features=d_model, out_features=d_model)\n",
301
+ "\n",
302
+ " self.dropout = nn.Dropout(p=dropout)\n",
303
+ "\n",
304
+ "\n",
305
+ " def forward(self,\n",
306
+ " query: Tensor,\n",
307
+ " key: Tensor,\n",
308
+ " value: Tensor,\n",
309
+ " mask: Tensor = None):\n",
310
+ " # input shape: (batch_size, seq_len, d_model)\n",
311
+ "\n",
312
+ " batch_size = key.size(0)\n",
313
+ "\n",
314
+ " Q = self.Wq(query)\n",
315
+ " K = self.Wk(key)\n",
316
+ " V = self.Wv(value)\n",
317
+ "\n",
318
+ " Q = Q.view(batch_size, -1, self.n_heads, self.d_key).permute(0, 2, 1, 3) # (batch_size, n_heads, q_length, d_key)\n",
319
+ " K = K.view(batch_size, -1, self.n_heads, self.d_key).permute(0, 2, 1, 3) # (batch_size, n_heads, k_length, d_key)\n",
320
+ " V = V.view(batch_size, -1, self.n_heads, self.d_key).permute(0, 2, 1, 3) # (batch_size, n_heads, v_length, d_key)\n",
321
+ "\n",
322
+ " scaled_dot_product = torch.matmul(Q, K.permute(0, 1, 3, 2)) / math.sqrt(self.d_key) # (batch_size, n_heads, q_length, k_length)\n",
323
+ "\n",
324
+ " if mask is not None:\n",
325
+ " scaled_dot_product = scaled_dot_product.masked_fill(mask == 0, float('-inf'))\n",
326
+ "\n",
327
+ " attention_probs = torch.softmax(scaled_dot_product, dim=-1)\n",
328
+ "\n",
329
+ " A = torch.matmul(self.dropout(attention_probs), V) # (batch_size, n_heads, q_length, d_key)\n",
330
+ "\n",
331
+ " A = A.permute(0, 2, 1, 3) # (batch_size, q_length, n_heads, d_key)\n",
332
+ " A = A.contiguous().view(batch_size, -1, self.n_heads * self.d_key) # (batch_size, q_length, d_model)\n",
333
+ "\n",
334
+ " output = self.Wo(A) # (batch_size, q_length, d_model)\n",
335
+ "\n",
336
+ " return output, attention_probs"
337
+ ],
338
+ "metadata": {
339
+ "id": "8ugM9m7rA9zL"
340
+ },
341
+ "execution_count": null,
342
+ "outputs": []
343
+ },
344
+ {
345
+ "cell_type": "code",
346
+ "source": [
347
+ "class PositionwiseFeedForward(nn.Module):\n",
348
+ " def __init__(self,\n",
349
+ " d_model: int = 768,\n",
350
+ " dropout: float = 0.1):\n",
351
+ " super().__init__()\n",
352
+ "\n",
353
+ " self.ffn = nn.Sequential(\n",
354
+ " nn.Linear(in_features=d_model, out_features=(d_model * 4)),\n",
355
+ " nn.ReLU(),\n",
356
+ " nn.Linear(in_features=(d_model * 4), out_features=d_model),\n",
357
+ " nn.Dropout(p=dropout)\n",
358
+ " )\n",
359
+ "\n",
360
+ " def forward(self, x):\n",
361
+ " # x shape: (batch_size, q_length, d_model)\n",
362
+ " return self.ffn(x) # (batch_size, q_length, d_model)"
363
+ ],
364
+ "metadata": {
365
+ "id": "kqQGZf6rA_KL"
366
+ },
367
+ "execution_count": null,
368
+ "outputs": []
369
+ },
370
+ {
371
+ "cell_type": "code",
372
+ "source": [
373
+ "class EncoderLayer(nn.Module):\n",
374
+ " def __init__(self,\n",
375
+ " d_model: int = 768,\n",
376
+ " n_heads: int = 8,\n",
377
+ " dropout: float = 0.1):\n",
378
+ " super().__init__()\n",
379
+ "\n",
380
+ " self.attention = MultiHeadAttention(d_model=d_model, n_heads=n_heads, dropout=dropout)\n",
381
+ " self.attention_layer_norm = nn.LayerNorm(d_model)\n",
382
+ "\n",
383
+ " self.position_wise_ffn = PositionwiseFeedForward(d_model=d_model, dropout=dropout)\n",
384
+ " self.ffn_layer_norm = nn.LayerNorm(d_model)\n",
385
+ "\n",
386
+ " self.dropout = nn.Dropout(p=dropout)\n",
387
+ "\n",
388
+ " def forward(self,\n",
389
+ " src: Tensor,\n",
390
+ " src_mask: Tensor):\n",
391
+ " _src, attention_probs = self.attention(query=src, key=src, value=src, mask=src_mask)\n",
392
+ " src = self.attention_layer_norm(src + self.dropout(_src))\n",
393
+ "\n",
394
+ " _src = self.position_wise_ffn(src)\n",
395
+ " src = self.ffn_layer_norm(src + self.dropout(_src))\n",
396
+ "\n",
397
+ " return src, attention_probs"
398
+ ],
399
+ "metadata": {
400
+ "id": "_jypLBCiBDb-"
401
+ },
402
+ "execution_count": null,
403
+ "outputs": []
404
+ },
405
+ {
406
+ "cell_type": "code",
407
+ "source": [
408
+ "class Encoder(nn.Module):\n",
409
+ " def __init__(self,\n",
410
+ " d_model: int = 768,\n",
411
+ " n_layers: int = 3,\n",
412
+ " n_heads: int = 8,\n",
413
+ " dropout: float = 0.1):\n",
414
+ " super().__init__()\n",
415
+ "\n",
416
+ " self.layers = nn.ModuleList([EncoderLayer(d_model=d_model, n_heads=n_heads, dropout=dropout) for layer in range(n_layers)])\n",
417
+ " self.dropout = nn.Dropout(p=dropout)\n",
418
+ "\n",
419
+ " def forward(self,\n",
420
+ " src: Tensor,\n",
421
+ " src_mask: Tensor):\n",
422
+ "\n",
423
+ " for layer in self.layers:\n",
424
+ " src, attention_probs = layer(src, src_mask)\n",
425
+ "\n",
426
+ " self.attention_probs = attention_probs\n",
427
+ "\n",
428
+ " # src += torch.randn_like(src) * 0.001\n",
429
+ " return src"
430
+ ],
431
+ "metadata": {
432
+ "id": "o-cPP_YLBF8y"
433
+ },
434
+ "execution_count": null,
435
+ "outputs": []
436
+ },
437
+ {
438
+ "cell_type": "code",
439
+ "source": [
440
+ "class Transformer(nn.Module):\n",
441
+ " def __init__(self,\n",
442
+ " encoder: Encoder,\n",
443
+ " src_embed: EmbeddingLayer,\n",
444
+ " src_pad_idx: int,\n",
445
+ " device,\n",
446
+ " d_model: int = 768,\n",
447
+ " num_labels: int = 5):\n",
448
+ " super().__init__()\n",
449
+ "\n",
450
+ " self.encoder = encoder\n",
451
+ " self.src_embed = src_embed\n",
452
+ " self.device = device\n",
453
+ " self.src_pad_idx = src_pad_idx\n",
454
+ "\n",
455
+ " self.dropout = nn.Dropout(p=0.1)\n",
456
+ " self.classifier = nn.Linear(in_features=d_model, out_features=num_labels)\n",
457
+ "\n",
458
+ " def make_src_mask(self, src: Tensor):\n",
459
+ " # Assign 1 to tokens that need attended to and 0 to padding tokens, then add 2 dimensions\n",
460
+ " src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)\n",
461
+ "\n",
462
+ " return src_mask\n",
463
+ "\n",
464
+ " def forward(self, src: Tensor):\n",
465
+ " src_mask = self.make_src_mask(src) # (batch_size, 1, 1, src_seq_length)\n",
466
+ " output = self.encoder(self.src_embed(src), src_mask) # (batch_size, src_seq_length, d_model)\n",
467
+ " output = output[:, 0, :] # Get the sos token vector representation (works sort of like a cls token in ViT) shape: (batch_size, 1, d_model)\n",
468
+ " logits = self.classifier(self.dropout(output))\n",
469
+ "\n",
470
+ " return logits"
471
+ ],
472
+ "metadata": {
473
+ "id": "5fcff-6oBX_w"
474
+ },
475
+ "execution_count": null,
476
+ "outputs": []
477
+ },
478
+ {
479
+ "cell_type": "code",
480
+ "source": [
481
+ "def make_model(device,\n",
482
+ " tokenizer,\n",
483
+ " n_layers: int = 3,\n",
484
+ " d_model: int = 768,\n",
485
+ " num_labels: int = 5,\n",
486
+ " n_heads: int = 8,\n",
487
+ " dropout: float = 0.1,\n",
488
+ " max_length: int = 128):\n",
489
+ " encoder = Encoder(d_model=d_model,\n",
490
+ " n_layers=n_layers,\n",
491
+ " n_heads=n_heads,\n",
492
+ " dropout=dropout)\n",
493
+ "\n",
494
+ " src_embed = EmbeddingLayer(vocab_size=tokenizer.vocab_size, d_model=d_model)\n",
495
+ "\n",
496
+ " pos_enc = PositionalEncoding(d_model=d_model,\n",
497
+ " dropout=dropout,\n",
498
+ " max_length=max_length)\n",
499
+ "\n",
500
+ " model = Transformer(encoder=encoder,\n",
501
+ " src_embed=nn.Sequential(src_embed, pos_enc),\n",
502
+ " src_pad_idx=tokenizer.pad_token_id,\n",
503
+ " device=device,\n",
504
+ " d_model=d_model,\n",
505
+ " num_labels=num_labels)\n",
506
+ "\n",
507
+ " # Initialize parameters with Xaviar/Glorot\n",
508
+ " # This maintains a consistent variance of activations throughout the network\n",
509
+ " # Helps avoid issues like vanishing or exploding gradients.\n",
510
+ " for p in model.parameters():\n",
511
+ " if p.dim() > 1:\n",
512
+ " nn.init.xavier_uniform_(p)\n",
513
+ "\n",
514
+ " return model"
515
+ ],
516
+ "metadata": {
517
+ "id": "-7adHoyYBcqT"
518
+ },
519
+ "execution_count": null,
520
+ "outputs": []
521
+ },
522
+ {
523
+ "cell_type": "code",
524
+ "source": [
525
+ "model = make_model(device=device,\n",
526
+ " tokenizer=tokenizer,\n",
527
+ " n_layers=4,\n",
528
+ " d_model=768,\n",
529
+ " num_labels=5,\n",
530
+ " n_heads=8,\n",
531
+ " dropout=0.1,\n",
532
+ " max_length=32)\n",
533
+ "\n",
534
+ "model.to(device)"
535
+ ],
536
+ "metadata": {
537
+ "id": "M0EbhBuQBhUK"
538
+ },
539
+ "execution_count": null,
540
+ "outputs": []
541
+ },
542
+ {
543
+ "cell_type": "code",
544
+ "source": [
545
+ "print(f\"The model has {(sum(p.numel() for p in model.parameters() if p.requires_grad)):,} trainable parameters\")"
546
+ ],
547
+ "metadata": {
548
+ "id": "NT37aWKnBk4y"
549
+ },
550
+ "execution_count": null,
551
+ "outputs": []
552
+ },
553
+ {
554
+ "cell_type": "code",
555
+ "source": [
556
+ "lr = 1e-4\n",
557
+ "\n",
558
+ "optimizer = torch.optim.Adam(params=model.parameters(),\n",
559
+ " lr=lr,\n",
560
+ " betas=(0.9, 0.999))\n",
561
+ "loss_fn = nn.CrossEntropyLoss()\n",
562
+ "scaler = GradScaler()"
563
+ ],
564
+ "metadata": {
565
+ "id": "hZmiAxW-BmLW"
566
+ },
567
+ "execution_count": null,
568
+ "outputs": []
569
+ },
570
+ {
571
+ "cell_type": "code",
572
+ "source": [
573
+ "def train(model,\n",
574
+ " iterator,\n",
575
+ " optimizer,\n",
576
+ " loss_fn,\n",
577
+ " clip,\n",
578
+ " epoch):\n",
579
+ " model.train()\n",
580
+ " epoch_loss = 0\n",
581
+ "\n",
582
+ " pbar = tqdm(iterator, total=len(iterator), desc=f\"Epoch {epoch + 1} Progress\", colour=\"#005500\")\n",
583
+ " for i, batch in enumerate(pbar):\n",
584
+ " src = batch[\"text\"][\"input_ids\"].to(device)\n",
585
+ " labels = batch[\"label\"].to(device)\n",
586
+ "\n",
587
+ " optimizer.zero_grad()\n",
588
+ " with autocast():\n",
589
+ " # Forward pass\n",
590
+ " logits = model(src)\n",
591
+ "\n",
592
+ " # Calculate the loss\n",
593
+ " loss = loss_fn(logits, labels)\n",
594
+ "\n",
595
+ " scaler.scale(loss).backward()\n",
596
+ " scaler.unscale_(optimizer)\n",
597
+ " nn.utils.clip_grad_norm_(model.parameters(), clip)\n",
598
+ " scaler.step(optimizer)\n",
599
+ " scaler.update()\n",
600
+ " epoch_loss += loss.item()\n",
601
+ "\n",
602
+ " pbar.set_postfix(loss=loss.item()) # Update the loss on the tqdm progress bar\n",
603
+ "\n",
604
+ " return (epoch_loss / len(iterator))"
605
+ ],
606
+ "metadata": {
607
+ "id": "WMNVjg0UBqQF"
608
+ },
609
+ "execution_count": null,
610
+ "outputs": []
611
+ },
612
+ {
613
+ "cell_type": "code",
614
+ "source": [
615
+ "def evaluate(model,\n",
616
+ " iterator,\n",
617
+ " loss_fn):\n",
618
+ " model.eval()\n",
619
+ " epoch_loss = 0\n",
620
+ "\n",
621
+ " with torch.inference_mode():\n",
622
+ " for i, batch in enumerate(iterator):\n",
623
+ " src = batch[\"text\"][\"input_ids\"].to(device)\n",
624
+ " labels = batch[\"label\"].to(device)\n",
625
+ "\n",
626
+ " # Forward pass\n",
627
+ " logits = model(src)\n",
628
+ "\n",
629
+ " # Calculate the loss\n",
630
+ " loss = loss_fn(logits, labels)\n",
631
+ " epoch_loss += loss.item()\n",
632
+ "\n",
633
+ " return (epoch_loss / len(iterator))"
634
+ ],
635
+ "metadata": {
636
+ "id": "V0McrJ1FF5d3"
637
+ },
638
+ "execution_count": null,
639
+ "outputs": []
640
+ },
641
+ {
642
+ "cell_type": "code",
643
+ "source": [
644
+ "def epoch_time(start_time, end_time):\n",
645
+ " elapsed_time = end_time - start_time\n",
646
+ " elapsed_mins = int(elapsed_time / 60)\n",
647
+ " elapsed_secs = int(elapsed_time - (elapsed_mins * 60))\n",
648
+ " return elapsed_mins, elapsed_secs"
649
+ ],
650
+ "metadata": {
651
+ "id": "rq9YQv_eF5YQ"
652
+ },
653
+ "execution_count": null,
654
+ "outputs": []
655
+ },
656
+ {
657
+ "cell_type": "code",
658
+ "source": [
659
+ "epochs = 10\n",
660
+ "clip = 1\n",
661
+ "\n",
662
+ "best_valid_loss = float(\"inf\")\n",
663
+ "model_path = \"sentiment_analysis_model.pt\"\n",
664
+ "\n",
665
+ "if os.path.exists(model_path):\n",
666
+ " print(f\"Loading model from {model_path}...\")\n",
667
+ " model.load_state_dict(torch.load(model_path, map_location=device))"
668
+ ],
669
+ "metadata": {
670
+ "id": "JE6JAXM-F5Qc"
671
+ },
672
+ "execution_count": null,
673
+ "outputs": []
674
+ },
675
+ {
676
+ "cell_type": "code",
677
+ "source": [
678
+ "should_train = True\n",
679
+ "\n",
680
+ "if should_train:\n",
681
+ " for epoch in tqdm(range(epochs), desc=f\"Training progress\", colour=\"#00ff00\"):\n",
682
+ " start_time = time.time()\n",
683
+ "\n",
684
+ " train_loss = train(model=model,\n",
685
+ " iterator=train_dataloader,\n",
686
+ " optimizer=optimizer,\n",
687
+ " loss_fn=loss_fn,\n",
688
+ " clip=clip,\n",
689
+ " epoch=epoch)\n",
690
+ "\n",
691
+ " end_time = time.time()\n",
692
+ " epoch_mins, epoch_secs = epoch_time(start_time, end_time)\n",
693
+ "\n",
694
+ " message = f\"Epoch: {epoch + 1} | Time: {epoch_mins}m {epoch_secs}s --> STORED\"\n",
695
+ "\n",
696
+ " torch.save(model.state_dict(), model_path)\n",
697
+ "\n",
698
+ " print(message)\n",
699
+ " print(f\"Train Loss: {train_loss:.6f}\")"
700
+ ],
701
+ "metadata": {
702
+ "id": "ruWsYqeYGCi0"
703
+ },
704
+ "execution_count": null,
705
+ "outputs": []
706
+ },
707
+ {
708
+ "cell_type": "code",
709
+ "source": [
710
+ "test_loss = evaluate(model=model,\n",
711
+ " iterator=test_dataloader,\n",
712
+ " loss_fn=loss_fn)\n",
713
+ "\n",
714
+ "print(f\"Test Loss: {test_loss:.6f}\")"
715
+ ],
716
+ "metadata": {
717
+ "id": "GNHZ-ft8GHGy"
718
+ },
719
+ "execution_count": null,
720
+ "outputs": []
721
+ },
722
+ {
723
+ "cell_type": "code",
724
+ "source": [
725
+ "def get_sentiment(question, model, device, max_length: int = 32):\n",
726
+ " model.eval()\n",
727
+ "\n",
728
+ " encoded = tokenizer(question, truncation=True, max_length=max_length, return_tensors=\"pt\")\n",
729
+ " src_tensor = encoded[\"input_ids\"].to(device)\n",
730
+ "\n",
731
+ " with torch.inference_mode():\n",
732
+ " # Forward pass for classification.\n",
733
+ " logits = model(src_tensor) # shape: (1, num_labels)\n",
734
+ "\n",
735
+ " # Get the predicted class (index) with the highest score.\n",
736
+ " pred_index = torch.argmax(logits, dim=1).item()\n",
737
+ "\n",
738
+ " sentiment_map = {\n",
739
+ " 0: \"Very Negative\",\n",
740
+ " 1: \"Negative\",\n",
741
+ " 2: \"Neutral\",\n",
742
+ " 3: \"Positive\",\n",
743
+ " 4: \"Very Positive\"\n",
744
+ " }\n",
745
+ " predicted_sentiment = sentiment_map.get(pred_index, \"unknown\")\n",
746
+ "\n",
747
+ " return predicted_sentiment"
748
+ ],
749
+ "metadata": {
750
+ "id": "0ej2-U8dGrot"
751
+ },
752
+ "execution_count": null,
753
+ "outputs": []
754
+ },
755
+ {
756
+ "cell_type": "code",
757
+ "source": [
758
+ "#@title Question Answering\n",
759
+ "src_sentence = \"That book was amazing!\" #@param \"\"\n",
760
+ "\n",
761
+ "predicted_sentiment = get_sentiment(src_sentence, model, device, max_length=32)\n",
762
+ "\n",
763
+ "print(predicted_sentiment)"
764
+ ],
765
+ "metadata": {
766
+ "id": "oCwZfvW5IpWG"
767
+ },
768
+ "execution_count": null,
769
+ "outputs": []
770
+ }
771
+ ]
772
+ }
app.py ADDED
@@ -0,0 +1,69 @@
1
+ import os
2
+ import time
3
+ import torch
4
+ import gradio as gr
5
+ from transformers import AutoTokenizer
6
+ from model import make_model, get_sentiment
7
+
8
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9
+
10
+ # Load the tokenizer and model
11
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
12
+ model = make_model(
13
+ device=device,
14
+ tokenizer=tokenizer,
15
+ n_layers=4,
16
+ d_model=768,
17
+ num_labels=5,
18
+ n_heads=8,
19
+ dropout=0.1,
20
+ max_length=32,
21
+ )
22
+ model.to(device)
23
+
24
+ model_path = "sentiment_analysis_model.pt"
25
+ if os.path.exists(model_path):
26
+ print(f"Loading model from {model_path}...")
27
+ model.load_state_dict(torch.load(model_path, map_location=device))
28
+ else:
29
+ print("No pretrained model found. Using randomly initialized weights.")
30
+
31
+
32
+ def predict_sentiment(text):
33
+ sentiment = get_sentiment(text, model, tokenizer, device, max_length=32)
34
+ return sentiment
35
+
36
+
37
+ css_str = """
38
+ body {
39
+ background-color: #f7f7f7;
40
+ }
41
+
42
+ .title {
43
+ font-size: 48px;
44
+ font-weight: bold;
45
+ text-align: center;
46
+ margin-top: 20px;
47
+ }
48
+
49
+ .description {
50
+ font-size: 20px;
51
+ text-align: center;
52
+ margin-bottom: 40px;
53
+ }
54
+ """
55
+
56
+ with gr.Blocks(css=css_str) as demo:
57
+ gr.Markdown("<div class='title'>Sentiment Diffusion</div>")
58
+ gr.Markdown(
59
+ "<div class='description'>Enter a sentence and see the predicted sentiment.</div>"
60
+ )
61
+ text_input = gr.Textbox(
62
+ label="Enter Text", lines=3, placeholder="Type your review or sentence here..."
63
+ )
64
+ predict_btn = gr.Button("Predict Sentiment")
65
+ output_box = gr.Textbox(label="Predicted Sentiment")
66
+ predict_btn.click(fn=predict_sentiment, inputs=text_input, outputs=output_box)
67
+
68
+ if __name__ == "__main__":
69
+ demo.launch(share=True)
model.py ADDED
@@ -0,0 +1,292 @@
1
+ import math
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ from torch import Tensor
6
+
7
+
8
+ class EmbeddingLayer(nn.Module):
9
+ def __init__(self, vocab_size: int, d_model: int = 768):
10
+ super().__init__()
11
+
12
+ self.d_model = d_model
13
+
14
+ self.lut = nn.Embedding(
15
+ num_embeddings=vocab_size, embedding_dim=d_model
16
+ ) # (vocab_size, d_model)
17
+
18
+ def forward(self, x):
19
+ # x shape: (batch_size, seq_len)
20
+ return self.lut(x) * math.sqrt(self.d_model) # (batch_size, seq_len, d_model)
21
+
22
+
23
+ class PositionalEncoding(nn.Module):
24
+ def __init__(self, d_model: int = 768, dropout: float = 0.1, max_length: int = 128):
25
+ super().__init__()
26
+
27
+ self.dropout = nn.Dropout(p=dropout)
28
+
29
+ pe = torch.zeros(max_length, d_model) # (max_length, d_model)
30
+ # Create position column
31
+ k = torch.arange(0, max_length).unsqueeze(dim=1) # (max_length, 1)
32
+
33
+ # Compute the inverse frequency term 1/10000^(2i/d_model) in log space for numerical stability
34
+ div_term = torch.exp(
35
+ torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model)
36
+ ) # (d_model / 2)
37
+
38
+ # Use sine for the even indices and cosine for the odd indices
39
+ pe[:, 0::2] = torch.sin(k * div_term)
40
+ pe[:, 1::2] = torch.cos(k * div_term)
41
+
42
+ pe = pe.unsqueeze(dim=0) # Add the batch dimension (1, max_length, d_model)
43
+
44
+ # We use a buffer because the positional encoding is fixed and not a model parameter that we want to be updated during backpropagation.
45
+ self.register_buffer(
46
+ "pe", pe
47
+ ) # Buffers are saved with the model state and are moved to the correct device
48
+
49
+ def forward(self, x):
50
+ # x shape: (batch_size, seq_length, d_model)
51
+ x += self.pe[:, : x.size(1)]
52
+ return self.dropout(x)
53
+
54
+
55
+ class MultiHeadAttention(nn.Module):
56
+ def __init__(self, d_model: int = 768, n_heads: int = 8, dropout: float = 0.1):
57
+ super().__init__()
58
+ assert d_model % n_heads == 0
59
+
60
+ self.d_model = d_model
61
+ self.n_heads = n_heads
62
+ self.d_key = d_model // n_heads
63
+
64
+ self.Wq = nn.Linear(in_features=d_model, out_features=d_model)
65
+ self.Wk = nn.Linear(in_features=d_model, out_features=d_model)
66
+ self.Wv = nn.Linear(in_features=d_model, out_features=d_model)
67
+ self.Wo = nn.Linear(in_features=d_model, out_features=d_model)
68
+
69
+ self.dropout = nn.Dropout(p=dropout)
70
+
71
+ def forward(self, query: Tensor, key: Tensor, value: Tensor, mask: Tensor = None):
72
+ # input shape: (batch_size, seq_len, d_model)
73
+
74
+ batch_size = key.size(0)
75
+
76
+ Q = self.Wq(query)
77
+ K = self.Wk(key)
78
+ V = self.Wv(value)
79
+
80
+ Q = Q.view(batch_size, -1, self.n_heads, self.d_key).permute(
81
+ 0, 2, 1, 3
82
+ ) # (batch_size, n_heads, q_length, d_key)
83
+ K = K.view(batch_size, -1, self.n_heads, self.d_key).permute(
84
+ 0, 2, 1, 3
85
+ ) # (batch_size, n_heads, k_length, d_key)
86
+ V = V.view(batch_size, -1, self.n_heads, self.d_key).permute(
87
+ 0, 2, 1, 3
88
+ ) # (batch_size, n_heads, v_length, d_key)
89
+
90
+ scaled_dot_product = torch.matmul(Q, K.permute(0, 1, 3, 2)) / math.sqrt(
91
+ self.d_key
92
+ ) # (batch_size, n_heads, q_length, k_length)
93
+
94
+ if mask is not None:
95
+ scaled_dot_product = scaled_dot_product.masked_fill(
96
+ mask == 0, float("-inf")
97
+ )
98
+
99
+ attention_probs = torch.softmax(scaled_dot_product, dim=-1)
100
+
101
+ A = torch.matmul(
102
+ self.dropout(attention_probs), V
103
+ ) # (batch_size, n_heads, q_length, d_key)
104
+
105
+ A = A.permute(0, 2, 1, 3) # (batch_size, q_length, n_heads, d_key)
106
+ A = A.contiguous().view(
107
+ batch_size, -1, self.n_heads * self.d_key
108
+ ) # (batch_size, q_length, d_model)
109
+
110
+ output = self.Wo(A) # (batch_size, q_length, d_model)
111
+
112
+ return output, attention_probs
113
+
114
+
115
+ class PositionwiseFeedForward(nn.Module):
116
+ def __init__(self, d_model: int = 768, dropout: float = 0.1):
117
+ super().__init__()
118
+
119
+ self.ffn = nn.Sequential(
120
+ nn.Linear(in_features=d_model, out_features=(d_model * 4)),
121
+ nn.ReLU(),
122
+ nn.Linear(in_features=(d_model * 4), out_features=d_model),
123
+ nn.Dropout(p=dropout),
124
+ )
125
+
126
+ def forward(self, x):
127
+ # x shape: (batch_size, q_length, d_model)
128
+ return self.ffn(x) # (batch_size, q_length, d_model)
129
+
130
+
131
+ class EncoderLayer(nn.Module):
132
+ def __init__(self, d_model: int = 768, n_heads: int = 8, dropout: float = 0.1):
133
+ super().__init__()
134
+
135
+ self.attention = MultiHeadAttention(
136
+ d_model=d_model, n_heads=n_heads, dropout=dropout
137
+ )
138
+ self.attention_layer_norm = nn.LayerNorm(d_model)
139
+
140
+ self.position_wise_ffn = PositionwiseFeedForward(
141
+ d_model=d_model, dropout=dropout
142
+ )
143
+ self.ffn_layer_norm = nn.LayerNorm(d_model)
144
+
145
+ self.dropout = nn.Dropout(p=dropout)
146
+
147
+ def forward(self, src: Tensor, src_mask: Tensor):
148
+ _src, attention_probs = self.attention(
149
+ query=src, key=src, value=src, mask=src_mask
150
+ )
151
+ src = self.attention_layer_norm(src + self.dropout(_src))
152
+
153
+ _src = self.position_wise_ffn(src)
154
+ src = self.ffn_layer_norm(src + self.dropout(_src))
155
+
156
+ return src, attention_probs
157
+
158
+
159
+ class Encoder(nn.Module):
160
+ def __init__(
161
+ self,
162
+ d_model: int = 768,
163
+ n_layers: int = 3,
164
+ n_heads: int = 8,
165
+ dropout: float = 0.1,
166
+ ):
167
+ super().__init__()
168
+
169
+ self.layers = nn.ModuleList(
170
+ [
171
+ EncoderLayer(d_model=d_model, n_heads=n_heads, dropout=dropout)
172
+ for layer in range(n_layers)
173
+ ]
174
+ )
175
+ self.dropout = nn.Dropout(p=dropout)
176
+
177
+ def forward(self, src: Tensor, src_mask: Tensor):
178
+
179
+ for layer in self.layers:
180
+ src, attention_probs = layer(src, src_mask)
181
+
182
+ self.attention_probs = attention_probs
183
+
184
+ # src += torch.randn_like(src) * 0.001
185
+ return src
186
+
187
+
188
+ class Transformer(nn.Module):
189
+ def __init__(
190
+ self,
191
+ encoder: Encoder,
192
+ src_embed: EmbeddingLayer,
193
+ src_pad_idx: int,
194
+ device,
195
+ d_model: int = 768,
196
+ num_labels: int = 5,
197
+ ):
198
+ super().__init__()
199
+
200
+ self.encoder = encoder
201
+ self.src_embed = src_embed
202
+ self.device = device
203
+ self.src_pad_idx = src_pad_idx
204
+
205
+ self.dropout = nn.Dropout(p=0.1)
206
+ self.classifier = nn.Linear(in_features=d_model, out_features=num_labels)
207
+
208
+ def make_src_mask(self, src: Tensor):
209
+ # Assign 1 to tokens that need to be attended to and 0 to padding tokens, then add 2 dimensions
210
+ src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
211
+
212
+ return src_mask
213
+
214
+ def forward(self, src: Tensor):
215
+ src_mask = self.make_src_mask(src) # (batch_size, 1, 1, src_seq_length)
216
+ output = self.encoder(
217
+ self.src_embed(src), src_mask
218
+ ) # (batch_size, src_seq_length, d_model)
219
+ output = output[
220
+ :, 0, :
221
+ ] # Take the first token's ([CLS]) representation, similar to the class token in ViT; shape: (batch_size, d_model)
222
+ logits = self.classifier(self.dropout(output))
223
+
224
+ return logits
225
+
226
+
227
+ def make_model(
228
+ device,
229
+ tokenizer,
230
+ n_layers: int = 3,
231
+ d_model: int = 768,
232
+ num_labels: int = 5,
233
+ n_heads: int = 8,
234
+ dropout: float = 0.1,
235
+ max_length: int = 128,
236
+ ):
237
+ encoder = Encoder(
238
+ d_model=d_model, n_layers=n_layers, n_heads=n_heads, dropout=dropout
239
+ )
240
+
241
+ src_embed = EmbeddingLayer(vocab_size=tokenizer.vocab_size, d_model=d_model)
242
+
243
+ pos_enc = PositionalEncoding(
244
+ d_model=d_model, dropout=dropout, max_length=max_length
245
+ )
246
+
247
+ model = Transformer(
248
+ encoder=encoder,
249
+ src_embed=nn.Sequential(src_embed, pos_enc),
250
+ src_pad_idx=tokenizer.pad_token_id,
251
+ device=device,
252
+ d_model=d_model,
253
+ num_labels=num_labels,
254
+ )
255
+
256
+ # Initialize parameters with Xavier/Glorot
257
+ # This maintains a consistent variance of activations throughout the network
258
+ # Helps avoid issues like vanishing or exploding gradients.
259
+ for p in model.parameters():
260
+ if p.dim() > 1:
261
+ nn.init.xavier_uniform_(p)
262
+
263
+ return model
264
+
265
+
266
+ def get_sentiment(text, model, tokenizer, device, max_length: int = 32):
267
+ model.eval()
268
+
269
+ # Tokenize the input text into padded input ids for the model.
270
+ encoded = tokenizer(
271
+ text,
272
+ truncation=True,
273
+ max_length=max_length,
274
+ padding="max_length",
275
+ return_tensors="pt",
276
+ )
277
+
278
+ src_tensor = encoded["input_ids"].to(device)
279
+
280
+ with torch.inference_mode():
281
+ logits = model(src_tensor) # shape: (batch_size, num_labels)
282
+
283
+ pred_index = torch.argmax(logits, dim=1).item()
284
+
285
+ sentiment_map = {
286
+ 0: "Very Negative",
287
+ 1: "Negative",
288
+ 2: "Neutral",
289
+ 3: "Positive",
290
+ 4: "Very Positive",
291
+ }
292
+ return sentiment_map.get(pred_index, "Unknown")
requirements.txt ADDED
@@ -0,0 +1,5 @@
1
+ torch
2
+ transformers
3
+ datasets
4
+ gradio
5
+ nltk
sentiment_analysis_model.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eae4e6ac0f01d92d35262998fc93d46e976636a23dd21073867a93eb1a80a84a
3
+ size 207310930