donsek committed on
Commit
2427cf0
·
verified ·
1 Parent(s): a6141de

Upload code.ipynb

Browse files
Files changed (1) hide show
  1. code.ipynb +535 -0
code.ipynb ADDED
@@ -0,0 +1,535 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "38957f6a",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd \n",
11
+ "import numpy as np\n",
12
+ "import matplotlib.pyplot as plt"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 6,
18
+ "id": "9ec92866",
19
+ "metadata": {},
20
+ "outputs": [
21
+ {
22
+ "name": "stderr",
23
+ "output_type": "stream",
24
+ "text": [
25
+ "c:\\Users\\ukhal\\anaconda3\\envs\\datascience\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
26
+ " from .autonotebook import tqdm as notebook_tqdm\n"
27
+ ]
28
+ },
29
+ {
30
+ "name": "stdout",
31
+ "output_type": "stream",
32
+ "text": [
33
+ "WARNING:tensorflow:From c:\\Users\\ukhal\\anaconda3\\envs\\datascience\\Lib\\site-packages\\tf_keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
34
+ "\n"
35
+ ]
36
+ }
37
+ ],
38
+ "source": [
39
+ "from sentence_transformers import SentenceTransformer\n",
40
+ "\n",
41
+ "model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')\n",
42
+ "model.save('my_local_models/miniLM-v2')"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "code",
47
+ "execution_count": null,
48
+ "id": "a9fc3745",
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "name": "stderr",
53
+ "output_type": "stream",
54
+ "text": [
55
+ "Batches: 100%|██████████| 1720/1720 [05:45<00:00, 4.98it/s]\n"
56
+ ]
57
+ }
58
+ ],
59
+ "source": [
60
+ "vectors = model.encode(df['text'].tolist(), batch_size=32, show_progress_bar=True)\n",
61
+ "\n",
62
+ "# Add the vectors as a new column\n",
63
+ "df['vector'] = list(vectors)"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 8,
69
+ "id": "616a89d5",
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "from sklearn.preprocessing import LabelEncoder\n",
74
+ "\n",
75
+ "country_encoder = LabelEncoder()\n",
76
+ "df['country_id'] = country_encoder.fit_transform(df['country'])"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": null,
82
+ "id": "1a5d9807",
83
+ "metadata": {},
84
+ "outputs": [
85
+ {
86
+ "name": "stdout",
87
+ "output_type": "stream",
88
+ "text": [
89
+ "Epoch 1 — Loss: 559.1745\n",
90
+ "Epoch 2 — Loss: 511.0904\n",
91
+ "Epoch 3 — Loss: 487.1494\n",
92
+ "Epoch 4 — Loss: 476.0557\n",
93
+ "Epoch 5 — Loss: 463.6449\n",
94
+ "Epoch 6 — Loss: 458.0139\n",
95
+ "Epoch 7 — Loss: 454.9403\n",
96
+ "Epoch 8 — Loss: 445.9739\n",
97
+ "Epoch 9 — Loss: 443.4053\n",
98
+ "Epoch 10 — Loss: 441.2702\n",
99
+ "Epoch 11 — Loss: 435.5733\n",
100
+ "Epoch 12 — Loss: 432.5762\n",
101
+ "Epoch 13 — Loss: 428.4215\n",
102
+ "Epoch 14 — Loss: 424.5392\n",
103
+ "Epoch 15 — Loss: 427.4328\n",
104
+ "Epoch 16 — Loss: 419.4463\n",
105
+ "Epoch 17 — Loss: 420.8522\n",
106
+ "Epoch 18 — Loss: 418.8724\n",
107
+ "Epoch 19 — Loss: 410.7244\n",
108
+ "Epoch 20 — Loss: 408.1810\n",
109
+ "Epoch 21 — Loss: 404.8192\n",
110
+ "Epoch 22 — Loss: 402.0590\n",
111
+ "Epoch 23 — Loss: 400.0788\n",
112
+ "Epoch 24 — Loss: 395.5753\n",
113
+ "Epoch 25 — Loss: 391.3283\n",
114
+ "Epoch 26 — Loss: 390.9558\n",
115
+ "Epoch 27 — Loss: 386.5741\n",
116
+ " precision recall f1-score support\n",
117
+ "\n",
118
+ " Not Yes 0.78 0.88 0.83 27643\n",
119
+ " Yes 0.86 0.75 0.80 27377\n",
120
+ "\n",
121
+ " accuracy 0.81 55020\n",
122
+ " macro avg 0.82 0.81 0.81 55020\n",
123
+ "weighted avg 0.82 0.81 0.81 55020\n",
124
+ "\n"
125
+ ]
126
+ }
127
+ ],
128
+ "source": [
129
+ "import torch\n",
130
+ "import torch.nn as nn\n",
131
+ "import numpy as np\n",
132
+ "from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler\n",
133
+ "from sklearn.preprocessing import LabelEncoder\n",
134
+ "from sklearn.metrics import classification_report\n",
135
+ "\n",
136
+ "# ----------------------------\n",
137
+ "# Модель\n",
138
+ "# ----------------------------\n",
139
+ "\n",
140
+ "class VotePredictor(nn.Module):\n",
141
+ " def __init__(self, text_dim=384, country_count=193, country_emb_dim=32, hidden_dim=256):\n",
142
+ " super(VotePredictor, self).__init__()\n",
143
+ " self.country_embedding = nn.Embedding(country_count, country_emb_dim)\n",
144
+ "\n",
145
+ " self.model = nn.Sequential(\n",
146
+ " nn.Linear(text_dim + country_emb_dim, hidden_dim),\n",
147
+ " nn.ReLU(),\n",
148
+ " nn.Dropout(0.3),\n",
149
+ " nn.Linear(hidden_dim, 1)\n",
150
+ " )\n",
151
+ "\n",
152
+ " def forward(self, text_vecs, country_ids):\n",
153
+ " country_vecs = self.country_embedding(country_ids)\n",
154
+ " x = torch.cat([text_vecs, country_vecs], dim=1)\n",
155
+ " return self.model(x)\n",
156
+ "\n",
157
+ "# ----------------------------\n",
158
+ "# Подготовка данных\n",
159
+ "# ----------------------------\n",
160
+ "\n",
161
+ "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
162
+ "model = VotePredictor().to(device)\n",
163
+ "criterion = nn.BCEWithLogitsLoss()\n",
164
+ "optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)\n",
165
+ "\n",
166
+ "# Подготовка тензоров\n",
167
+ "X_vectors = np.stack(df['vector'].values)\n",
168
+ "y_labels = df['vote'].values\n",
169
+ "country_ids = country_encoder.fit_transform(df['country'].values)\n",
170
+ "\n",
171
+ "X_tensor = torch.tensor(X_vectors, dtype=torch.float32)\n",
172
+ "y_tensor = torch.tensor(y_labels, dtype=torch.float32)\n",
173
+ "c_tensor = torch.tensor(country_ids, dtype=torch.long)\n",
174
+ "\n",
175
+ "# Тензорный датасет\n",
176
+ "dataset = TensorDataset(X_tensor, c_tensor, y_tensor)\n",
177
+ "\n",
178
+ "# ----------------------------\n",
179
+ "# Логика весов\n",
180
+ "# ----------------------------\n",
181
+ "\n",
182
+ "# Веса\n",
183
+ "class_sample_count = np.array([(y_tensor == 0).sum(), (y_tensor == 1).sum()])\n",
184
+ "weights = 1. / class_sample_count\n",
185
+ "sample_weights = weights[y_tensor.long().numpy()]\n",
186
+ "\n",
187
+ "sampler = WeightedRandomSampler(\n",
188
+ " weights=sample_weights,\n",
189
+ " num_samples=len(sample_weights),\n",
190
+ " replacement=True\n",
191
+ ")\n",
192
+ "\n",
193
+ "# Загружаем данные\n",
194
+ "train_loader = DataLoader(dataset, batch_size=64, sampler=sampler)\n",
195
+ "\n",
196
+ "# ----------------------------\n",
197
+ "# Эпохи обучения\n",
198
+ "# ----------------------------\n",
199
+ "\n",
200
+ "for epoch in range(27):\n",
201
+ " model.train()\n",
202
+ " total_loss = 0\n",
203
+ "\n",
204
+ " for batch_x, batch_c, batch_y in train_loader:\n",
205
+ " batch_x, batch_c, batch_y = batch_x.to(device), batch_c.to(device), batch_y.to(device)\n",
206
+ "\n",
207
+ " optimizer.zero_grad()\n",
208
+ " logits = model(batch_x, batch_c).squeeze()\n",
209
+ " loss = criterion(logits, batch_y)\n",
210
+ " loss.backward()\n",
211
+ " optimizer.step()\n",
212
+ "\n",
213
+ " total_loss += loss.item()\n",
214
+ "\n",
215
+ " print(f\"Epoch {epoch+1} — Loss: {total_loss:.4f}\")\n",
216
+ "\n",
217
+ "# ----------------------------\n",
218
+ "# Оценка\n",
219
+ "# ----------------------------\n",
220
+ "\n",
221
+ "model.eval()\n",
222
+ "all_preds, all_true, all_country_ids = [], [], []\n",
223
+ "\n",
224
+ "with torch.no_grad():\n",
225
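+ " for batch_x, batch_c, batch_y in train_loader: # NOTE: this iterates the training loader (weighted sampler), so the report below is not a held-out score; use a test_loader if you split\n",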
226
+ " logits = model(batch_x.to(device), batch_c.to(device)).squeeze()\n",
227
+ " probs = torch.sigmoid(logits).cpu().numpy()\n",
228
+ " preds = (probs > 0.5445639).astype(int)\n",
229
+ "\n",
230
+ " all_preds.extend(preds)\n",
231
+ " all_true.extend(batch_y.numpy())\n",
232
+ " all_country_ids.extend(batch_c.numpy()) # keep each sample's country id alongside its prediction (presumably for per-country metrics)\n",
233
+ "\n",
234
+ "print(classification_report(all_true, all_preds, target_names=['Not Yes', 'Yes']))\n"
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": null,
240
+ "id": "7ff81e59",
241
+ "metadata": {},
242
+ "outputs": [],
243
+ "source": [
244
+ "problem_countries = df_metrics[df_metrics['f1'] < 0.7]['country'].tolist()\n",
245
+ "print(f\"{len(problem_countries)} countries with F1 < 0.7.\")"
246
+ ]
247
+ },
248
+ {
249
+ "cell_type": "code",
250
+ "execution_count": 15,
251
+ "id": "9d345404",
252
+ "metadata": {},
253
+ "outputs": [],
254
+ "source": [
255
+ "df_problem = df[df['country'].isin(problem_countries)].copy()"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": 16,
261
+ "id": "dac22a07",
262
+ "metadata": {},
263
+ "outputs": [],
264
+ "source": [
265
+ "from sklearn.preprocessing import LabelEncoder\n",
266
+ "\n",
267
+ "problem_country_encoder = LabelEncoder()\n",
268
+ "df_problem['country_id'] = problem_country_encoder.fit_transform(df_problem['country'])"
269
+ ]
270
+ },
271
+ {
272
+ "cell_type": "code",
273
+ "execution_count": null,
274
+ "id": "ebf3b626",
275
+ "metadata": {},
276
+ "outputs": [],
277
+ "source": [
278
+ "X_problem = np.stack(df_problem['vector'].values)\n",
279
+ "y_problem = df_problem['vote'].values\n",
280
+ "c_problem = df_problem['country_id'].values\n",
281
+ "\n",
282
+ "X_tensor = torch.tensor(X_problem, dtype=torch.float32)\n",
283
+ "y_tensor = torch.tensor(y_problem, dtype=torch.float32)\n",
284
+ "c_tensor = torch.tensor(c_problem, dtype=torch.long)\n",
285
+ "\n",
286
+ "from torch.utils.data import TensorDataset, DataLoader\n",
287
+ "\n",
288
+ "dataset = TensorDataset(X_tensor, c_tensor, y_tensor)\n",
289
+ "\n",
290
+ "class_sample_count = np.array([(y_tensor == 0).sum(), (y_tensor == 1).sum()])\n",
291
+ "weights = 1. / class_sample_count\n",
292
+ "sample_weights = weights[y_tensor.long().numpy()]\n",
293
+ "sampler = WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)\n",
294
+ "\n",
295
+ "train_loader = DataLoader(dataset, batch_size=64, sampler=sampler)\n",
296
+ "\n",
297
+ "problem_model = VotePredictor(country_count=len(problem_country_encoder.classes_)).to(device)\n",
298
+ "criterion = nn.BCEWithLogitsLoss()\n",
299
+ "optimizer = torch.optim.Adam(problem_model.parameters(), lr=1e-4)"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": null,
305
+ "id": "facb3c23",
306
+ "metadata": {},
307
+ "outputs": [
308
+ {
309
+ "name": "stdout",
310
+ "output_type": "stream",
311
+ "text": [
312
+ "Epoch 1 — Loss: 176.5783\n",
313
+ "Epoch 2 — Loss: 172.1360\n",
314
+ "Epoch 3 — Loss: 169.1655\n",
315
+ "Epoch 4 — Loss: 167.5052\n",
316
+ "Epoch 5 — Loss: 167.0431\n",
317
+ "Epoch 6 — Loss: 164.9137\n",
318
+ "Epoch 7 — Loss: 165.0920\n",
319
+ "Epoch 8 — Loss: 164.1620\n",
320
+ "\n",
321
+ "🧾 SPECIAL MODEL EVALUATION (Bad-F1 Countries Only):\n",
322
+ "\n",
323
+ " precision recall f1-score support\n",
324
+ "\n",
325
+ " Not Yes 0.64 0.64 0.64 8252\n",
326
+ " Yes 0.64 0.64 0.64 8254\n",
327
+ "\n",
328
+ " accuracy 0.64 16506\n",
329
+ " macro avg 0.64 0.64 0.64 16506\n",
330
+ "weighted avg 0.64 0.64 0.64 16506\n",
331
+ "\n"
332
+ ]
333
+ }
334
+ ],
335
+ "source": [
336
+ "import torch\n",
337
+ "import torch.nn as nn\n",
338
+ "import numpy as np\n",
339
+ "import pandas as pd\n",
340
+ "from sklearn.preprocessing import LabelEncoder\n",
341
+ "from sklearn.metrics import classification_report\n",
342
+ "from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler\n",
343
+ "\n",
344
+ "# ----------------------\n",
345
+ "# Модель\n",
346
+ "# ----------------------\n",
347
+ "\n",
348
+ "class VotePredictor(nn.Module):\n",
349
+ " def __init__(self, text_dim=384, country_count=50, country_emb_dim=32, hidden_dim=256):\n",
350
+ " super(VotePredictor, self).__init__()\n",
351
+ " self.country_embedding = nn.Embedding(country_count, country_emb_dim)\n",
352
+ " self.model = nn.Sequential(\n",
353
+ " nn.Linear(text_dim + country_emb_dim, hidden_dim),\n",
354
+ " nn.ReLU(),\n",
355
+ " nn.Dropout(0.3),\n",
356
+ " nn.Linear(hidden_dim, 1)\n",
357
+ " )\n",
358
+ "\n",
359
+ " def forward(self, text_vecs, country_ids):\n",
360
+ " country_vecs = self.country_embedding(country_ids)\n",
361
+ " x = torch.cat([text_vecs, country_vecs], dim=1)\n",
362
+ " return self.model(x)\n",
363
+ "\n",
364
+ "# ----------------------\n",
365
+ "# STEP 1: Фильтруем проблемные страны\n",
366
+ "# ----------------------\n",
367
+ "\n",
368
+ "problem_countries = df_metrics[df_metrics['f1'] < 0.7]['country'].tolist()\n",
369
+ "df_problem = df[df['country'].isin(problem_countries)].copy()\n",
370
+ "\n",
371
+ "# ----------------------\n",
372
+ "# STEP 2: Энкодинг стран\n",
373
+ "# ----------------------\n",
374
+ "\n",
375
+ "problem_country_encoder = LabelEncoder()\n",
376
+ "df_problem['country_id'] = problem_country_encoder.fit_transform(df_problem['country'])\n",
377
+ "\n",
378
+ "X_problem = np.stack(df_problem['vector'].values)\n",
379
+ "y_problem = df_problem['vote'].values\n",
380
+ "c_problem = df_problem['country_id'].values\n",
381
+ "\n",
382
+ "# ----------------------\n",
383
+ "# STEP 3: Подготовка тензоров\n",
384
+ "# ----------------------\n",
385
+ "\n",
386
+ "X_tensor = torch.tensor(X_problem, dtype=torch.float32)\n",
387
+ "y_tensor = torch.tensor(y_problem, dtype=torch.float32)\n",
388
+ "c_tensor = torch.tensor(c_problem, dtype=torch.long)\n",
389
+ "\n",
390
+ "dataset = TensorDataset(X_tensor, c_tensor, y_tensor)\n",
391
+ "\n",
392
+ "# Веса\n",
393
+ "class_sample_count = np.array([(y_tensor == 0).sum(), (y_tensor == 1).sum()])\n",
394
+ "weights = 1. / class_sample_count\n",
395
+ "sample_weights = weights[y_tensor.long().numpy()]\n",
396
+ "sampler = WeightedRandomSampler(sample_weights, len(sample_weights), replacement=True)\n",
397
+ "\n",
398
+ "train_loader = DataLoader(dataset, batch_size=64, sampler=sampler)\n",
399
+ "\n",
400
+ "# ----------------------\n",
401
+ "# STEP 4: Тренировка модели\n",
402
+ "# ----------------------\n",
403
+ "\n",
404
+ "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
405
+ "model = VotePredictor(country_count=len(problem_country_encoder.classes_)).to(device)\n",
406
+ "criterion = nn.BCEWithLogitsLoss()\n",
407
+ "optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)\n",
408
+ "\n",
409
+ "# Эпохи обучения\n",
410
+ "for epoch in range(8):\n",
411
+ " model.train()\n",
412
+ " total_loss = 0\n",
413
+ "\n",
414
+ " for batch_x, batch_c, batch_y in train_loader:\n",
415
+ " batch_x, batch_c, batch_y = batch_x.to(device), batch_c.to(device), batch_y.to(device)\n",
416
+ "\n",
417
+ " optimizer.zero_grad()\n",
418
+ " logits = model(batch_x, batch_c).squeeze()\n",
419
+ " loss = criterion(logits, batch_y)\n",
420
+ " loss.backward()\n",
421
+ " optimizer.step()\n",
422
+ "\n",
423
+ " total_loss += loss.item()\n",
424
+ "\n",
425
+ " print(f\"Epoch {epoch+1} — Loss: {total_loss:.4f}\")\n",
426
+ "\n",
427
+ "# ----------------------\n",
428
+ "# STEP 5: Оценка\n",
429
+ "# ----------------------\n",
430
+ "\n",
431
+ "model.eval()\n",
432
+ "all_preds, all_true = [], []\n",
433
+ "\n",
434
+ "with torch.no_grad():\n",
435
+ " for batch_x, batch_c, batch_y in train_loader:\n",
436
+ " logits = model(batch_x.to(device), batch_c.to(device)).squeeze()\n",
437
+ " probs = torch.sigmoid(logits).cpu().numpy()\n",
438
+ " preds = (probs > 0.5).astype(int)\n",
439
+ "\n",
440
+ " all_preds.extend(preds)\n",
441
+ " all_true.extend(batch_y.numpy())\n",
442
+ "\n",
443
+ "print(\"\\n🧾 SPECIAL MODEL EVALUATION (Bad-F1 Countries Only):\\n\")\n",
444
+ "print(classification_report(all_true, all_preds, target_names=['Not Yes', 'Yes']))\n"
445
+ ]
446
+ },
447
+ {
448
+ "cell_type": "code",
449
+ "execution_count": 54,
450
+ "id": "39995c95",
451
+ "metadata": {},
452
+ "outputs": [
453
+ {
454
+ "data": {
455
+ "text/plain": [
456
+ "['SURINAME',\n",
457
+ " 'TURKMENISTAN',\n",
458
+ " 'MARSHALL ISLANDS',\n",
459
+ " 'MYANMAR',\n",
460
+ " 'GABON',\n",
461
+ " 'CENTRAL AFRICAN REPUBLIC',\n",
462
+ " 'ISRAEL',\n",
463
+ " 'REPUBLIC OF THE CONGO',\n",
464
+ " 'LIBERIA',\n",
465
+ " 'SOMALIA',\n",
466
+ " 'CANADA',\n",
467
+ " \"LAO PEOPLE'S DEMOCRATIC REPUBLIC\",\n",
468
+ " 'TUVALU',\n",
469
+ " 'DEMOCRATIC REPUBLIC OF THE CONGO',\n",
470
+ " 'MONTENEGRO',\n",
471
+ " 'VANUATU',\n",
472
+ " 'UNITED STATES',\n",
473
+ " 'TÜRKİYE',\n",
474
+ " 'SEYCHELLES',\n",
475
+ " 'SERBIA',\n",
476
+ " 'CABO VERDE',\n",
477
+ " 'VENEZUELA (BOLIVARIAN REPUBLIC OF)',\n",
478
+ " 'KIRIBATI',\n",
479
+ " 'IRAN (ISLAMIC REPUBLIC OF)',\n",
480
+ " 'SOUTH SUDAN',\n",
481
+ " 'ALBANIA',\n",
482
+ " 'CZECHIA',\n",
483
+ " 'DOMINICA',\n",
484
+ " 'SAO TOME AND PRINCIPE',\n",
485
+ " 'ESWATINI',\n",
486
+ " 'CHAD',\n",
487
+ " 'EQUATORIAL GUINEA',\n",
488
+ " 'GAMBIA',\n",
489
+ " 'LIBYA',\n",
490
+ " \"CÔTE D'IVOIRE\",\n",
491
+ " 'SAINT CHRISTOPHER AND NEVIS',\n",
492
+ " 'RWANDA',\n",
493
+ " 'TONGA',\n",
494
+ " 'NIGER',\n",
495
+ " 'MICRONESIA (FEDERATED STATES OF)',\n",
496
+ " 'SYRIAN ARAB REPUBLIC',\n",
497
+ " 'NAURU',\n",
498
+ " 'PALAU',\n",
499
+ " 'NORTH MACEDONIA',\n",
500
+ " 'NETHERLANDS',\n",
501
+ " 'BOLIVIA (PLURINATIONAL STATE OF)']"
502
+ ]
503
+ },
504
+ "execution_count": 54,
505
+ "metadata": {},
506
+ "output_type": "execute_result"
507
+ }
508
+ ],
509
+ "source": [
510
+ "list(set(problem_countries))"
511
+ ]
512
+ }
513
+ ],
514
+ "metadata": {
515
+ "kernelspec": {
516
+ "display_name": "datascience",
517
+ "language": "python",
518
+ "name": "python3"
519
+ },
520
+ "language_info": {
521
+ "codemirror_mode": {
522
+ "name": "ipython",
523
+ "version": 3
524
+ },
525
+ "file_extension": ".py",
526
+ "mimetype": "text/x-python",
527
+ "name": "python",
528
+ "nbconvert_exporter": "python",
529
+ "pygments_lexer": "ipython3",
530
+ "version": "3.12.9"
531
+ }
532
+ },
533
+ "nbformat": 4,
534
+ "nbformat_minor": 5
535
+ }