diff --git "a/model_training/model_training.ipynb" "b/model_training/model_training.ipynb" new file mode 100644--- /dev/null +++ "b/model_training/model_training.ipynb" @@ -0,0 +1,5952 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "969a9695b9df48a683e377fbd49d5f43": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_4350b28d2981414b85112f6bc041db75", + "IPY_MODEL_b21c712e50e84f2fa9dba2ad054661df", + "IPY_MODEL_11f9d2ed670d4d41a9260be5bcf9379d" + ], + "layout": "IPY_MODEL_8564d0c823fb45098e778cbae88ae60a" + } + }, + "4350b28d2981414b85112f6bc041db75": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a5c5e293787f45228ee13404d1766ef4", + "placeholder": "​", + "style": "IPY_MODEL_716c805246424420893e64faf42b1c9e", + "value": "README.md: 100%" + } + }, + "b21c712e50e84f2fa9dba2ad054661df": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3e8774defa6a4f71abef50d0a3d04128", + "max": 2870, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a0b1cc19904a4f65b634b88e80a96fb0", + "value": 2870 + } + }, + "11f9d2ed670d4d41a9260be5bcf9379d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0d4d91617a7a4305a62560c8919e683a", + "placeholder": "​", + "style": "IPY_MODEL_e3584530da4943959b2e4560ef9c9273", + "value": " 2.87k/2.87k [00:00<00:00, 202kB/s]" + } + }, + "8564d0c823fb45098e778cbae88ae60a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a5c5e293787f45228ee13404d1766ef4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "716c805246424420893e64faf42b1c9e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3e8774defa6a4f71abef50d0a3d04128": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a0b1cc19904a4f65b634b88e80a96fb0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0d4d91617a7a4305a62560c8919e683a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e3584530da4943959b2e4560ef9c9273": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "86c25a3bccfd4a78a0e58f2cce1774c8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7e8130db5e344e6da464e9be271cbb13", + "IPY_MODEL_6230c859c1c2484fa929b74d62db1fc0", + "IPY_MODEL_cb9972724a634e539e21d1687f070168" + ], + "layout": "IPY_MODEL_afd5e45a75b94ff282e0bc931d6f2433" + } + }, + "7e8130db5e344e6da464e9be271cbb13": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fd87a739af0d4ce9a6a477efd5af4c26", + "placeholder": "​", + "style": "IPY_MODEL_3a7cf67a079440928396b3db9b2032d3", + "value": "train.csv: 100%" + } + }, + "6230c859c1c2484fa929b74d62db1fc0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8fd43403b79c4becae6792707755f66f", + "max": 19041186, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_064b322ff6ee487f92a3c27300acbf8a", + "value": 19041186 + } + }, + "cb9972724a634e539e21d1687f070168": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9f07ba5029e146a499469c4304fce3a5", + "placeholder": "​", + "style": "IPY_MODEL_ce67b994ed0d441a8b8d41f0dd17ea11", + "value": " 19.0M/19.0M [00:00<00:00, 89.7MB/s]" + } + }, + "afd5e45a75b94ff282e0bc931d6f2433": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd87a739af0d4ce9a6a477efd5af4c26": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3a7cf67a079440928396b3db9b2032d3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8fd43403b79c4becae6792707755f66f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "064b322ff6ee487f92a3c27300acbf8a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9f07ba5029e146a499469c4304fce3a5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ce67b994ed0d441a8b8d41f0dd17ea11": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e85629e675954a79b5a9e88c537c7765": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8caaeb2f2e844d39a7b7824341c5a72e", + "IPY_MODEL_a5332cc275734a19b3b99528ed06557c", + "IPY_MODEL_51e2788e90c4410e8da1b80c27115059" + ], + "layout": "IPY_MODEL_6286dc27fdc549b1b1f38b90a059dd59" + } + }, + "8caaeb2f2e844d39a7b7824341c5a72e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_56e567bc0ff342748358c101d004058e", + "placeholder": "​", + "style": "IPY_MODEL_71f63c090e99417b8198294b8ab85800", + "value": "valid.csv: 100%" + } + }, + "a5332cc275734a19b3b99528ed06557c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4edbf11520fd4051bd9889c82f136a1f", + "max": 2376932, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d6b0e844a7c54122a0a5a2977dabaf6e", + "value": 2376932 + } + }, + "51e2788e90c4410e8da1b80c27115059": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b582eae1b03b4b84b9fe4bd27f9d48b2", + "placeholder": "​", + "style": "IPY_MODEL_8c0c77d2ac97477193c73bfc968047ee", + "value": " 2.38M/2.38M [00:01<00:00, 2.14MB/s]" + } + }, + "6286dc27fdc549b1b1f38b90a059dd59": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "56e567bc0ff342748358c101d004058e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "71f63c090e99417b8198294b8ab85800": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4edbf11520fd4051bd9889c82f136a1f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d6b0e844a7c54122a0a5a2977dabaf6e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b582eae1b03b4b84b9fe4bd27f9d48b2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8c0c77d2ac97477193c73bfc968047ee": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "72c5d67b5a004bc38805b8373ccc839d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7d79c0bc4d644a808c9b145f5246a4fa", + "IPY_MODEL_6afa9e0ae1724ead9fc9d00b44debb50", + "IPY_MODEL_35f3c05c107a43be83c4cd2288d7f978" + ], + "layout": "IPY_MODEL_17e2f2aa24a6449b9d34cd4e45380d17" + } + }, + "7d79c0bc4d644a808c9b145f5246a4fa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_78d4c86600db4121bdf23da1ab83a37b", + "placeholder": "​", + "style": "IPY_MODEL_543cc147bc50443eae9d0c2db3f2e01c", + "value": "test.csv: 100%" + } + }, + "6afa9e0ae1724ead9fc9d00b44debb50": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c3c5150c1b294e6680d5319566ea6fdb", + "max": 2384586, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3dafd94c17044054bd6003cfcedf253d", + "value": 2384586 + } + }, + "35f3c05c107a43be83c4cd2288d7f978": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_929c1172139f4b90aa143013baf32013", + "placeholder": "​", + "style": "IPY_MODEL_345e2451df894daaa942664ab8cae344", + "value": " 2.38M/2.38M [00:01<00:00, 2.19MB/s]" + } + }, + "17e2f2aa24a6449b9d34cd4e45380d17": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "78d4c86600db4121bdf23da1ab83a37b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "543cc147bc50443eae9d0c2db3f2e01c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c3c5150c1b294e6680d5319566ea6fdb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3dafd94c17044054bd6003cfcedf253d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "929c1172139f4b90aa143013baf32013": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "345e2451df894daaa942664ab8cae344": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9852cf0778254f4d8bbb03404623a2a8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6dafe1df4736425ead5fd0cb4a889423", + "IPY_MODEL_1ef079bfbe164c3ca3239abd56efea3f", + "IPY_MODEL_9a7e4388b50344ae9e549caad193d7e4" + ], + "layout": "IPY_MODEL_f8cdddb6e22945ce8321c4f0a4adcc8e" + } + }, + "6dafe1df4736425ead5fd0cb4a889423": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ae0df42c5e4f44608f3e95d3bba9a65a", + "placeholder": "​", + "style": "IPY_MODEL_fd0a5bb2b04a420aa0a7bf9b82bb6aea", + "value": "Generating train split: 100%" + } + }, + "1ef079bfbe164c3ca3239abd56efea3f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ce367905f3f34b4598113c46e4492eb7", + "max": 18369, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_bacf7ca0fd4746d29d3f69b26aa04f36", + "value": 18369 + } + }, + "9a7e4388b50344ae9e549caad193d7e4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_17c421b56f4a4255b4ebf61673b538eb", + "placeholder": "​", + "style": "IPY_MODEL_069398ee29994b5eb8107cdb3b4ab9df", + "value": " 18369/18369 [00:00<00:00, 27357.91 examples/s]" + } + }, + "f8cdddb6e22945ce8321c4f0a4adcc8e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae0df42c5e4f44608f3e95d3bba9a65a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fd0a5bb2b04a420aa0a7bf9b82bb6aea": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ce367905f3f34b4598113c46e4492eb7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bacf7ca0fd4746d29d3f69b26aa04f36": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "17c421b56f4a4255b4ebf61673b538eb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "069398ee29994b5eb8107cdb3b4ab9df": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2b68a30bb79e482987cb0e3a94b5784d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c8efc2a709184ee389bbf2dde8b5abe3", + "IPY_MODEL_376a714cc9424ce69e4ed6e8d9412e3d", + "IPY_MODEL_efcbc34cdb594d81bfd919a66eafa91f" + ], + "layout": "IPY_MODEL_3e9d3ec6971b4fbbb9d680af15541425" + } + }, + "c8efc2a709184ee389bbf2dde8b5abe3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0b3b02489bdf425d82e2597821d8cf67", + "placeholder": "​", + "style": "IPY_MODEL_5625773ebcac4695a0dd3e6edeb389d3", + "value": "Generating validation split: 100%" + } + }, + "376a714cc9424ce69e4ed6e8d9412e3d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_79e1daef8f1d4d68a9c99d3a9f079894", + "max": 2297, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1c49eb33c9094aeba795c0250aa3f05a", + "value": 2297 + } + }, + "efcbc34cdb594d81bfd919a66eafa91f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f31ffb34c5b245bd90051cb893c4c5b7", + "placeholder": "​", + "style": "IPY_MODEL_9cd3bce2b222454d8c8a782a02f89010", + "value": " 2297/2297 [00:00<00:00, 21830.29 examples/s]" + } + }, + "3e9d3ec6971b4fbbb9d680af15541425": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0b3b02489bdf425d82e2597821d8cf67": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5625773ebcac4695a0dd3e6edeb389d3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "79e1daef8f1d4d68a9c99d3a9f079894": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1c49eb33c9094aeba795c0250aa3f05a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f31ffb34c5b245bd90051cb893c4c5b7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9cd3bce2b222454d8c8a782a02f89010": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "104336ba9ea6451b865a840f0955e6a6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c3d4b37cba3c4e82a020d47be7cb2da4", + "IPY_MODEL_ed8ca6389c20429d89a6063892f1f930", + "IPY_MODEL_b4e82f4677b24836a938edaa4a287efb" + ], + "layout": "IPY_MODEL_2bf4e5ad02cd401f9e48f7db327bd7ef" + } + }, + "c3d4b37cba3c4e82a020d47be7cb2da4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a72a57ed13704c93be7daab90c2bc0fe", + "placeholder": "​", + "style": "IPY_MODEL_8d773d9125534d2b9936785a2579c9db", + "value": "Generating test split: 100%" + } + }, + "ed8ca6389c20429d89a6063892f1f930": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_430c069e188b48908e54407eb21c3a54", + "max": 2296, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e3326ade6f5b4a98beb3f587da5e269a", + "value": 2296 + } + }, + "b4e82f4677b24836a938edaa4a287efb": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3fceb97e1da74b36807d677751a45bf5", + "placeholder": "​", + "style": "IPY_MODEL_26eb1a58d17b455e897c0ab211dfc745", + "value": " 2296/2296 [00:00<00:00, 17594.28 examples/s]" + } + }, + "2bf4e5ad02cd401f9e48f7db327bd7ef": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a72a57ed13704c93be7daab90c2bc0fe": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8d773d9125534d2b9936785a2579c9db": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "430c069e188b48908e54407eb21c3a54": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e3326ade6f5b4a98beb3f587da5e269a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3fceb97e1da74b36807d677751a45bf5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "26eb1a58d17b455e897c0ab211dfc745": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b37fe8dfad314cd081cdd58a8504bb6d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_211eba115ba14680b69f1b7142df49ef", + "IPY_MODEL_4a01f780e26749c79a2a17953ac1dd65", + "IPY_MODEL_9eb2b8dada594f3995eda1d877b53be0" + ], + "layout": "IPY_MODEL_8d3d3c21a22042d48b4f600ce842d415" + } + }, + "211eba115ba14680b69f1b7142df49ef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5cd78a174d91485cbc3ac0aca56e8ac8", + "placeholder": "​", + "style": "IPY_MODEL_39bf12129fce4932ad1776730cee7c63", + "value": "tokenizer_config.json: 100%" + } + }, + "4a01f780e26749c79a2a17953ac1dd65": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_57b08005ef11401e91522ee47a695dcf", + "max": 48, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ac7d319515944a67a3028819f1774e5a", + "value": 48 + } + }, + "9eb2b8dada594f3995eda1d877b53be0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_fa6b886900c245368686975d52c3d376", + "placeholder": "​", + "style": "IPY_MODEL_d2e010a13f8742f5a254f20d72886243", + "value": " 48.0/48.0 [00:00<00:00, 4.65kB/s]" + } + }, + "8d3d3c21a22042d48b4f600ce842d415": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5cd78a174d91485cbc3ac0aca56e8ac8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "39bf12129fce4932ad1776730cee7c63": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "57b08005ef11401e91522ee47a695dcf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ac7d319515944a67a3028819f1774e5a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "fa6b886900c245368686975d52c3d376": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d2e010a13f8742f5a254f20d72886243": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "138bcea38b7e449fb31e3b744c7ac2c3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_5089e4ae760b4c0cba192d3c3999ef32", + "IPY_MODEL_bfb9dd560c8f4045b6a5e9a3f3d43a2f", + "IPY_MODEL_21e40d89fa934af0985cdcedac0c140f" + ], + "layout": "IPY_MODEL_c9408786fab348a4900c195547870d6c" + } + }, + "5089e4ae760b4c0cba192d3c3999ef32": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e7b17b562dd94058a3d4c0a4bd4bfdd8", + "placeholder": "​", + "style": "IPY_MODEL_cb1cd903f9594c6ab0efba6ad23bb88b", + "value": "vocab.txt: 100%" + } + }, + "bfb9dd560c8f4045b6a5e9a3f3d43a2f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d419d9fcdb5e4ca1b5bb7928273f11b0", + "max": 231508, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fe0b6c85bdef46f58769a419d88be211", + "value": 231508 + } + }, + "21e40d89fa934af0985cdcedac0c140f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bea2070e5e284f8bab5bc20e863e01be", + "placeholder": "​", + "style": "IPY_MODEL_dd4aeae99d134cfba3e773a01e07e173", + "value": " 232k/232k [00:00<00:00, 1.04MB/s]" + } + }, + "c9408786fab348a4900c195547870d6c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e7b17b562dd94058a3d4c0a4bd4bfdd8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cb1cd903f9594c6ab0efba6ad23bb88b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d419d9fcdb5e4ca1b5bb7928273f11b0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fe0b6c85bdef46f58769a419d88be211": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "bea2070e5e284f8bab5bc20e863e01be": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dd4aeae99d134cfba3e773a01e07e173": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "dcadede5a07f4d3e9ca792a19dc21715": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_08dc5d62c5a7483d9afb6d8b5808fe83", + "IPY_MODEL_e0f60ac76ba943d79f3d9d7c4f319447", + "IPY_MODEL_bb072b9abac94e8bbbbf059350beb0e7" + ], + "layout": "IPY_MODEL_a55e764b4c3a4510b514bdcd6c270b52" + } + }, + "08dc5d62c5a7483d9afb6d8b5808fe83": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5cc1100f2b98480eb5e5efeb5afa29f5", + "placeholder": "​", + "style": "IPY_MODEL_79563cc9fcdb44c3981eda18338b89aa", + "value": "tokenizer.json: 100%" + } + }, + "e0f60ac76ba943d79f3d9d7c4f319447": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_59f8aa1a8580430a9c74c84ea632f9bc", + "max": 466062, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4d727f0787424370b253727ecc25ae9e", + "value": 466062 + } + }, + "bb072b9abac94e8bbbbf059350beb0e7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6aeb7bc82ac94a96b5fe781aed9a85ff", + "placeholder": "​", + "style": "IPY_MODEL_b3ba1e7126d2489e98c08bd1b07d445a", + "value": " 466k/466k [00:00<00:00, 2.12MB/s]" + } + }, + "a55e764b4c3a4510b514bdcd6c270b52": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5cc1100f2b98480eb5e5efeb5afa29f5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "79563cc9fcdb44c3981eda18338b89aa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "59f8aa1a8580430a9c74c84ea632f9bc": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4d727f0787424370b253727ecc25ae9e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6aeb7bc82ac94a96b5fe781aed9a85ff": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b3ba1e7126d2489e98c08bd1b07d445a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "62db1f3699d14c06ae5556e43ce45439": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_bd174018409c4149a3fab2404378cac8", + "IPY_MODEL_ae95c662ec0b4022be527c32f4bd98a6", + "IPY_MODEL_36df9f39e0524c2e98fdc9e9bad2b9a1" + ], + "layout": "IPY_MODEL_026d708491c94039965cccf9abbfded2" + } + }, + "bd174018409c4149a3fab2404378cac8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9856a4b5e41847c099d5e4a2a3deccb0", + "placeholder": "​", + "style": "IPY_MODEL_82890a0d1e3949fe8f06d28ddb9af83e", + "value": "config.json: 100%" + } + }, + "ae95c662ec0b4022be527c32f4bd98a6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_18556bd89c404ef5a46b379fbc7bb357", + "max": 483, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_10cfa9deb9614486895d25d2947cd556", + "value": 483 + } + }, + "36df9f39e0524c2e98fdc9e9bad2b9a1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_33ac92e359d9431ba9e92c220bc8420f", + "placeholder": "​", + "style": "IPY_MODEL_1122301b62754bcaa2cb1a66bee29020", + "value": " 483/483 [00:00<00:00, 47.5kB/s]" + } + }, + "026d708491c94039965cccf9abbfded2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9856a4b5e41847c099d5e4a2a3deccb0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "82890a0d1e3949fe8f06d28ddb9af83e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "18556bd89c404ef5a46b379fbc7bb357": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "10cfa9deb9614486895d25d2947cd556": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "33ac92e359d9431ba9e92c220bc8420f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1122301b62754bcaa2cb1a66bee29020": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "cb1e1f2445b541759d24c2db11e23c88": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d3ffe584171e4a5cad9a8f5a6fe192f3", + "IPY_MODEL_69be4aadb0314fa1b72485a8af33536e", + "IPY_MODEL_ea8172ff955f40e790dd5a4ee68bb63b" + ], + "layout": "IPY_MODEL_90586a4566ab4e1d9f33b9cc99fecf78" + } + }, + "d3ffe584171e4a5cad9a8f5a6fe192f3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3c7cfd39564245a0b380b9f093b52f79", + "placeholder": "​", + "style": "IPY_MODEL_24f18614246a4c719c47f03eec886343", + "value": "model.safetensors: 100%" + } + }, + "69be4aadb0314fa1b72485a8af33536e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d97e44718172490f8cedbce280aa6559", + "max": 267954768, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_902888d1f7754f66860a5a203e9640a5", + "value": 267954768 + } + }, + "ea8172ff955f40e790dd5a4ee68bb63b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_506a3c97660d4e79bc2a57eee426e9a3", + "placeholder": "​", + "style": "IPY_MODEL_299fd09970444a829dd46059748d84b3", + "value": " 268M/268M [00:04<00:00, 25.1MB/s]" + } + }, + "90586a4566ab4e1d9f33b9cc99fecf78": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3c7cfd39564245a0b380b9f093b52f79": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "24f18614246a4c719c47f03eec886343": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d97e44718172490f8cedbce280aa6559": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "902888d1f7754f66860a5a203e9640a5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "506a3c97660d4e79bc2a57eee426e9a3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "299fd09970444a829dd46059748d84b3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Fake News Detection System (Model Training and Evaluation)" + ], + "metadata": { + "id": "a_vBTqbyHMMq" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "teRKigcfHFxo", + "outputId": "ba3395c3-81ac-4fb7-a9ab-83ee3e90a702" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting datasets\n", + " Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)\n", + "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.49.0)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (1.6.1)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.11/dist-packages (3.9.1)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (2.2.2)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (2.0.2)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.6.0+cu124)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (3.10.0)\n", + "Requirement already satisfied: seaborn in /usr/local/lib/python3.11/dist-packages (0.13.2)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (4.67.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets) (3.18.0)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (18.1.0)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n", + " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.11/dist-packages (from datasets) (2.32.3)\n", + "Collecting xxhash (from datasets)\n", + " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", + "Collecting multiprocess<0.70.17 (from datasets)\n", + " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", + "Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets)\n", + " Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets) (3.11.14)\n", + "Requirement already satisfied: huggingface-hub>=0.24.0 in /usr/local/lib/python3.11/dist-packages (from datasets) (0.29.3)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets) (24.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets) (6.0.2)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.1)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.3)\n", + "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.14.1)\n", + "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn) (3.6.0)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.11/dist-packages (from nltk) (8.1.8)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas) (2025.1)\n", + "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch) (4.12.2)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.6)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)\n", + " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)\n", + " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)\n", + " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n", + " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)\n", + " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)\n", + " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-curand-cu12==10.3.5.147 (from torch)\n", + " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)\n", + " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)\n", + " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch) (0.6.2)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", + "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)\n", + " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.2.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.3.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (4.56.0)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (1.4.8)\n", + "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (11.1.0)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib) (3.2.1)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (25.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (6.2.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (0.3.0)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets) (1.18.3)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets) (2025.1.31)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n", + "Downloading datasets-3.4.1-py3-none-any.whl (487 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m487.4/487.4 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m38.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m32.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m40.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m90.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2024.12.0-py3-none-any.whl (183 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.9/183.9 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m19.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: xxhash, nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, fsspec, dill, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, nvidia-cusolver-cu12, datasets\n", + " Attempting uninstall: nvidia-nvjitlink-cu12\n", + " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", + " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", + " Attempting uninstall: nvidia-curand-cu12\n", + " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", + " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", + " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", + " Attempting uninstall: nvidia-cufft-cu12\n", + " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", + " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", + " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", + " Attempting uninstall: nvidia-cuda-runtime-cu12\n", + " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", + " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-cupti-cu12\n", + " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cublas-cu12\n", + " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", + " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", + " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2025.3.0\n", + " Uninstalling fsspec-2025.3.0:\n", + " Successfully uninstalled fsspec-2025.3.0\n", + " Attempting uninstall: nvidia-cusparse-cu12\n", + " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", + " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", + " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", + " Attempting uninstall: nvidia-cudnn-cu12\n", + " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", + " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", + " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", + " Attempting uninstall: nvidia-cusolver-cu12\n", + " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", + " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", + " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "gcsfs 2025.3.0 requires fsspec==2025.3.0, but you have fsspec 2024.12.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed datasets-3.4.1 dill-0.3.8 fsspec-2024.12.0 multiprocess-0.70.16 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 xxhash-3.5.0\n" + ] + } + ], + "source": [ + "# Install required libraries\n", + "!pip install datasets transformers scikit-learn nltk pandas numpy torch matplotlib seaborn tqdm\n", + "\n", + "# Import libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import re\n", + "import string\n", + "import nltk\n", + "import os\n", + "from nltk.corpus import stopwords\n", + "from nltk.stem import WordNetLemmatizer\n", + "from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report\n", + "from sklearn.utils.class_weight import compute_class_weight\n", + "from datasets import load_dataset\n", + "import torch\n", + "from torch import nn\n", + "from torch.utils.data import Dataset, DataLoader\n", + "from transformers import DistilBertTokenizer, DistilBertModel, AdamW, get_linear_schedule_with_warmup\n", + "from tqdm import tqdm" + ] + }, + { + "cell_type": "code", + "source": [ + "# Note: Verify berfore running the moodels\n", + "import sklearn\n", + "import transformers\n", + "print(f\"NumPy version: {np.__version__}\")\n", + "print(f\"scikit-learn version: {sklearn.__version__}\")\n", + "\n", + "print(f\"torch: {torch.__version__}\")\n", + "print(f\"transformers: {transformers.__version__}\")\n", + "print(f\"nltk: {nltk.__version__}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "PQLVNSbfNeYZ", + "outputId": "dc0148f2-7dc1-4365-e9c5-a1165c57cbe1" + }, + "execution_count": 20, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "NumPy version: 2.0.2\n", + "scikit-learn version: 1.6.1\n", + "torch: 2.6.0+cu124\n", + "transformers: 4.49.0\n", + "nltk: 3.9.1\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Download NLTK resources\n", + "nltk.download('stopwords')\n", + "nltk.download('wordnet')\n", + "nltk.download('punkt')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ga3xIASeHXxI", + "outputId": "26ac43ce-42a0-4ef3-fb41-ff6282977833" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", + "[nltk_data] Unzipping corpora/stopwords.zip.\n", + "[nltk_data] Downloading package wordnet to /root/nltk_data...\n", + "[nltk_data] Downloading package punkt to /root/nltk_data...\n", + "[nltk_data] Unzipping tokenizers/punkt.zip.\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Check for GPU\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "print(f\"Using device: {device}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "HixlQ3UfHcPC", + "outputId": "55d45d92-8b1e-48b3-fe77-2e5f4dd7ea4c" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Using device: cuda\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### 1. LOADING DATASETS" + ], + "metadata": { + "id": "UJEhDzkyjel-" + } + }, + { + "cell_type": "code", + "source": [ + "print(\"LOASDING DATASETS...\")\n", + "\n", + "# Load ISOT dataset\n", + "print(\"Loading ISOT dataset...\")\n", + "# Load LIAR dataset\n", + "liar_dataset = load_dataset(\"chengxuphd/liar2\")\n", + "print(f\"LIAR dataset loaded. Available splits: {liar_dataset.keys()}\")\n", + "\n", + "# Load ISOT dataset\n", + "from google.colab import drive\n", + "drive.mount('/content/drive')\n", + "fake_news_path = '/content/drive/MyDrive/Fake.csv'\n", + "true_news_path = '/content/drive/MyDrive/True.csv'\n", + "fake_df = pd.read_csv(fake_news_path)\n", + "true_df = pd.read_csv(true_news_path)\n", + "print(\"ISOT dataset loaded.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 452, + "referenced_widgets": [ + "969a9695b9df48a683e377fbd49d5f43", + "4350b28d2981414b85112f6bc041db75", + "b21c712e50e84f2fa9dba2ad054661df", + "11f9d2ed670d4d41a9260be5bcf9379d", + "8564d0c823fb45098e778cbae88ae60a", + "a5c5e293787f45228ee13404d1766ef4", + "716c805246424420893e64faf42b1c9e", + "3e8774defa6a4f71abef50d0a3d04128", + "a0b1cc19904a4f65b634b88e80a96fb0", + "0d4d91617a7a4305a62560c8919e683a", + "e3584530da4943959b2e4560ef9c9273", + "86c25a3bccfd4a78a0e58f2cce1774c8", + "7e8130db5e344e6da464e9be271cbb13", + "6230c859c1c2484fa929b74d62db1fc0", + "cb9972724a634e539e21d1687f070168", + "afd5e45a75b94ff282e0bc931d6f2433", + "fd87a739af0d4ce9a6a477efd5af4c26", + "3a7cf67a079440928396b3db9b2032d3", + "8fd43403b79c4becae6792707755f66f", + "064b322ff6ee487f92a3c27300acbf8a", + "9f07ba5029e146a499469c4304fce3a5", + "ce67b994ed0d441a8b8d41f0dd17ea11", + "e85629e675954a79b5a9e88c537c7765", + "8caaeb2f2e844d39a7b7824341c5a72e", + "a5332cc275734a19b3b99528ed06557c", + "51e2788e90c4410e8da1b80c27115059", + "6286dc27fdc549b1b1f38b90a059dd59", + "56e567bc0ff342748358c101d004058e", + "71f63c090e99417b8198294b8ab85800", + "4edbf11520fd4051bd9889c82f136a1f", + "d6b0e844a7c54122a0a5a2977dabaf6e", + "b582eae1b03b4b84b9fe4bd27f9d48b2", + "8c0c77d2ac97477193c73bfc968047ee", + "72c5d67b5a004bc38805b8373ccc839d", + "7d79c0bc4d644a808c9b145f5246a4fa", + "6afa9e0ae1724ead9fc9d00b44debb50", + "35f3c05c107a43be83c4cd2288d7f978", + "17e2f2aa24a6449b9d34cd4e45380d17", + "78d4c86600db4121bdf23da1ab83a37b", + "543cc147bc50443eae9d0c2db3f2e01c", + "c3c5150c1b294e6680d5319566ea6fdb", + "3dafd94c17044054bd6003cfcedf253d", + "929c1172139f4b90aa143013baf32013", + "345e2451df894daaa942664ab8cae344", + "9852cf0778254f4d8bbb03404623a2a8", + "6dafe1df4736425ead5fd0cb4a889423", + "1ef079bfbe164c3ca3239abd56efea3f", + "9a7e4388b50344ae9e549caad193d7e4", + "f8cdddb6e22945ce8321c4f0a4adcc8e", + "ae0df42c5e4f44608f3e95d3bba9a65a", + "fd0a5bb2b04a420aa0a7bf9b82bb6aea", + "ce367905f3f34b4598113c46e4492eb7", + "bacf7ca0fd4746d29d3f69b26aa04f36", + "17c421b56f4a4255b4ebf61673b538eb", + "069398ee29994b5eb8107cdb3b4ab9df", + "2b68a30bb79e482987cb0e3a94b5784d", + "c8efc2a709184ee389bbf2dde8b5abe3", + "376a714cc9424ce69e4ed6e8d9412e3d", + "efcbc34cdb594d81bfd919a66eafa91f", + "3e9d3ec6971b4fbbb9d680af15541425", + "0b3b02489bdf425d82e2597821d8cf67", + "5625773ebcac4695a0dd3e6edeb389d3", + "79e1daef8f1d4d68a9c99d3a9f079894", + "1c49eb33c9094aeba795c0250aa3f05a", + "f31ffb34c5b245bd90051cb893c4c5b7", + "9cd3bce2b222454d8c8a782a02f89010", + "104336ba9ea6451b865a840f0955e6a6", + "c3d4b37cba3c4e82a020d47be7cb2da4", + "ed8ca6389c20429d89a6063892f1f930", + "b4e82f4677b24836a938edaa4a287efb", + "2bf4e5ad02cd401f9e48f7db327bd7ef", + "a72a57ed13704c93be7daab90c2bc0fe", + "8d773d9125534d2b9936785a2579c9db", + "430c069e188b48908e54407eb21c3a54", + "e3326ade6f5b4a98beb3f587da5e269a", + "3fceb97e1da74b36807d677751a45bf5", + "26eb1a58d17b455e897c0ab211dfc745" + ] + }, + "id": "P9jbuHNqHgPd", + "outputId": "4a67fecc-a870-4df4-ac51-8b82ed571942" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "LOASDING DATASETS...\n", + "Loading ISOT dataset...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "README.md: 0%| | 0.00/2.87k [00:00= 4 else 0,\n", + " 'source': 'LIAR'\n", + " }\n", + " for item in liar_dataset['train']\n", + "])\n", + "liar_val_df = pd.DataFrame([\n", + " {\n", + " 'statement': item['statement'],\n", + " 'label_original': item['label'],\n", + " 'label': 1 if item['label'] >= 4 else 0,\n", + " 'source': 'LIAR'\n", + " }\n", + " for item in liar_dataset['validation']\n", + "])\n", + "liar_test_df = pd.DataFrame([\n", + " {\n", + " 'statement': item['statement'],\n", + " 'label_original': item['label'],\n", + " 'label': 1 if item['label'] >= 4 else 0,\n", + " 'source': 'LIAR'\n", + " }\n", + " for item in liar_dataset['test']\n", + "])\n", + "\n", + "# Combine the liar dataframes\n", + "liar_df = pd.concat([liar_train_df, liar_val_df, liar_test_df], ignore_index=True)\n", + "print(f\"Combined LIAR dataset shape: {liar_df.shape}\")\n", + "\n", + "# Check the class distribution\n", + "liar_class_counts = liar_df['label'].value_counts()\n", + "print(\"\\nLIAR binary class distribution:\")\n", + "print(f\"Fake news (0): {liar_class_counts[0]} ({liar_class_counts[0] / len(liar_df) * 100:.2f}%)\")\n", + "print(f\"Real news (1): {liar_class_counts[1]} ({liar_class_counts[1] / len(liar_df) * 100:.2f}%)\")\n", + "\n", + "# Rename columns to match\n", + "isot_df = isot_df.rename(columns={'text': 'statement'})\n", + "\n", + "# Merge datasets\n", + "isot_selected = isot_df[['statement', 'label', 'source']]\n", + "liar_selected = liar_df[['statement', 'label', 'source']]\n", + "combined_df = pd.concat([isot_selected, liar_selected], ignore_index=True)\n", + "combined_df = combined_df.sample(frac=1, random_state=42).reset_index(drop=True)\n", + "print(f\"Combined dataset shape: {combined_df.shape}\")\n", + "\n", + "# Show class balance\n", + "combined_class_counts = combined_df['label'].value_counts()\n", + "print(f\"Fake news (0): {combined_class_counts[0]} ({combined_class_counts[0] / len(combined_df) * 100:.2f}%)\")\n", + "print(f\"Real news (1): {combined_class_counts[1]} ({combined_class_counts[1] / len(combined_df) * 100:.2f}%)\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "omgzmFpiHjO8", + "outputId": "40c428fb-43e3-49a7-95c5-ea29957e0779" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "PREPARING DATA...\n", + "Combined ISOT dataset shape: (44898, 6)\n", + "Combined LIAR dataset shape: (22962, 4)\n", + "\n", + "LIAR binary class distribution:\n", + "Fake news (0): 16948 (73.81%)\n", + "Real news (1): 6014 (26.19%)\n", + "Combined dataset shape: (67860, 3)\n", + "Fake news (0): 40429 (59.58%)\n", + "Real news (1): 27431 (40.42%)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### 3. TEXT CLEANING" + ], + "metadata": { + "id": "qYMFy2ULj2uL" + } + }, + { + "cell_type": "code", + "source": [ + "print(\"\\nCLEANING TEXT...\")\n", + "\n", + "# Local import\n", + "import nltk\n", + "nltk.download('punkt_tab')\n", + "\n", + "def clean_text(text):\n", + " # Handle NaN values\n", + " if not isinstance(text, str):\n", + " return \"\"\n", + "\n", + " # Convert to lowercase\n", + " text = text.lower()\n", + "\n", + " # Remove URLs, HTML tags, punctuation and numbers\n", + " text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text)\n", + " text = re.sub(r'<.*?>', '', text)\n", + " text = re.sub(r'[^\\w\\s]', '', text)\n", + " text = re.sub(r'\\d+', '', text)\n", + "\n", + " # Tokenize\n", + " tokens = nltk.word_tokenize(text)\n", + "\n", + " # Remove stopwords and lemmatize\n", + " stop_words = set(stopwords.words('english'))\n", + " lemmatizer = WordNetLemmatizer()\n", + " tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]\n", + "\n", + " # Join tokens back into text\n", + " cleaned_text = ' '.join(tokens)\n", + " return cleaned_text\n", + "\n", + "# Apply cleaning function\n", + "print(\"Cleaning text...\")\n", + "combined_df['cleaned_statement'] = combined_df['statement'].apply(clean_text)\n", + "# Show example\n", + "print(\"\\nExample of cleaned text:\")\n", + "print(f\"Original: {combined_df['statement'].iloc[0][:100]}...\")\n", + "print(f\"Cleaned: {combined_df['cleaned_statement'].iloc[0][:100]}...\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "iyLfqhOjHrM2", + "outputId": "b498d1b1-2b42-4f6b-d2fb-c876f3903070" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "CLEANING TEXT...\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "[nltk_data] Downloading package punkt_tab to /root/nltk_data...\n", + "[nltk_data] Unzipping tokenizers/punkt_tab.zip.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cleaning text...\n", + "\n", + "Example of cleaned text:\n", + "Original: (Editors note: Attention to language in the second paragraph that some readers may find offensive.)...\n", + "Cleaned: editor note attention language second paragraph reader may find offensive reuters local north caroli...\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### 4. FEATURE EXTRACTION AND DATA SPLITTING" + ], + "metadata": { + "id": "js1cJRLFkKRS" + } + }, + { + "cell_type": "code", + "source": [ + "print(\"\\nFEATURE EXTRACTION AND SPLITTING DATA...\")\n", + "\n", + "# Split data\n", + "X = combined_df['cleaned_statement']\n", + "y = combined_df['label']\n", + "\n", + "# First split: 80% train+val, 20% test\n", + "X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n", + "\n", + "# Second split: 80% train, 20% validation (from the remaining 80%)\n", + "X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42, stratify=y_temp)\n", + "\n", + "print(f\"Training set: {len(X_train)} samples\")\n", + "print(f\"Validation set: {len(X_val)} samples\")\n", + "print(f\"Test set: {len(X_test)} samples\")\n", + "\n", + "# TF-IDF Vectorization\n", + "print(\"Creating TF-IDF features...\")\n", + "tfidf_vectorizer = TfidfVectorizer(max_features=10000)\n", + "X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)\n", + "X_val_tfidf = tfidf_vectorizer.transform(X_val)\n", + "X_test_tfidf = tfidf_vectorizer.transform(X_test)\n", + "\n", + "# DistilBERT tokenizer setup\n", + "print(\"Setting up DistilBERT tokenizer...\")\n", + "tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')\n", + "\n", + "# Dataset class\n", + "class NewsDataset(Dataset):\n", + " def __init__(self, texts, labels, tokenizer, max_length=128):\n", + " self.texts = texts\n", + " self.labels = labels\n", + " self.tokenizer = tokenizer\n", + " self.max_length = max_length\n", + "\n", + " def __len__(self):\n", + " return len(self.texts)\n", + "\n", + " def __getitem__(self, idx):\n", + " text = self.texts.iloc[idx]\n", + " label = self.labels.iloc[idx]\n", + "\n", + " encoding = self.tokenizer(\n", + " text,\n", + " truncation=True,\n", + " padding='max_length',\n", + " max_length=self.max_length,\n", + " return_tensors='pt'\n", + " )\n", + "\n", + " return {\n", + " 'input_ids': encoding['input_ids'].flatten(),\n", + " 'attention_mask': encoding['attention_mask'].flatten(),\n", + " 'label': torch.tensor(label, dtype=torch.long)\n", + " }\n", + "\n", + "# Create datasets\n", + "train_dataset = NewsDataset(X_train, y_train, tokenizer)\n", + "val_dataset = NewsDataset(X_val, y_val, tokenizer)\n", + "test_dataset = NewsDataset(X_test, y_test, tokenizer)\n", + "\n", + "# Create dataloaders\n", + "batch_size = 16\n", + "train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)\n", + "val_dataloader = DataLoader(val_dataset, batch_size=batch_size)\n", + "test_dataloader = DataLoader(test_dataset, batch_size=batch_size)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 267, + "referenced_widgets": [ + "b37fe8dfad314cd081cdd58a8504bb6d", + "211eba115ba14680b69f1b7142df49ef", + "4a01f780e26749c79a2a17953ac1dd65", + "9eb2b8dada594f3995eda1d877b53be0", + "8d3d3c21a22042d48b4f600ce842d415", + "5cd78a174d91485cbc3ac0aca56e8ac8", + "39bf12129fce4932ad1776730cee7c63", + "57b08005ef11401e91522ee47a695dcf", + "ac7d319515944a67a3028819f1774e5a", + "fa6b886900c245368686975d52c3d376", + "d2e010a13f8742f5a254f20d72886243", + "138bcea38b7e449fb31e3b744c7ac2c3", + "5089e4ae760b4c0cba192d3c3999ef32", + "bfb9dd560c8f4045b6a5e9a3f3d43a2f", + "21e40d89fa934af0985cdcedac0c140f", + "c9408786fab348a4900c195547870d6c", + "e7b17b562dd94058a3d4c0a4bd4bfdd8", + "cb1cd903f9594c6ab0efba6ad23bb88b", + "d419d9fcdb5e4ca1b5bb7928273f11b0", + "fe0b6c85bdef46f58769a419d88be211", + "bea2070e5e284f8bab5bc20e863e01be", + "dd4aeae99d134cfba3e773a01e07e173", + "dcadede5a07f4d3e9ca792a19dc21715", + "08dc5d62c5a7483d9afb6d8b5808fe83", + "e0f60ac76ba943d79f3d9d7c4f319447", + "bb072b9abac94e8bbbbf059350beb0e7", + "a55e764b4c3a4510b514bdcd6c270b52", + "5cc1100f2b98480eb5e5efeb5afa29f5", + "79563cc9fcdb44c3981eda18338b89aa", + "59f8aa1a8580430a9c74c84ea632f9bc", + "4d727f0787424370b253727ecc25ae9e", + "6aeb7bc82ac94a96b5fe781aed9a85ff", + "b3ba1e7126d2489e98c08bd1b07d445a", + "62db1f3699d14c06ae5556e43ce45439", + "bd174018409c4149a3fab2404378cac8", + "ae95c662ec0b4022be527c32f4bd98a6", + "36df9f39e0524c2e98fdc9e9bad2b9a1", + "026d708491c94039965cccf9abbfded2", + "9856a4b5e41847c099d5e4a2a3deccb0", + "82890a0d1e3949fe8f06d28ddb9af83e", + "18556bd89c404ef5a46b379fbc7bb357", + "10cfa9deb9614486895d25d2947cd556", + "33ac92e359d9431ba9e92c220bc8420f", + "1122301b62754bcaa2cb1a66bee29020" + ] + }, + "id": "8MbEmjE7Hv_L", + "outputId": "b3bcd2a5-8632-4d1c-f282-1e75a8ab6189" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "FEATURE EXTRACTION AND SPLITTING DATA...\n", + "Training set: 40716 samples\n", + "Validation set: 13572 samples\n", + "Test set: 13572 samples\n", + "Creating TF-IDF features...\n", + "Setting up DistilBERT tokenizer...\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "tokenizer_config.json: 0%| | 0.00/48.0 [00:00 best_val_f1:\n", + " best_val_f1 = val_f1\n", + " best_model_state = distilbert_model.state_dict().copy()\n", + " print(f\"New best model saved! F1: {best_val_f1:.4f}\")" + ], + "metadata": { + "id": "k-i_7ijiIxQO", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 520, + "referenced_widgets": [ + "cb1e1f2445b541759d24c2db11e23c88", + "d3ffe584171e4a5cad9a8f5a6fe192f3", + "69be4aadb0314fa1b72485a8af33536e", + "ea8172ff955f40e790dd5a4ee68bb63b", + "90586a4566ab4e1d9f33b9cc99fecf78", + "3c7cfd39564245a0b380b9f093b52f79", + "24f18614246a4c719c47f03eec886343", + "d97e44718172490f8cedbce280aa6559", + "902888d1f7754f66860a5a203e9640a5", + "506a3c97660d4e79bc2a57eee426e9a3", + "299fd09970444a829dd46059748d84b3" + ] + }, + "outputId": "9835d8dd-532f-4229-a862-8f86269cfde0" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "-- Training DistilBERT --\n", + "NOTE: DistilBERT training might take 30-45 minutes if Colab's T4 GPU is used.\n", + " (Depends on no. of epochs+ CPU/GPU/TPU)\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "model.safetensors: 0%| | 0.00/268M [00:00 0:\n", + " wordcloud = WordCloud(width=800, height=400, background_color='white',\n", + " max_words=200, contour_width=3, contour_color='steelblue').generate(fake_text)\n", + " plt.figure(figsize=(10, 6))\n", + " plt.imshow(wordcloud, interpolation='bilinear')\n", + " plt.axis('off')\n", + " plt.title('Word Cloud - Fake News')\n", + " plt.savefig('visualizations/fake_news_wordcloud.png', dpi=300, bbox_inches='tight')\n", + " plt.close()\n", + "\n", + "# Real news word cloud\n", + "real_text = ' '.join(combined_df[combined_df['label'] == 1]['cleaned_statement'])\n", + "if len(real_text) > 0:\n", + " wordcloud = WordCloud(width=800, height=400, background_color='white',\n", + " max_words=200, contour_width=3, contour_color='steelblue').generate(real_text)\n", + " plt.figure(figsize=(10, 6))\n", + " plt.imshow(wordcloud, interpolation='bilinear')\n", + " plt.axis('off')\n", + " plt.title('Word Cloud - Real News')\n", + " plt.savefig('visualizations/real_news_wordcloud.png', dpi=300, bbox_inches='tight')\n", + " plt.close()\n", + "print(\"✓ Word clouds saved\")\n", + "\n", + "# 5. Model performance comparison\n", + "plt.figure(figsize=(12, 6))\n", + "metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score']\n", + "x = np.arange(len(metrics))\n", + "width = 0.25\n", + "\n", + "# Plot bars for each model\n", + "plt.bar(x - width, [lr_test_acc, lr_test_precision, lr_test_recall, lr_test_f1],\n", + " width, label='Logistic Regression', color='#ff9999')\n", + "plt.bar(x, [rf_test_acc, rf_test_precision, rf_test_recall, rf_test_f1],\n", + " width, label='Random Forest', color='#66b3ff')\n", + "plt.bar(x + width, [distilbert_acc, distilbert_precision, distilbert_recall, distilbert_f1],\n", + " width, label='DistilBERT', color='#99ff99')\n", + "\n", + "plt.ylabel('Score')\n", + "plt.title('Model Performance Comparison')\n", + "plt.xticks(x, metrics)\n", + "plt.ylim(0, 1.0)\n", + "plt.legend(loc='lower right')\n", + "plt.savefig('visualizations/model_comparison.png', dpi=300, bbox_inches='tight')\n", + "plt.close()\n", + "print(\"✓ Model comparison visualization saved\")\n", + "\n", + "# 6. ROC curves for all models\n", + "plt.figure(figsize=(10, 8))\n", + "\n", + "# Function to plot ROC curve\n", + "def plot_roc_curve(y_true, y_pred_prob, label, color):\n", + " if len(y_pred_prob.shape) > 1 and y_pred_prob.shape[1] == 2:\n", + " y_pred_prob = y_pred_prob[:, 1]\n", + "\n", + " fpr, tpr, _ = roc_curve(y_true, y_pred_prob)\n", + " roc_auc = auc(fpr, tpr)\n", + " plt.plot(fpr, tpr, color=color, lw=2, label=f'{label} (AUC = {roc_auc:.2f})')\n", + "\n", + "# LR ROC\n", + "lr_probs = lr_best_model.predict_proba(X_test_tfidf)\n", + "plot_roc_curve(y_test, lr_probs, 'Logistic Regression', '#ff9999')\n", + "\n", + "# RF ROC\n", + "rf_probs = rf_best_model.predict_proba(X_test_tfidf)\n", + "plot_roc_curve(y_test, rf_probs, 'Random Forest', '#66b3ff')\n", + "\n", + "# DistilBERT ROC - need to get probabilities**\n", + "distilbert_probs = []\n", + "with torch.no_grad():\n", + " for batch in test_dataloader:\n", + " input_ids = batch['input_ids'].to(device)\n", + " attention_mask = batch['attention_mask'].to(device)\n", + " outputs = distilbert_model(input_ids=input_ids, attention_mask=attention_mask)\n", + " probs = torch.softmax(outputs, dim=1)\n", + " distilbert_probs.extend(probs.cpu().numpy())\n", + "\n", + "distilbert_probs = np.array(distilbert_probs)\n", + "plot_roc_curve(test_labels, distilbert_probs, 'DistilBERT', '#99ff99')\n", + "\n", + "# Plot diagonal\n", + "plt.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')\n", + "plt.xlim([0.0, 1.0])\n", + "plt.ylim([0.0, 1.05])\n", + "plt.xlabel('False Positive Rate')\n", + "plt.ylabel('True Positive Rate')\n", + "plt.title('Receiver Operating Characteristic (ROC) Curves')\n", + "plt.legend(loc='lower right')\n", + "plt.grid(True, linestyle='--', alpha=0.7)\n", + "plt.savefig('visualizations/roc_curves.png', dpi=300, bbox_inches='tight')\n", + "plt.close()\n", + "print(\"✓ ROC curves saved\")\n", + "\n", + "# 7. Text length distribution\n", + "text_lengths = combined_df['statement'].apply(lambda x: len(str(x).split()))\n", + "plt.figure(figsize=(10, 6))\n", + "plt.hist(text_lengths, bins=50, alpha=0.7, color='skyblue')\n", + "plt.xlabel('Number of Words')\n", + "plt.ylabel('Frequency')\n", + "plt.title('Text Length Distribution')\n", + "plt.axvline(text_lengths.mean(), color='r', linestyle='--', label=f'Mean: {text_lengths.mean():.1f} words')\n", + "plt.axvline(text_lengths.median(), color='g', linestyle='--', label=f'Median: {text_lengths.median():.1f} words')\n", + "plt.legend()\n", + "plt.savefig('visualizations/text_length_distribution.png', dpi=300, bbox_inches='tight')\n", + "plt.close()\n", + "print(\"✓ Text length distribution saved\")\n", + "print(\"✓ ALL DONE!\")" + ], + "metadata": { + "id": "yy38BYKlhfZr", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "42d0efaf-0a17-4a7d-d00d-18edcb1eb0cc" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "CREATING VISUALIZATIONS FOR FINAL REPORT...\n", + "✓ Class distribution visualization saved\n", + "✓ Confusion matrices saved\n", + "✓ Feature importance visualization saved\n", + "✓ Word clouds saved\n", + "✓ Model comparison visualization saved\n", + "✓ ROC curves saved\n", + "✓ Text length distribution saved\n", + "✓ ALL DONE!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### 8. SAVE MODELS AND FINAL CONCLUSION" + ], + "metadata": { + "id": "1MtDA4J7mggY" + } + }, + { + "cell_type": "code", + "source": [ + "print(\"\\nSAVING MODELS...\")\n", + "\n", + "os.makedirs('models', exist_ok=True)\n", + "\n", + "import pickle\n", + "with open('models/tfidf_vectorizer.pkl', 'wb') as f:\n", + " pickle.dump(tfidf_vectorizer, f)\n", + "\n", + "with open('models/lr_model.pkl', 'wb') as f:\n", + " pickle.dump(lr_best_model, f)\n", + "\n", + "with open('models/rf_model.pkl', 'wb') as f:\n", + " pickle.dump(rf_best_model, f)\n", + "\n", + "torch.save(distilbert_model.state_dict(), 'models/distilbert_model.pt')\n", + "\n", + "print(\"Models saved!\")\n", + "\n", + "# CONCLUSIONS\n", + "print(\"\\nCONCLUSIONS\")\n", + "print(f\"Best model: {best_model_name}\")\n", + "print(f\"Dataset stats: {combined_class_counts[0]} fake and {combined_class_counts[1]} real news articles\")\n", + "print(\"Project Completed!\")" + ], + "metadata": { + "id": "VOeP-RfaJSTG", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "0c4eec50-09ef-4d69-bbbc-425691230317" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "SAVING MODELS...\n", + "Models saved!\n", + "\n", + "CONCLUSIONS\n", + "Best model: DistilBERT\n", + "Dataset stats: 40429 fake and 27431 real news articles\n", + "Project Completed!\n" + ] + } + ] + } + ] +} \ No newline at end of file