{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "a87fe5f3", "metadata": { "id": "a87fe5f3" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/opt/anaconda3/envs/CSCI544/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "import pandas as pd\n", "import torch\n", "from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig, EarlyStoppingCallback, PreTrainedTokenizer\n", "from torch.utils.data import DataLoader\n", "import sys\n", "from peft import LoraConfig, get_peft_model, TaskType\n", "from huggingface_hub import snapshot_download\n", "import os\n", "import re\n", "import contextlib #helps make pip silent\n", "import sys\n", "import os\n", "import numpy as np\n", "\n", "\n", "\"\"\"\"\n", "with contextlib.redirect_stdout(sys.__stdout__), contextlib.redirect_stderr(sys.__stderr__):\n", " %pip install datasets\n", " %pip install sql_metadata\n", "\"\"\"\n", "from datasets import Dataset\n", "from sql_metadata import Parser" ] }, { "cell_type": "code", "execution_count": 2, "id": "4ec432b2", "metadata": { "id": "4ec432b2" }, "outputs": [], "source": [ "is_google_colab = False\n", "use_bnb = False" ] }, { "cell_type": "code", "execution_count": 3, "id": "47577a7f", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 170, "referenced_widgets": [ "9200f1303f124bddaa6114cdf0f5f878", "17ddbb74e1764f37b8d34c311fae200c", "ef732739334b4ac593fd665e01cd83c1", "949ee3d1a9cd4060864dec5d4283ef2c", "b98629e053674527aacca899ab7f11a9", "84cc47dc70864bf3aa7599c06eb13c51", "5d711bb927024d8d9f9b8bb685d6f388", "3b80c66e0f384c45ab4187301599fab2", "db6a23e658a34722a8f22505c6ace7b4", "7751defbc4534d518d9e923b9019aa8b", "fe6352bce22a40e7a936e7f90313bd02" ] }, "id": "47577a7f", "outputId": "999c4e88-3f89-49b1-9e21-abac91703bf3" }, "outputs": [], "source": [ "current_read_path = \"./\"\n", "current_write_path = \"./\"\n", "\n", "def read_path(rel_path):\n", " return os.path.join(current_read_path, rel_path)\n", "\n", "def write_path(rel_path):\n", " return os.path.join(current_write_path, rel_path)\n", "\n", "if is_google_colab:\n", " from google.colab import drive\n", " drive.mount('/content/drive')\n", " current_write_path = \"/content/drive/MyDrive/sql_gen\"\n", "\n", " hugging_face_path = snapshot_download(\n", " repo_id=\"USC-Applied-NLP-Group/SQL-Generation\",\n", " repo_type=\"model\",\n", " allow_patterns=[\"train-data/*\", \"deepseek-coder-1.3b-instruct/*\", \"src/*\", \"nba-data/*\"],\n", " )\n", " sys.path.append(hugging_face_path)\n", " current_read_path = hugging_face_path\n", "else:\n", " base_path = os.getcwd() # Use current working directory in notebooks\n", " sys.path.append(os.path.abspath(os.path.join(base_path, '../..')))" ] }, { "cell_type": "code", "execution_count": 4, "id": "10b675d0", "metadata": { "id": "10b675d0" }, "outputs": [], "source": [ "from src.prompts.pre_rag_prompt import input_text as input_prompt" ] }, { "cell_type": "markdown", "id": "fdd54aab", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": 5, "id": "de7c3cd3", "metadata": { "id": "de7c3cd3" }, "outputs": [], "source": [ "MODEL_DIR = write_path(\"dyn_rag_test\")\n", "VAL_OUTPUT = write_path(\"dyn_rag_test.hf\")" ] }, { "cell_type": "markdown", "id": "4b7eb12a", "metadata": { "id": "4b7eb12a" }, "source": [ "## Prepare Model" ] }, { "cell_type": "code", "execution_count": 6, "id": "3d0c0e3b", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3d0c0e3b", "outputId": "a64bc20b-a33f-453e-e445-cd08109ed43b" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/g0/47tr69v179dg7w6zyphp9b280000gn/T/ipykernel_70691/3415966509.py:2: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", " df = df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n" ] } ], "source": [ "df = pd.read_csv(read_path(\"train-data/sql_train.tsv\"), sep='\\t')\n", "df = df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n", "\n", "model_name = read_path(\"deepseek-coder-1.3b-instruct\")\n", "tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "device_name = 'cuda' if torch.cuda.is_available() else 'cpu'\n", "device = torch.device(device_name)\n", "\n", "model = AutoModelForCausalLM.from_pretrained(\n", " model_name,\n", " device_map=device\n", ")\n", "tokenizer.truncation_side = \"left\"\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "11a687f7", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Map: 100%|██████████| 1044/1044 [00:07<00:00, 132.69 examples/s]\n" ] } ], "source": [ "from src.rag.get_tokenized_dataset import get_tokenized_dataset\n", "\n", "train_dataset, val_dataset = get_tokenized_dataset(df, tokenizer, input_prompt)\n" ] }, { "cell_type": "code", "execution_count": 9, "id": "bc8b8212", "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'tuple' object has no attribute '__dict__'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[9], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodel\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minitialize_deepseek_model\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m initialize_deepseek_model\n\u001b[0;32m----> 3\u001b[0m model, trainer \u001b[38;5;241m=\u001b[39m initialize_deepseek_model(model, device, tokenizer, train_dataset, val_dataset, MODEL_DIR)\n", "File \u001b[0;32m~/Documents/USC/spring_2025/NLP/SQL-Generation/src/model/initialize_deepseek_model.py:23\u001b[0m, in \u001b[0;36minitialize_deepseek_model\u001b[0;34m(model, device, tokenizer, train_dataset, val_dataset, MODEL_DIR)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21minitialize_deepseek_model\u001b[39m(model, device, tokenizer, train_dataset, val_dataset, MODEL_DIR):\n\u001b[1;32m 6\u001b[0m lora_config \u001b[38;5;241m=\u001b[39m LoraConfig(\n\u001b[1;32m 7\u001b[0m r\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m16\u001b[39m, \u001b[38;5;66;03m# Rank of LoRA matrices (adjust for memory vs. accuracy)\u001b[39;00m\n\u001b[1;32m 8\u001b[0m lora_alpha\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m32\u001b[39m, \u001b[38;5;66;03m# Scaling factor\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 20\u001b[0m ]\n\u001b[1;32m 21\u001b[0m )\n\u001b[0;32m---> 23\u001b[0m model \u001b[38;5;241m=\u001b[39m get_peft_model(model, lora_config)\n\u001b[1;32m 24\u001b[0m model \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mto(device)\n\u001b[1;32m 26\u001b[0m training_args \u001b[38;5;241m=\u001b[39m TrainingArguments(\n\u001b[1;32m 27\u001b[0m output_dir\u001b[38;5;241m=\u001b[39mMODEL_DIR,\n\u001b[1;32m 28\u001b[0m eval_strategy\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mepoch\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;66;03m# Evaluate at the end of each epoch\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 42\u001b[0m greater_is_better\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 43\u001b[0m )\n", "File \u001b[0;32m/opt/anaconda3/envs/CSCI544/lib/python3.11/site-packages/peft/mapping_func.py:66\u001b[0m, in \u001b[0;36mget_peft_model\u001b[0;34m(model, peft_config, adapter_name, mixed, autocast_adapter_dtype, revision, low_cpu_mem_usage)\u001b[0m\n\u001b[1;32m 64\u001b[0m model_config \u001b[38;5;241m=\u001b[39m BaseTuner\u001b[38;5;241m.\u001b[39mget_model_config(model)\n\u001b[1;32m 65\u001b[0m old_name \u001b[38;5;241m=\u001b[39m peft_config\u001b[38;5;241m.\u001b[39mbase_model_name_or_path\n\u001b[0;32m---> 66\u001b[0m new_name \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname_or_path\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 67\u001b[0m peft_config\u001b[38;5;241m.\u001b[39mbase_model_name_or_path \u001b[38;5;241m=\u001b[39m new_name\n\u001b[1;32m 69\u001b[0m \u001b[38;5;66;03m# Especially in notebook environments there could be a case that a user wants to experiment with different\u001b[39;00m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;66;03m# configuration values. However, it is likely that there won't be any changes for new configs on an already\u001b[39;00m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;66;03m# initialized PEFT model. The best we can do is warn the user about it.\u001b[39;00m\n", "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute '__dict__'" ] } ], "source": [ "from src.model.initialize_deepseek_model import initialize_deepseek_model\n", "\n", "model, trainer = initialize_deepseek_model(model, device, tokenizer, train_dataset, val_dataset, MODEL_DIR)" ] }, { "cell_type": "code", "execution_count": 9, "id": "8890a657", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8890a657", "outputId": "95b9971d-d446-432b-9faa-baa1c060d66a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "'NoneType' object has no attribute 'cadam32bit_grad_fp32'\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/opt/anaconda3/envs/CSCI544/lib/python3.11/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.\n", " warn(\"The installed version of bitsandbytes was compiled without GPU support. \"\n" ] } ], "source": [ "# Define LoRA configuration\n", "lora_config = LoraConfig(\n", " r=16, # Rank of LoRA matrices (adjust for memory vs. accuracy)\n", " lora_alpha=32, # Scaling factor\n", " lora_dropout=0.0, # Dropout for regularization\n", " bias=\"none\",\n", " task_type=TaskType.CAUSAL_LM,\n", " target_modules=[\n", " \"q_proj\",\n", " \"k_proj\",\n", " \"v_proj\",\n", " \"o_proj\",\n", " \"gate_proj\",\n", " \"up_proj\",\n", " \"down_proj\"\n", " ]\n", ")\n", "\n", "# Wrap model with LoRA adapters\n", "model = get_peft_model(model, lora_config)\n", "model = model.to(device)" ] }, { "cell_type": "code", "execution_count": 10, "id": "d9508451", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "d9508451", "outputId": "d004fa38-78a0-49ee-eed5-bbc6373ccae2" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/g0/47tr69v179dg7w6zyphp9b280000gn/T/ipykernel_70404/2486149154.py:21: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n", " trainer = Trainer(\n", "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n" ] } ], "source": [ "training_args = TrainingArguments(\n", " output_dir=MODEL_DIR,\n", " eval_strategy=\"epoch\", # Evaluate at the end of each epoch\n", " save_strategy=\"epoch\", # Save model every epoch\n", " per_device_train_batch_size=1, # LoRA allows higher batch size\n", " per_device_eval_batch_size=1,\n", " gradient_accumulation_steps=16,\n", " num_train_epochs=10, # Increase if needed\n", " learning_rate=5e-5, # Higher LR since we're only training LoRA layers\n", " weight_decay=0.001,\n", " logging_steps=50, # Print loss every 50 steps\n", " save_total_limit=2, # Keep last 4 checkpoints\n", " bf16=True if torch.cuda.is_available() else False,\n", " push_to_hub=False,\n", " load_best_model_at_end=True,\n", " metric_for_best_model=\"eval_loss\",\n", " greater_is_better=False\n", ")\n", "\n", "# Trainer setup\n", "trainer = Trainer(\n", " model=model,\n", " args=training_args,\n", " train_dataset=train_dataset,\n", " eval_dataset=val_dataset,\n", " tokenizer=tokenizer,\n", " callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]\n", ")" ] }, { "cell_type": "code", "execution_count": 11, "id": "b0ff5278", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 214 }, "id": "b0ff5278", "outputId": "07e6446f-c680-4532-caad-d62a7d3edd6d" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\n", "KeyboardInterrupt\n", "\n" ] } ], "source": [ "# Run training\n", "trainer.train()\n", "\n", "# Merge LoRA adapters with the base model before saving\n", "model = model.merge_and_unload()\n", "model.save_pretrained(MODEL_DIR)\n", "tokenizer.save_pretrained(MODEL_DIR)" ] }, { "cell_type": "code", "execution_count": null, "id": "J7qO7FE73i40", "metadata": { "id": "J7qO7FE73i40" }, "outputs": [], "source": [ "\n", "# Prepare query with the same prompt\n", "input_text = \"How many points do the Los Angeles Lakers average at home?\"\n", "message = [{'role': 'user', 'content': input_prompt + input_text}]\n", "inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors=\"pt\").to(model.device)\n", "\n", "# Generate Tables\n", "outputs = model.generate(\n", " inputs,\n", " max_new_tokens=256,\n", ")\n", "model_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)\n", "\n", "print(\"Generated Tables:\", model_output)" ] }, { "cell_type": "code", "execution_count": null, "id": "kwHMVyQa3n89", "metadata": { "id": "kwHMVyQa3n89" }, "outputs": [], "source": [ "import sqlite3 as sql\n", "\n", "prompt_length = len(input_prompt)\n", "\n", "print(prompt_length)\n", "\n", "# Create connection to sqlite3 database\n", "connection = sql.connect(read_path('nba-data/nba.sqlite'))\n", "cursor = connection.cursor()\n", "\n", "for v in val_dataset:\n", " full_example = tokenizer.decode(v[\"input_ids\"], skip_special_tokens=True)\n", " user_prompt = full_example[:prompt_length]\n", " question, tables = full_example[prompt_length:].split(\"Tables:\\n\")\n", " print(question)\n", " print(tables)\n", " break\n" ] }, { "cell_type": "code", "execution_count": null, "id": "LhiHqAaB9uE4", "metadata": { "id": "LhiHqAaB9uE4" }, "outputs": [], "source": [ "def extract_tables_from_string(s):\n", " keywords = {\"game\", \"team\", \"other_stats\"}\n", " found = {k for k in keywords if k in s}\n", " return found" ] }, { "cell_type": "markdown", "id": "Kdd8nxWD9txh", "metadata": { "id": "Kdd8nxWD9txh" }, "source": [] }, { "cell_type": "code", "execution_count": null, "id": "KjAXaUgp4TfY", "metadata": { "id": "KjAXaUgp4TfY" }, "outputs": [], "source": [ "def compare_table_lists(actual_tables, generated_tables):\n", " actual_set = extract_tables_from_string(actual_tables)\n", " generated_set = extract_tables_from_string(generated_tables)\n", "\n", " # Check if they match\n", " return generated_set == actual_set" ] }, { "cell_type": "code", "execution_count": null, "id": "8h7bpMML6G6v", "metadata": { "id": "8h7bpMML6G6v" }, "outputs": [], "source": [ "\n", "num_sql_matched = 0\n", "\n", "first_actual = []\n", "first_model = []\n", "print(\"Evaluating...\")\n", "for v in val_dataset:\n", " full_example = tokenizer.decode(v[\"input_ids\"], skip_special_tokens=True)\n", " user_prompt = full_example[:prompt_length]\n", " question, training_tables = full_example[prompt_length:].split(\"Tables:\\n\")\n", " #print(question)\n", " #print(sql_query)\n", "\n", " # Obtain model output\n", " message = [{'role': 'user', 'content': input_prompt + question}]\n", " inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors=\"pt\").to(model.device)\n", "\n", " # Generate SQL query\n", " outputs = model.generate(\n", " inputs,\n", " max_new_tokens=256,\n", " pad_token_id=tokenizer.eos_token_id,\n", " )\n", " model_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)\n", " after_last_colon = model_output.rsplit(\":\", 1)[-1]\n", " tables_string = after_last_colon.replace('\\n', '').replace('\\r', '')\n", " #print(\"Training tables:\", training_tables)\n", " #print(\"Model tables:\", tables_string.split(\" \"))\n", " first_actual = training_tables\n", " first_model = tables_string\n", " result = compare_table_lists(training_tables, tables_string)\n", " if result:\n", " num_sql_matched += 1\n", "\n", "print(\"Accuracy :\", num_sql_matched/len(val_dataset))\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "CoJeZ4FoUMp_", "metadata": { "id": "CoJeZ4FoUMp_" }, "outputs": [], "source": [ "\n", "num_sql_matched = 0\n", "\n", "first_actual = []\n", "first_model = []\n", "print(\"Evaluating...\")\n", "for v in val_dataset:\n", " full_example = tokenizer.decode(v[\"input_ids\"], skip_special_tokens=True)\n", " user_prompt = full_example[:prompt_length]\n", " question, training_tables = full_example[prompt_length:].split(\"Tables:\\n\")\n", " #print(question)\n", " #print(sql_query)\n", "\n", " # Obtain model output\n", " message = [{'role': 'user', 'content': input_prompt + question}]\n", " inputs = tokenizer.apply_chat_template(message, add_generation_prompt=True, return_tensors=\"pt\").to(model.device)\n", "\n", " # Generate SQL query\n", " outputs = model.generate(\n", " inputs,\n", " max_new_tokens=256,\n", " pad_token_id=tokenizer.eos_token_id,\n", " )\n", " model_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)\n", " after_last_colon = model_output.rsplit(\":\", 1)[-1]\n", " tables_string = after_last_colon.replace('\\n', '').replace('\\r', '')\n", " #print(\"Training tables:\", training_tables)\n", " #print(\"Model tables:\", tables_string.split(\" \"))\n", " first_actual = training_tables\n", " first_model = tables_string\n", " result = compare_table_lists(training_tables, tables_string)\n", " if result:\n", " num_sql_matched += 1\n", "\n", "print(\"Accuracy :\", num_sql_matched/len(val_dataset))\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "lNG1joS3T8DN", "metadata": { "id": "lNG1joS3T8DN" }, "outputs": [], "source": [ "model = AutoModelForCausalLM.from_pretrained(MODEL_DIR, torch_dtype=torch.bfloat16, device_map=device)\n", "tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)\n" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "A100", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.11" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "13ae11c314664c44ae18d35cf57a1334": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "17ddbb74e1764f37b8d34c311fae200c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_84cc47dc70864bf3aa7599c06eb13c51", "placeholder": "​", "style": "IPY_MODEL_5d711bb927024d8d9f9b8bb685d6f388", "value": "Fetching 37 files: 100%" } }, "1a8c093fccbb437db6e0390a920f5cc5": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_13ae11c314664c44ae18d35cf57a1334", "max": 1044, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_e68cfd05ba994a34b93107d2eab82ad3", "value": 1044 } }, "1ec5329ea0434df4b74d0f311e016c3e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "3b80c66e0f384c45ab4187301599fab2": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4c41e81bcd254df7b1265206a5a6b40b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_dd24270dc07942a6972fbfaf58129989", "placeholder": "​", "style": "IPY_MODEL_643903cd7a5b4a52a4687ec38eb8c4dc", "value": "Map: 100%" } }, "5d711bb927024d8d9f9b8bb685d6f388": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5d89a5574a3d4a8993e6dca78d406d2d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "643903cd7a5b4a52a4687ec38eb8c4dc": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "68ff2fc00bd041e7b79a811e3de1e596": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_4c41e81bcd254df7b1265206a5a6b40b", "IPY_MODEL_1a8c093fccbb437db6e0390a920f5cc5", "IPY_MODEL_e11d04a9d22a4229922e3eb4e3eb6466" ], "layout": "IPY_MODEL_5d89a5574a3d4a8993e6dca78d406d2d" } }, "7751defbc4534d518d9e923b9019aa8b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "84cc47dc70864bf3aa7599c06eb13c51": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9200f1303f124bddaa6114cdf0f5f878": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_17ddbb74e1764f37b8d34c311fae200c", "IPY_MODEL_ef732739334b4ac593fd665e01cd83c1", "IPY_MODEL_949ee3d1a9cd4060864dec5d4283ef2c" ], "layout": "IPY_MODEL_b98629e053674527aacca899ab7f11a9" } }, "949ee3d1a9cd4060864dec5d4283ef2c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7751defbc4534d518d9e923b9019aa8b", "placeholder": "​", "style": "IPY_MODEL_fe6352bce22a40e7a936e7f90313bd02", "value": " 37/37 [00:00<00:00, 3657.54it/s]" } }, "b98629e053674527aacca899ab7f11a9": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "db6a23e658a34722a8f22505c6ace7b4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "dd24270dc07942a6972fbfaf58129989": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e11d04a9d22a4229922e3eb4e3eb6466": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ea283e7e8b234519b881c562b7eb01d3", "placeholder": "​", "style": "IPY_MODEL_1ec5329ea0434df4b74d0f311e016c3e", "value": " 1044/1044 [00:10<00:00, 43.90 examples/s]" } }, "e68cfd05ba994a34b93107d2eab82ad3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "ea283e7e8b234519b881c562b7eb01d3": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ef732739334b4ac593fd665e01cd83c1": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3b80c66e0f384c45ab4187301599fab2", "max": 37, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_db6a23e658a34722a8f22505c6ace7b4", "value": 37 } }, "fe6352bce22a40e7a936e7f90313bd02": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "nbformat": 4, "nbformat_minor": 5 }