clean rag training
- src/model/initialize_deepseek_model.py +54 -0
- src/prompts/pre_rag_prompt.py +3 -9
- src/rag/evaluate_rag.py +0 -0
- src/rag/get_tokenized_dataset.py +47 -0
- train_dynamic_rag.ipynb +327 -534
src/model/initialize_deepseek_model.py
ADDED
@@ -0,0 +1,54 @@
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig, EarlyStoppingCallback, PreTrainedTokenizer
from peft import LoraConfig, get_peft_model, TaskType
import torch

def initialize_deepseek_model(model, device, tokenizer, train_dataset, val_dataset, MODEL_DIR):
    lora_config = LoraConfig(
        r=16,                           # Rank of LoRA matrices (adjust for memory vs. accuracy)
        lora_alpha=32,                  # Scaling factor
        lora_dropout=0.0,               # Dropout for regularization
        bias="none",
        task_type=TaskType.CAUSAL_LM,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj"
        ]
    )

    model = get_peft_model(model, lora_config)
    model = model.to(device)

    training_args = TrainingArguments(
        output_dir=MODEL_DIR,
        eval_strategy="epoch",              # Evaluate at the end of each epoch
        save_strategy="epoch",              # Save a checkpoint every epoch
        per_device_train_batch_size=1,      # Effective batch size comes from gradient accumulation
        per_device_eval_batch_size=1,
        gradient_accumulation_steps=16,
        num_train_epochs=10,                # Increase if needed
        learning_rate=5e-5,                 # Higher LR since only the LoRA layers are trained
        weight_decay=0.001,
        logging_steps=50,                   # Print loss every 50 steps
        save_total_limit=2,                 # Keep only the last 2 checkpoints
        bf16=True if torch.cuda.is_available() else False,
        push_to_hub=False,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
    )

    return model, trainer
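For reference, a minimal usage sketch (not part of this commit) showing how this helper could be wired together with the dataset helper added below; the checkpoint id, TSV path, and output directory are assumptions borrowed from the notebook.

# Hypothetical usage sketch; checkpoint id, paths, and MODEL_DIR are assumptions.
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

from src.model.initialize_deepseek_model import initialize_deepseek_model
from src.rag.get_tokenized_dataset import get_tokenized_dataset
from src.prompts.pre_rag_prompt import input_text as input_prompt

model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"  # assumed checkpoint id
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

nba_df = pd.read_csv("train-data/sql_train.tsv", sep="\t")  # path as used in the notebook
train_dataset, val_dataset = get_tokenized_dataset(nba_df, tokenizer, input_prompt)

model, trainer = initialize_deepseek_model(
    model, device, tokenizer, train_dataset, val_dataset, MODEL_DIR="./rag_table_model"
)
trainer.train()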
src/prompts/pre_rag_prompt.py
CHANGED
@@ -1,8 +1,6 @@
-input_text = """
-are relevant to the query. Here is the schema of the
+input_text = """Given natural language queries tells me which tables
+are relevant to the query. Here is the schema of the database.
 
-Table "team"
-Stores information about NBA teams.
 Table "team"
 identifier for the team,
 full official name of the team ("Los Angeles Lakers"),
@@ -38,7 +36,7 @@ assists by the home team,
 steals by the home team,
 blocks by the home team,
 turnovers by the home team,
-personal fouls by the home team
+personal fouls by the home team,
 total points scored by the home team,
 plus/minus rating for the home team,
 iD of the away team,
@@ -113,10 +111,6 @@ Response:
 SQLite:
 team
 
-Request:
-"Which team had the highest number of team turnovers in an away game?"
-Response:
-other_stats
 
 Request:
 "What was the average number of fastbreak points scored by the Los Angeles Lakers in home wins during the 2020 season?"
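To make the role of this prompt concrete, here is a small illustrative snippet (not in the commit); the example question is made up, and the expected completion format mirrors format_deepseek_chat in src/rag/get_tokenized_dataset.py.

# Illustrative only: the schema prompt is prepended to each natural-language question.
from src.prompts.pre_rag_prompt import input_text as input_prompt

question = "Which team had the most home wins in the 2018 season?"  # hypothetical query
prompt = f"{input_prompt}{question}\n"
# The fine-tuned model is trained to complete this with the relevant table names,
# e.g. "Tables:\n['game']".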
src/rag/evaluate_rag.py
ADDED
File without changes
|
src/rag/get_tokenized_dataset.py
ADDED
@@ -0,0 +1,47 @@
from datasets import Dataset
from sql_metadata import Parser


def format_deepseek_chat(example, tokenizer, input_prompt):
    # Manually build the prompt as one flat string
    prompt = f"{input_prompt}{example['natural_query']}\n"
    completion = f"Tables:\n{example['tables']}"

    full_text = prompt + completion
    tokenized = tokenizer(
        full_text,
        truncation=True,
        padding="max_length",
        max_length=3156,  # or whatever your model can handle
    )

    # Mask out prompt tokens in the labels
    prompt_len = len(tokenizer(prompt, truncation=True)["input_ids"])
    labels = tokenized["input_ids"][:]
    labels[:prompt_len] = [-100] * prompt_len
    tokenized["labels"] = labels

    return tokenized


def get_tokenized_dataset(nba_df, tokenizer, input_prompt):
    natural_query_list = nba_df["natural_query"].tolist()
    sql_query_list = nba_df["sql_query"].tolist()
    tables = [Parser(sql_query).tables for sql_query in sql_query_list]

    dataset_dict = {
        "natural_query": natural_query_list,
        "tables": tables,
    }

    # Create HuggingFace Dataset
    dataset = Dataset.from_dict(dataset_dict)

    tokenized_dataset = dataset.map(
        lambda x: format_deepseek_chat(x, tokenizer, input_prompt),
        remove_columns=["natural_query", "tables"]
    )
    split = int(0.9 * len(tokenized_dataset))  # 90% train, 10% validation
    train_dataset = tokenized_dataset.select(range(split))
    val_dataset = tokenized_dataset.select(range(split, len(tokenized_dataset)))
    return train_dataset, val_dataset
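Because the prompt tokens are masked with -100, only the "Tables:\n[...]" completion contributes to the training loss. A quick sanity-check sketch (not part of the commit; the checkpoint id and TSV path are assumptions taken from the notebook):

# Hypothetical check that the 90/10 split and label masking behave as expected.
import pandas as pd
from transformers import AutoTokenizer

from src.rag.get_tokenized_dataset import get_tokenized_dataset
from src.prompts.pre_rag_prompt import input_text as input_prompt

tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-coder-1.3b-instruct")  # assumed checkpoint id
nba_df = pd.read_csv("train-data/sql_train.tsv", sep="\t")  # path as used in the notebook

train_dataset, val_dataset = get_tokenized_dataset(nba_df, tokenizer, input_prompt)
print(len(train_dataset), len(val_dataset))  # e.g. 939 train / 105 validation in the notebook run

example = train_dataset[0]
masked = sum(1 for t in example["labels"] if t == -100)
print(f"{masked} of {len(example['labels'])} label positions are ignored in the loss")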
train_dynamic_rag.ipynb
CHANGED
@@ -7,7 +7,16 @@
|
|
7 |
"metadata": {
|
8 |
"id": "a87fe5f3"
|
9 |
},
|
10 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
"source": [
|
12 |
"import pandas as pd\n",
|
13 |
"import torch\n",
|
@@ -23,9 +32,7 @@
|
|
23 |
"import os\n",
|
24 |
"import numpy as np\n",
|
25 |
"\n",
|
26 |
-
"
|
27 |
-
" %pip install datasets\n",
|
28 |
-
" %pip install sql_metadata\n",
|
29 |
"\"\"\"\"\n",
|
30 |
"with contextlib.redirect_stdout(sys.__stdout__), contextlib.redirect_stderr(sys.__stderr__):\n",
|
31 |
" %pip install datasets\n",
|
@@ -44,7 +51,7 @@
|
|
44 |
},
|
45 |
"outputs": [],
|
46 |
"source": [
|
47 |
-
"is_google_colab =
|
48 |
"use_bnb = False"
|
49 |
]
|
50 |
},
|
@@ -73,41 +80,7 @@
|
|
73 |
"id": "47577a7f",
|
74 |
"outputId": "999c4e88-3f89-49b1-9e21-abac91703bf3"
|
75 |
},
|
76 |
-
"outputs": [
|
77 |
-
{
|
78 |
-
"output_type": "stream",
|
79 |
-
"name": "stdout",
|
80 |
-
"text": [
|
81 |
-
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
|
82 |
-
]
|
83 |
-
},
|
84 |
-
{
|
85 |
-
"output_type": "stream",
|
86 |
-
"name": "stderr",
|
87 |
-
"text": [
|
88 |
-
"/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
|
89 |
-
"The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
|
90 |
-
"To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
|
91 |
-
"You will be able to reuse this secret in all of your notebooks.\n",
|
92 |
-
"Please note that authentication is recommended but still optional to access public models or datasets.\n",
|
93 |
-
" warnings.warn(\n"
|
94 |
-
]
|
95 |
-
},
|
96 |
-
{
|
97 |
-
"output_type": "display_data",
|
98 |
-
"data": {
|
99 |
-
"text/plain": [
|
100 |
-
"Fetching 37 files: 0%| | 0/37 [00:00<?, ?it/s]"
|
101 |
-
],
|
102 |
-
"application/vnd.jupyter.widget-view+json": {
|
103 |
-
"version_major": 2,
|
104 |
-
"version_minor": 0,
|
105 |
-
"model_id": "9200f1303f124bddaa6114cdf0f5f878"
|
106 |
-
}
|
107 |
-
},
|
108 |
-
"metadata": {}
|
109 |
-
}
|
110 |
-
],
|
111 |
"source": [
|
112 |
"current_read_path = \"./\"\n",
|
113 |
"current_write_path = \"./\"\n",
|
@@ -147,6 +120,12 @@
|
|
147 |
"from src.prompts.pre_rag_prompt import input_text as input_prompt"
|
148 |
]
|
149 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
{
|
151 |
"cell_type": "code",
|
152 |
"execution_count": 5,
|
@@ -183,183 +162,79 @@
|
|
183 |
},
|
184 |
"outputs": [
|
185 |
{
|
186 |
-
"output_type": "stream",
|
187 |
"name": "stderr",
|
188 |
-
"text": [
|
189 |
-
"<ipython-input-6-ac29fbf828da>:2: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
|
190 |
-
" df = df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n"
|
191 |
-
]
|
192 |
-
},
|
193 |
-
{
|
194 |
"output_type": "stream",
|
195 |
-
"name": "stdout",
|
196 |
"text": [
|
197 |
-
"
|
|
|
198 |
]
|
199 |
}
|
200 |
],
|
201 |
"source": [
|
202 |
-
"\n",
|
203 |
"df = pd.read_csv(read_path(\"train-data/sql_train.tsv\"), sep='\\t')\n",
|
204 |
"df = df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n",
|
205 |
"\n",
|
206 |
-
"# Display dataset info\n",
|
207 |
-
"print(f\"Total dataset examples: {len(df)}\")\n",
|
208 |
-
"\n",
|
209 |
-
"# Load tokenizer\n",
|
210 |
"model_name = read_path(\"deepseek-coder-1.3b-instruct\")\n",
|
211 |
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
|
212 |
-
"\n",
|
213 |
-
"# Enable 8-bit quantization for lower memory usage\n",
|
214 |
-
"bnb_config = None\n",
|
215 |
-
"if use_bnb:\n",
|
216 |
-
" bnb_config = BitsAndBytesConfig(\n",
|
217 |
-
" load_in_8bit=True,\n",
|
218 |
-
" bnb_8bit_compute_dtype=torch.float16\n",
|
219 |
-
" )\n",
|
220 |
-
"\n",
|
221 |
-
"# Load model with quantization\n",
|
222 |
-
"#device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
223 |
"device_name = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
|
224 |
"device = torch.device(device_name)\n",
|
|
|
225 |
"model = AutoModelForCausalLM.from_pretrained(\n",
|
226 |
" model_name,\n",
|
227 |
-
" quantization_config=bnb_config,\n",
|
228 |
" device_map=device\n",
|
229 |
")\n",
|
230 |
-
"\n",
|
231 |
"tokenizer.truncation_side = \"left\"\n"
|
232 |
]
|
233 |
},
|
234 |
{
|
235 |
"cell_type": "code",
|
236 |
"execution_count": 7,
|
237 |
-
"id": "
|
238 |
-
"metadata": {
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
" \"natural_query\": natural_query_list,\n",
|
249 |
-
" \"tables\": tables,\n",
|
250 |
-
"}\n",
|
251 |
-
"\n",
|
252 |
-
"# Create HuggingFace Dataset\n",
|
253 |
-
"dataset = Dataset.from_dict(dataset_dict)"
|
254 |
-
]
|
255 |
-
},
|
256 |
-
{
|
257 |
-
"cell_type": "code",
|
258 |
-
"execution_count": 8,
|
259 |
-
"id": "f385a9df",
|
260 |
-
"metadata": {
|
261 |
-
"id": "f385a9df"
|
262 |
-
},
|
263 |
-
"outputs": [],
|
264 |
"source": [
|
|
|
265 |
"\n",
|
266 |
-
"
|
267 |
-
" # Manually build the prompt as one flat string\n",
|
268 |
-
" prompt = f\"{input_prompt}{example['natural_query']}\\n\"\n",
|
269 |
-
" completion = f\"Tables:\\n{example['tables']}\"\n",
|
270 |
-
"\n",
|
271 |
-
" full_text = prompt + completion\n",
|
272 |
-
" tokenized = tokenizer(\n",
|
273 |
-
" full_text,\n",
|
274 |
-
" truncation=True,\n",
|
275 |
-
" padding=\"max_length\",\n",
|
276 |
-
" max_length=3156, # or whatever your model can handle\n",
|
277 |
-
" )\n",
|
278 |
-
"\n",
|
279 |
-
" # Mask out prompt tokens in the labels\n",
|
280 |
-
" prompt_len = len(tokenizer(prompt, truncation=True)[\"input_ids\"])\n",
|
281 |
-
" labels = tokenized[\"input_ids\"][:]\n",
|
282 |
-
" labels[:prompt_len] = [-100] * prompt_len\n",
|
283 |
-
" tokenized[\"labels\"] = labels\n",
|
284 |
-
"\n",
|
285 |
-
" return tokenized\n"
|
286 |
]
|
287 |
},
|
288 |
{
|
289 |
"cell_type": "code",
|
290 |
"execution_count": 9,
|
291 |
-
"id": "
|
292 |
-
"metadata": {
|
293 |
-
"colab": {
|
294 |
-
"base_uri": "https://localhost:8080/",
|
295 |
-
"height": 121,
|
296 |
-
"referenced_widgets": [
|
297 |
-
"68ff2fc00bd041e7b79a811e3de1e596",
|
298 |
-
"4c41e81bcd254df7b1265206a5a6b40b",
|
299 |
-
"1a8c093fccbb437db6e0390a920f5cc5",
|
300 |
-
"e11d04a9d22a4229922e3eb4e3eb6466",
|
301 |
-
"5d89a5574a3d4a8993e6dca78d406d2d",
|
302 |
-
"dd24270dc07942a6972fbfaf58129989",
|
303 |
-
"643903cd7a5b4a52a4687ec38eb8c4dc",
|
304 |
-
"13ae11c314664c44ae18d35cf57a1334",
|
305 |
-
"e68cfd05ba994a34b93107d2eab82ad3",
|
306 |
-
"ea283e7e8b234519b881c562b7eb01d3",
|
307 |
-
"1ec5329ea0434df4b74d0f311e016c3e"
|
308 |
-
]
|
309 |
-
},
|
310 |
-
"id": "43562f78",
|
311 |
-
"outputId": "58e8ce3f-b7cd-4cf6-dfa4-180b4a699cf9"
|
312 |
-
},
|
313 |
"outputs": [
|
314 |
{
|
315 |
-
"
|
316 |
-
"
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
"
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
},
|
326 |
-
"metadata": {}
|
327 |
-
},
|
328 |
-
{
|
329 |
-
"output_type": "stream",
|
330 |
-
"name": "stdout",
|
331 |
-
"text": [
|
332 |
-
"939\n",
|
333 |
-
"105\n",
|
334 |
-
"{'input_ids': [32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 
32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 
32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 
32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 
32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32013, 2042, 417, 274, 20391, 344, 2017, 3881, 4694, 12780, 10510, 523, 585, 7214, 185, 554, 7688, 276, 254, 5151, 13, 4451, 317, 254, 16135, 280, 254, 7214, 280, 254, 4892, 13, 185, 185, 5127, 440, 21598, 1, 185, 50, 577, 379, 1748, 782, 461, 8443, 9474, 13, 185, 5127, 440, 21598, 1, 207, 185, 19655, 327, 254, 2547, 11, 207, 185, 9875, 6270, 1208, 280, 254, 2547, 8507, 43, 378, 14204, 412, 9961, 8472, 185, 11972, 2942, 2547, 1208, 8507, 43, 1743, 8472, 185, 77, 767, 1523, 327, 254, 2547, 8507, 43, 9961, 8472, 185, 23861, 1064, 254, 2547, 317, 2842, 11, 185, 4968, 1064, 254, 2547, 317, 6288, 11, 185, 5456, 254, 2547, 438, 8143, 185, 477, 185, 185, 5127, 440, 14641, 1, 185, 21810, 21411, 11, 31131, 372, 440, 17, 19393, 19393, 1, 8507, 17, 16, 24, 22, 15, 1, 327, 254, 207, 16, 24, 22, 15, 4314, 8, 185, 19655, 280, 254, 1712, 2547, 11, 207, 185, 356, 26321, 335, 280, 254, 1712, 2547, 11, 185, 9875, 1208, 280, 254, 1712, 2547, 11, 185, 19464, 21411, 327, 254, 2612, 11, 185, 1984, 254, 2612, 438, 7226, 334, 19393, 19393, 12, 8213, 12, 7127, 650, 185, 1, 54, 1, 562, 254, 1712, 2547, 2103, 11, 440, 43, 1, 562, 653, 4726, 11, 185, 11695, 4054, 7226, 279, 254, 2612, 11, 185, 3267, 9054, 1396, 457, 254, 1712, 2547, 11, 185, 3267, 9054, 18012, 457, 254, 1712, 2547, 11, 185, 3267, 6206, 14986, 280, 254, 1712, 2547, 11, 185, 14565, 12, 3772, 2010, 9054, 1396, 457, 254, 1712, 2547, 11, 185, 14565, 12, 3772, 15343, 457, 254, 1712, 2547, 11, 185, 14565, 12, 3772, 2010, 6206, 14986, 280, 254, 1712, 2547, 11, 185, 6630, 8474, 1396, 457, 254, 1712, 2547, 11, 185, 6630, 8474, 18012, 457, 254, 1712, 2547, 11, 185, 6630, 5245, 14986, 280, 254, 1712, 2547, 11, 185, 2959, 4630, 11435, 5740, 457, 254, 1712, 2547, 11, 185, 1551, 4630, 11435, 5740, 457, 254, 1712, 2547, 11, 185, 11695, 11435, 5740, 457, 254, 1712, 2547, 11, 185, 468, 1923, 457, 254, 1712, 2547, 11, 185, 7537, 909, 457, 254, 1712, 2547, 11, 185, 28835, 457, 254, 1712, 2547, 11, 185, 788, 17396, 457, 254, 1712, 2547, 11, 185, 28200, 3931, 2724, 457, 254, 1712, 2547, 19555, 185, 11695, 3472, 18605, 457, 254, 1712, 2547, 11, 185, 13289, 14, 10646, 14026, 327, 254, 1712, 2547, 11, 185, 72, 35, 280, 254, 2292, 2547, 11, 185, 356, 26321, 335, 280, 254, 2292, 2547, 11, 185, 9875, 1208, 280, 254, 2292, 2547, 11, 185, 10108, 393, 4283, 473, 254, 2292, 2547, 486, 82, 12422, 11, 185, 1, 54, 1, 562, 254, 2292, 2547, 2103, 11, 440, 43, 1, 562, 653, 4726, 11, 185, 3267, 9054, 1396, 457, 254, 2292, 2547, 11, 185, 3267, 9054, 18012, 457, 254, 2292, 2547, 11, 185, 3267, 6206, 14986, 280, 254, 2292, 2547, 11, 185, 14565, 12, 3772, 2010, 9054, 1396, 457, 254, 2292, 2547, 11, 185, 14565, 12, 3772, 15343, 457, 254, 2292, 2547, 11, 185, 14565, 12, 3772, 2010, 6206, 14986, 280, 254, 2292, 2547, 11, 185, 6630, 8474, 1396, 457, 254, 2292, 2547, 11, 185, 6630, 8474, 18012, 457, 254, 2292, 2547, 11, 185, 6630, 5245, 14986, 280, 254, 2292, 2547, 11, 185, 2959, 4630, 11435, 5740, 457, 254, 2292, 2547, 11, 185, 1551, 4630, 11435, 5740, 457, 254, 2292, 2547, 11, 185, 11695, 
11435, 5740, 457, 254, 2292, 2547, 11, 185, 468, 1923, 457, 254, 2292, 2547, 11, 185, 7537, 909, 457, 254, 2292, 2547, 11, 185, 28835, 457, 254, 2292, 2547, 11, 185, 788, 17396, 457, 254, 2292, 2547, 11, 185, 28200, 3931, 2724, 457, 254, 2292, 2547, 11, 185, 11695, 3472, 18605, 457, 254, 2292, 2547, 11, 185, 13289, 14, 10646, 14026, 327, 254, 2292, 2547, 11, 185, 15367, 980, 3192, 3905, 317, 2315, 334, 16, 405, 7589, 11, 207, 15, 405, 2357, 650, 185, 13388, 4314, 409, 1530, 23836, 11, 185, 477, 185, 185, 5127, 440, 1156, 62, 16204, 1, 185, 50, 577, 379, 4577, 13024, 11, 12144, 276, 254, 2612, 2365, 3752, 2612, 62, 304, 13, 185, 13403, 11866, 15787, 5787, 7449, 30862, 440, 1156, 62, 16204, 1, 334, 185, 19464, 2612, 21411, 11, 12050, 1975, 3812, 473, 2612, 2365, 185, 275, 6006, 21411, 185, 5816, 2547, 21411, 185, 5816, 2547, 31593, 335, 185, 5816, 2547, 3775, 185, 12168, 279, 254, 7416, 457, 254, 1712, 2547, 185, 9353, 5504, 3472, 457, 254, 1712, 2547, 185, 7212, 2963, 3472, 457, 254, 1712, 2547, 185, 17819, 370, 2012, 457, 254, 1712, 2547, 185, 7675, 280, 2012, 4177, 207, 185, 7675, 280, 2591, 254, 8129, 438, 16538, 185, 5816, 2547, 1936, 17396, 185, 11695, 1936, 17396, 457, 254, 1712, 2547, 185, 5816, 2547, 11435, 5740, 185, 12168, 838, 1936, 17396, 457, 254, 1712, 2547, 185, 11507, 2547, 21411, 185, 11507, 2547, 31593, 335, 185, 12168, 279, 254, 7416, 457, 254, 2292, 2547, 185, 9353, 5504, 3472, 457, 254, 2292, 2547, 185, 7212, 2963, 3472, 457, 254, 2292, 2547, 185, 17819, 370, 2012, 457, 254, 2292, 2547, 185, 11507, 2547, 1936, 17396, 185, 11695, 1936, 17396, 457, 254, 2292, 2547, 185, 11507, 2547, 11435, 5740, 185, 12168, 838, 1936, 17396, 457, 254, 2292, 2547, 185, 477, 185, 185, 185, 7605, 387, 885, 254, 4761, 280, 254, 2365, 344, 417, 4362, 276, 3495, 254, 3881, 4694, 5151, 11, 14843, 457, 929, 281, 11, 533, 441, 2816, 274, 11543, 13, 185, 185, 1459, 2194, 11, 185, 6522, 25, 185, 1, 2628, 317, 254, 1093, 3472, 254, 10851, 14204, 412, 9961, 463, 2634, 18605, 429, 1712, 1956, 185, 6522, 25, 185, 14641, 185, 185, 6522, 25, 185, 1, 15575, 9474, 417, 6288, 279, 254, 1967, 280, 8700, 1956, 185, 6231, 547, 25, 185, 21598, 185, 185, 4397, 25, 185, 1, 15575, 2547, 658, 254, 7495, 1594, 280, 2547, 1936, 17396, 279, 274, 2292, 2612, 1956, 185, 6522, 25, 185, 1156, 62, 16204, 185, 185, 4397, 25, 185, 1, 2628, 438, 254, 5126, 1594, 280, 4299, 9351, 3472, 18605, 457, 254, 10851, 14204, 412, 9961, 279, 1712, 19998, 2310, 254, 207, 17, 15, 17, 15, 4314, 1956, 185, 6522, 25, 185, 14641, 11, 746, 62, 16204, 185, 185, 4888, 317, 254, 3092, 25, 185, 2808, 1311, 3212, 3472, 1213, 254, 11738, 21915, 82, 8129, 2310, 254, 207, 16, 24, 24, 21, 4314, 30, 185, 51, 2368, 25, 185, 3204, 14641, 3676], 'attention_mask': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 32014, 
… | -    [truncated cell output, removed by this commit: the printed tokenized validation example, an "input_ids" list made up almost entirely of the repeated special token id 32014 (presumably padding) followed by the tokenized table-selection prompt]
335 |     ]
336 |    }
337 |   ],
338 |   "source": [
339 |    "\n",
340 | -  "
341 | -  "# Apply formatting\n",
342 | -  "tokenized_dataset = dataset.map(\n",
343 | -  " lambda x: format_deepseek_chat(x, tokenizer),\n",
344 | -  " remove_columns=[\"natural_query\", \"tables\"]\n",
345 | -  ")\n",
346 | -  "\n",
347 | -  "# Split into train/validation\n",
348 | -  "split = int(0.9 * len(tokenized_dataset)) # 90% train, 10% validation\n",
349 | -  "train_dataset = tokenized_dataset.select(range(split))\n",
350 | -  "val_dataset = tokenized_dataset.select(range(split, len(tokenized_dataset)))\n",
351 | -  "\n",
352 | -  "print(len(train_dataset))\n",
353 | -  "print(len(val_dataset))\n",
354 | -  "\n",
355 | -  "for v in val_dataset:\n",
356 | -  " print(v)\n",
357 | -  " break"
358 |   ]
359 |  },
360 |  {
361 |   "cell_type": "code",
362 | - "execution_count":
363 |   "id": "8890a657",
364 |   "metadata": {
365 |    "colab": {
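The inline map-and-split removed above is the logic that this commit factors out into src/rag/get_tokenized_dataset.py (imported further down as get_tokenized_dataset(df, tokenizer, input_prompt)). A minimal sketch of such a helper, written here as an assumption rather than the committed implementation (the formatter body and the way input_prompt is used are guesses), could look like:

    from datasets import Dataset

    def get_tokenized_dataset(df, tokenizer, input_prompt, split_ratio=0.9):
        # Hypothetical sketch: format and tokenize each row, then split 90/10
        # the same way the removed inline cell did.
        def format_row(example):
            # Assumption: shared schema prompt + question, with the gold table
            # list appended after a "Tables:" marker as the target text.
            text = input_prompt + example["natural_query"] + "\nTables:\n" + example["tables"]
            return tokenizer(text, truncation=True)

        dataset = Dataset.from_pandas(df)
        tokenized = dataset.map(format_row, remove_columns=["natural_query", "tables"])

        split = int(split_ratio * len(tokenized))  # 90% train, 10% validation
        train_dataset = tokenized.select(range(split))
        val_dataset = tokenized.select(range(split, len(tokenized)))
        return train_dataset, val_dataset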
@@ -370,10 +245,18 @@
370 |   },
371 |   "outputs": [
372 |    {
373 | -  "output_type": "stream",
374 |    "name": "stdout",
375 |    "text": [
376 | -   "
377 |    ]
378 |   }
379 |  ],
@@ -398,13 +281,12 @@
398 |   "\n",
399 |   "# Wrap model with LoRA adapters\n",
400 |   "model = get_peft_model(model, lora_config)\n",
401 | - "model = model.to(device)
402 | - "model.print_trainable_parameters() # Show trainable parameters count"
403 |  ]
404 | },
405 | {
406 |  "cell_type": "code",
407 | - "execution_count":
408 |  "id": "d9508451",
409 |  "metadata": {
410 |   "colab": {
@@ -415,10 +297,10 @@
415 |  },
416 |  "outputs": [
417 |   {
418 | - "output_type": "stream",
419 |   "name": "stderr",
420 |   "text": [
421 | -  "
422 |   " trainer = Trainer(\n",
423 |   "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
424 |  ]
@@ -457,7 +339,7 @@
457 | },
458 | {
459 |  "cell_type": "code",
460 | - "execution_count":
461 |  "id": "b0ff5278",
462 |  "metadata": {
463 |   "colab": {
@@ -469,101 +351,13 @@
469 |  },
470 |  "outputs": [
471 |   {
472 | - "output_type": "stream",
473 |   "name": "stderr",
474 |   "text": [
475 | -  "\
476 | -  "\
477 | -  "\
478 |   ]
479 | - },
480-566 | - [removed display_data outputs (IPython.core.display.HTML objects): the wandb banner ("Tracking run with wandb version 0.19.9", "Run data is saved locally in <code>/content/wandb/run-20250420_174906-5ypbflqe</code>", "Syncing run /content/drive/MyDrive/sql_gen/dyn_rag_test to Weights & Biases", with links to https://wandb.ai/licesma-usc/huggingface and https://wandb.ai/licesma-usc/huggingface/runs/5ypbflqe and https://wandb.me/developer-guide) and the Trainer progress table (progress 4/580, "[ 4/580 00:11 < 54:56, 0.17 it/s, Epoch 0.05/10]", columns Epoch / Training Loss / Validation Loss), each block followed by "metadata": {}]
567 |   }
568 |  ],
569 |  "source": [
@@ -578,6 +372,12 @@
578 | },
579 | {
580 |  "cell_type": "code",
581 |  "source": [
582 |   "\n",
583 |   "# Prepare query with the same prompt\n",
@@ -593,16 +393,16 @@
593 |   "model_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)\n",
594 |   "\n",
595 |   "print(\"Generated Tables:\", model_output)"
596 | - ]
597 | - "metadata": {
598 | -  "id": "J7qO7FE73i40"
599 | - },
600 | - "id": "J7qO7FE73i40",
601 | - "execution_count": null,
602 | - "outputs": []
603 | },
604 | {
605 |  "cell_type": "code",
606 |  "source": [
607 |   "import sqlite3 as sql\n",
608 |   "\n",
@@ -620,41 +420,40 @@
620 |   " question, tables = full_example[prompt_length:].split(\"Tables:\\n\")\n",
621 |   " print(question)\n",
622 |   " print(tables)\n",
623 | - " break\n"
624 | -
625 | - ],
626 | - "metadata": {
627 | -  "id": "kwHMVyQa3n89"
628 | - },
629 | - "id": "kwHMVyQa3n89",
630 | - "execution_count": null,
631 | - "outputs": []
632 | },
633 | {
634 |  "cell_type": "code",
635 |  "source": [
636 |   "def extract_tables_from_string(s):\n",
637 |   " keywords = {\"game\", \"team\", \"other_stats\"}\n",
638 |   " found = {k for k in keywords if k in s}\n",
639 |   " return found"
640 | - ]
641 | - "metadata": {
642 | -  "id": "LhiHqAaB9uE4"
643 | - },
644 | - "id": "LhiHqAaB9uE4",
645 | - "execution_count": null,
646 | - "outputs": []
647 | },
648 | {
649 |  "cell_type": "markdown",
650 | - "
651 |  "metadata": {
652 |   "id": "Kdd8nxWD9txh"
653 |  },
654 | - "
655 | },
656 | {
657 |  "cell_type": "code",
658 |  "source": [
659 |   "def compare_table_lists(actual_tables, generated_tables):\n",
660 |   " actual_set = extract_tables_from_string(actual_tables)\n",
@@ -662,16 +461,16 @@
662 |   "\n",
663 |   " # Check if they match\n",
664 |   " return generated_set == actual_set"
665 | - ]
666 | - "metadata": {
667 | -  "id": "KjAXaUgp4TfY"
668 | - },
669 | - "id": "KjAXaUgp4TfY",
670 | - "execution_count": null,
671 | - "outputs": []
672 | },
673 | {
674 |  "cell_type": "code",
675 |  "source": [
676 |   "\n",
677 |   "num_sql_matched = 0\n",
@@ -709,16 +508,16 @@
709 |   "\n",
710 |   "print(\"Accuracy :\", num_sql_matched/len(val_dataset))\n",
711 |   "\n"
712 | - ]
713 | - "metadata": {
714 | -  "id": "8h7bpMML6G6v"
715 | - },
716 | - "id": "8h7bpMML6G6v",
717 | - "execution_count": null,
718 | - "outputs": []
719 | },
720 | {
721 |  "cell_type": "code",
722 |  "source": [
723 |   "\n",
724 |   "num_sql_matched = 0\n",
@@ -756,29 +555,28 @@
756 |   "\n",
757 |   "print(\"Accuracy :\", num_sql_matched/len(val_dataset))\n",
758 |   "\n"
759 | - ]
760 | - "metadata": {
761 | -  "id": "CoJeZ4FoUMp_"
762 | - },
763 | - "execution_count": null,
764 | - "outputs": [],
765 | - "id": "CoJeZ4FoUMp_"
766 | },
767 | {
768 |  "cell_type": "code",
769 | - "
770 | -
771 | - "tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)\n"
772 | - ],
773 |  "metadata": {
774 |   "id": "lNG1joS3T8DN"
775 |  },
776 | - "
777 | - "
778 | -
779 | }
780 | ],
781 | "metadata": {
782 |  "kernelspec": {
783 |   "display_name": "Python 3",
784 |   "name": "python3"
@@ -795,39 +593,64 @@
795 |   "pygments_lexer": "ipython3",
796 |   "version": "3.11.11"
797 |  },
798 | - "colab": {
799 | -  "provenance": [],
800 | -  "gpuType": "A100"
801 | - },
802 | - "accelerator": "GPU",
803 |  "widgets": {
804 |   "application/vnd.jupyter.widget-state+json": {
805-1480 | [reshuffled ipywidgets widget-state metadata, shown here as the removal side of the hunks @@ -845, @@ -861, @@ -942, @@ -994, @@ -1009, @@ -1061, @@ -1129, @@ -1159, @@ -1226, @@ -1336, @@ -1403, @@ -1421 and @@ -1471: HBoxModel / HTMLModel / FloatProgressModel / LayoutModel / ProgressStyleModel / DescriptionStyleModel entries for the notebook's progress bars ("Fetching 37 files: 100%", " 37/37 [00:00<00:00, 3657.54it/s]", "Map: 100%", max/value 1044), with keys such as 68ff2fc00bd041e7b79a811e3de1e596, 4c41e81bcd254df7b1265206a5a6b40b, 7751defbc4534d518d9e923b9019aa8b, dd24270dc07942a6972fbfaf58129989 and e11d04a9d22a4229922e3eb4e3eb6466 moved or renamed; no code changes in this region]
@@ -1491,4 +1284,4 @@
1491 |  },
1492 |  "nbformat": 4,
1493 |  "nbformat_minor": 5
1494 | - }
7 |   "metadata": {
8 |    "id": "a87fe5f3"
9 |   },
10 | + "outputs": [
11 | +  {
12 | +   "name": "stderr",
13 | +   "output_type": "stream",
14 | +   "text": [
15 | +    "/opt/anaconda3/envs/CSCI544/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
16 | +    " from .autonotebook import tqdm as notebook_tqdm\n"
17 | +   ]
18 | +  }
19 | + ],
20 |  "source": [
21 |   "import pandas as pd\n",
22 |   "import torch\n",
32 |   "import os\n",
33 |   "import numpy as np\n",
34 |   "\n",
35 | + "\n",
36 |   "\"\"\"\"\n",
37 |   "with contextlib.redirect_stdout(sys.__stdout__), contextlib.redirect_stderr(sys.__stderr__):\n",
38 |   " %pip install datasets\n",
51 |  },
52 |  "outputs": [],
53 |  "source": [
54 | + "is_google_colab = False\n",
55 |   "use_bnb = False"
56 |  ]
57 | },
80 |   "id": "47577a7f",
81 |   "outputId": "999c4e88-3f89-49b1-9e21-abac91703bf3"
82 |  },
83 | + "outputs": [],
84 |  "source": [
85 |   "current_read_path = \"./\"\n",
86 |   "current_write_path = \"./\"\n",
120 |   "from src.prompts.pre_rag_prompt import input_text as input_prompt"
121 |  ]
122 | },
123 | + {
124 | +  "cell_type": "markdown",
125 | +  "id": "fdd54aab",
126 | +  "metadata": {},
127 | +  "source": []
128 | + },
129 | {
130 |  "cell_type": "code",
131 |  "execution_count": 5,
162 |  },
163 |  "outputs": [
164 |   {
165 |    "name": "stderr",
166 |    "output_type": "stream",
167 |    "text": [
168 | +   "/var/folders/g0/47tr69v179dg7w6zyphp9b280000gn/T/ipykernel_70691/3415966509.py:2: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
169 | +   " df = df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n"
170 |   ]
171 |  }
172 | ],
173 | "source": [
174 |  "df = pd.read_csv(read_path(\"train-data/sql_train.tsv\"), sep='\\t')\n",
175 |  "df = df.applymap(lambda x: re.sub(r'\\s+', ' ', x) if isinstance(x, str) else x)\n",
176 |  "\n",
177 |  "model_name = read_path(\"deepseek-coder-1.3b-instruct\")\n",
178 |  "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
179 |  "device_name = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
180 |  "device = torch.device(device_name)\n",
181 | + "\n",
182 |  "model = AutoModelForCausalLM.from_pretrained(\n",
183 |  " model_name,\n",
184 |  " device_map=device\n",
185 |  ")\n",
186 |  "tokenizer.truncation_side = \"left\"\n"
187 | ]
188 | },
189 | {
190 |  "cell_type": "code",
191 |  "execution_count": 7,
192 | + "id": "11a687f7",
193 | + "metadata": {},
194 | + "outputs": [
195 | +  {
196 | +   "name": "stderr",
197 | +   "output_type": "stream",
198 | +   "text": [
199 | +    "Map: 100%|██████████| 1044/1044 [00:07<00:00, 132.69 examples/s]\n"
200 | +   ]
201 | +  }
202 | + ],
203 |  "source": [
204 | +  "from src.rag.get_tokenized_dataset import get_tokenized_dataset\n",
205 |   "\n",
206 | +  "train_dataset, val_dataset = get_tokenized_dataset(df, tokenizer, input_prompt)\n"
207 |  ]
208 | },
209 | {
210 |  "cell_type": "code",
211 |  "execution_count": 9,
212 | + "id": "bc8b8212",
213 | + "metadata": {},
214 |  "outputs": [
215 |   {
216 | +   "ename": "AttributeError",
217 | +   "evalue": "'tuple' object has no attribute '__dict__'",
218 | +   "output_type": "error",
219 | +   "traceback": [
220 | +    "---------------------------------------------------------------------------",
221 | +    "AttributeError                            Traceback (most recent call last)",
222 | +    "Cell In[9], line 3: model, trainer = initialize_deepseek_model(model, device, tokenizer, train_dataset, val_dataset, MODEL_DIR)",
223 | +    "File ~/Documents/USC/spring_2025/NLP/SQL-Generation/src/model/initialize_deepseek_model.py:23, in initialize_deepseek_model(model, device, tokenizer, train_dataset, val_dataset, MODEL_DIR): ---> 23 model = get_peft_model(model, lora_config)",
224 | +    "File /opt/anaconda3/envs/CSCI544/lib/python3.11/site-packages/peft/mapping_func.py:66, in get_peft_model(model, peft_config, adapter_name, mixed, autocast_adapter_dtype, revision, low_cpu_mem_usage): ---> 66 new_name = model.__dict__.get(\"name_or_path\", None)",
225 | +    "AttributeError: 'tuple' object has no attribute '__dict__'"
226 |   ]
227 |  }
228 | ],
229 | "source": [
230 | + "from src.model.initialize_deepseek_model import initialize_deepseek_model\n",
231 |  "\n",
232 | + "model, trainer = initialize_deepseek_model(model, device, tokenizer, train_dataset, val_dataset, MODEL_DIR)"
233 | ]
234 | },
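A note on the AttributeError captured above: peft's get_peft_model reads model.__dict__, so the traceback only says that the name model was bound to a tuple, not to the AutoModelForCausalLM instance, when this cell ran (easy to hit in a notebook when cells are re-run out of order). A defensive pattern, sketched here with the variable names used in the surrounding cells and not taken from the commit, is to rebuild the base model right before wrapping it:

    from transformers import AutoModelForCausalLM

    # Hypothetical guard, not part of the commit: make sure `model` really is the
    # base transformers model before it is wrapped with LoRA adapters inside
    # initialize_deepseek_model -> get_peft_model.
    if isinstance(model, tuple):
        model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device)

    model, trainer = initialize_deepseek_model(model, device, tokenizer, train_dataset, val_dataset, MODEL_DIR)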
235 | {
236 |  "cell_type": "code",
237 | + "execution_count": 9,
238 |  "id": "8890a657",
239 |  "metadata": {
240 |   "colab": {
245 |  },
246 |  "outputs": [
247 |   {
248 |    "name": "stdout",
249 | +   "output_type": "stream",
250 | +   "text": [
251 | +    "'NoneType' object has no attribute 'cadam32bit_grad_fp32'\n"
252 | +   ]
253 | +  },
254 | +  {
255 | +   "name": "stderr",
256 | +   "output_type": "stream",
257 |    "text": [
258 | +    "/opt/anaconda3/envs/CSCI544/lib/python3.11/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.\n",
259 | +    " warn(\"The installed version of bitsandbytes was compiled without GPU support. \"\n"
260 |   ]
261 |  }
262 | ],
281 |  "\n",
282 |  "# Wrap model with LoRA adapters\n",
283 |  "model = get_peft_model(model, lora_config)\n",
284 | + "model = model.to(device)"
285 | ]
286 | },
287 | {
288 |  "cell_type": "code",
289 | + "execution_count": 10,
290 |  "id": "d9508451",
291 |  "metadata": {
292 |   "colab": {
297 |  },
298 |  "outputs": [
299 |   {
300 |    "name": "stderr",
301 | +   "output_type": "stream",
302 |    "text": [
303 | +    "/var/folders/g0/47tr69v179dg7w6zyphp9b280000gn/T/ipykernel_70404/2486149154.py:21: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
304 |     " trainer = Trainer(\n",
305 |     "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
306 |    ]
339 | },
340 | {
341 |  "cell_type": "code",
342 | + "execution_count": 11,
343 |  "id": "b0ff5278",
344 |  "metadata": {
345 |   "colab": {
351 |  },
352 |  "outputs": [
353 |   {
354 |    "name": "stderr",
355 | +   "output_type": "stream",
356 |    "text": [
357 | +    "\n",
358 | +    "KeyboardInterrupt\n",
359 | +    "\n"
360 |    ]
361 |   }
362 | ],
363 | "source": [
372 | },
373 | {
374 |  "cell_type": "code",
375 | + "execution_count": null,
376 | + "id": "J7qO7FE73i40",
377 | + "metadata": {
378 | +  "id": "J7qO7FE73i40"
379 | + },
380 | + "outputs": [],
381 |  "source": [
382 |   "\n",
383 |   "# Prepare query with the same prompt\n",
393 |   "model_output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)\n",
394 |   "\n",
395 |   "print(\"Generated Tables:\", model_output)"
396 | + ]
397 | },
398 | {
399 |  "cell_type": "code",
400 | + "execution_count": null,
401 | + "id": "kwHMVyQa3n89",
402 | + "metadata": {
403 | +  "id": "kwHMVyQa3n89"
404 | + },
405 | + "outputs": [],
406 |  "source": [
407 |   "import sqlite3 as sql\n",
408 |   "\n",
420 |   " question, tables = full_example[prompt_length:].split(\"Tables:\\n\")\n",
421 |   " print(question)\n",
422 |   " print(tables)\n",
423 | +  " break\n"
424 | + ]
425 | },
426 | {
427 |  "cell_type": "code",
428 | + "execution_count": null,
429 | + "id": "LhiHqAaB9uE4",
430 | + "metadata": {
431 | +  "id": "LhiHqAaB9uE4"
432 | + },
433 | + "outputs": [],
434 |  "source": [
435 |   "def extract_tables_from_string(s):\n",
436 |   " keywords = {\"game\", \"team\", \"other_stats\"}\n",
437 |   " found = {k for k in keywords if k in s}\n",
438 |   " return found"
439 | + ]
440 | },
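For reference, extract_tables_from_string above is a plain substring match against the three known table names, so for example:

    extract_tables_from_string("game, other_stats")   # -> {"game", "other_stats"}
    extract_tables_from_string("SELECT * FROM team")  # -> {"team"}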
441 | {
442 |  "cell_type": "markdown",
443 | + "id": "Kdd8nxWD9txh",
444 |  "metadata": {
445 |   "id": "Kdd8nxWD9txh"
446 |  },
447 | + "source": []
448 | },
449 | {
450 |  "cell_type": "code",
451 | + "execution_count": null,
452 | + "id": "KjAXaUgp4TfY",
453 | + "metadata": {
454 | +  "id": "KjAXaUgp4TfY"
455 | + },
456 | + "outputs": [],
457 |  "source": [
458 |   "def compare_table_lists(actual_tables, generated_tables):\n",
459 |   " actual_set = extract_tables_from_string(actual_tables)\n",
461 |   "\n",
462 |   " # Check if they match\n",
463 |   " return generated_set == actual_set"
464 | + ]
465 | },
466 | {
467 |  "cell_type": "code",
468 | + "execution_count": null,
469 | + "id": "8h7bpMML6G6v",
470 | + "metadata": {
471 | +  "id": "8h7bpMML6G6v"
472 | + },
473 | + "outputs": [],
474 |  "source": [
475 |   "\n",
476 |   "num_sql_matched = 0\n",
508 |   "\n",
509 |   "print(\"Accuracy :\", num_sql_matched/len(val_dataset))\n",
510 |   "\n"
511 | + ]
512 | },
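The body of the evaluation loop in this cell is collapsed by the diff view; from the pieces that are visible (the "Tables:\n" split, extract_tables_from_string, compare_table_lists and the final Accuracy print), a loop of roughly this shape is implied. This is a sketch under those assumptions, not the committed code; the generation settings and the input_ids field name are guesses:

    num_sql_matched = 0

    for example in val_dataset:
        # Recover the question and the gold table list from the formatted prompt text.
        full_example = tokenizer.decode(example["input_ids"], skip_special_tokens=True)
        question, actual_tables = full_example[len(input_prompt):].split("Tables:\n")

        # Ask the model for its table prediction on the same question.
        inputs = tokenizer(input_prompt + question + "Tables:\n", return_tensors="pt").input_ids.to(device)
        outputs = model.generate(inputs, max_new_tokens=16)
        generated_tables = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)

        if compare_table_lists(actual_tables, generated_tables):
            num_sql_matched += 1

    print("Accuracy :", num_sql_matched / len(val_dataset))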
513 | {
514 |  "cell_type": "code",
515 | + "execution_count": null,
516 | + "id": "CoJeZ4FoUMp_",
517 | + "metadata": {
518 | +  "id": "CoJeZ4FoUMp_"
519 | + },
520 | + "outputs": [],
521 |  "source": [
522 |   "\n",
523 |   "num_sql_matched = 0\n",
555 |   "\n",
556 |   "print(\"Accuracy :\", num_sql_matched/len(val_dataset))\n",
557 |   "\n"
558 | + ]
559 | },
560 | {
561 |  "cell_type": "code",
562 | + "execution_count": null,
563 | + "id": "lNG1joS3T8DN",
564 |  "metadata": {
565 |   "id": "lNG1joS3T8DN"
566 |  },
567 | + "outputs": [],
568 | + "source": [
569 | +  "model = AutoModelForCausalLM.from_pretrained(MODEL_DIR, torch_dtype=torch.bfloat16, device_map=device)\n",
570 | +  "tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)\n"
571 | + ]
572 | }
573 | ],
574 | "metadata": {
575 | + "accelerator": "GPU",
576 | + "colab": {
577 | +  "gpuType": "A100",
578 | +  "provenance": []
579 | + },
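With the fine-tuned weights reloaded from MODEL_DIR in the cell above, the same table-selection prompt can be replayed against the saved checkpoint. A short usage sketch (the question string is a made-up example and max_new_tokens is an assumption; the other names follow the cells above):

    question = "How many home games did each team win?"  # hypothetical example question
    inputs = tokenizer(input_prompt + question + "\nTables:\n", return_tensors="pt").input_ids.to(device)
    outputs = model.generate(inputs, max_new_tokens=16)
    print("Generated Tables:", tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True))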
580 | "kernelspec": {
581 |  "display_name": "Python 3",
582 |  "name": "python3"
593 |  "pygments_lexer": "ipython3",
594 |  "version": "3.11.11"
595 | },
596 | "widgets": {
597 |  "application/vnd.jupyter.widget-state+json": {
598-1283 | + [added/reordered ipywidgets widget-state metadata for the progress bars shown above: LayoutModel, HTMLModel, HBoxModel, FloatProgressModel, ProgressStyleModel and DescriptionStyleModel entries (keys such as 13ae11c314664c44ae18d35cf57a1334, 17ddbb74e1764f37b8d34c311fae200c, 1a8c093fccbb437db6e0390a920f5cc5, 68ff2fc00bd041e7b79a811e3de1e596, 9200f1303f124bddaa6114cdf0f5f878, e68cfd05ba994a34b93107d2eab82ad3 and fe6352bce22a40e7a936e7f90313bd02) carrying the "Fetching 37 files: 100%", " 37/37 [00:00<00:00, 3657.54it/s]", "Map: 100%" and " 1044/1044 [00:10<00:00, 43.90 examples/s]" progress widgets; no code changes in this region]
1284 | },
1285 | "nbformat": 4,
1286 | "nbformat_minor": 5
1287 | + }