Spaces: Runtime error
Fix bug when creating TrainingArguments and Trainer
spanish_medica_llm.py CHANGED (+6 -7)
@@ -16,10 +16,10 @@ from datasets import load_dataset, concatenate_datasets
 from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
-    BitsAndBytesConfig,
+    BitsAndBytesConfig,
+    DataCollatorForLanguageModeling,
     TrainingArguments,
-    Trainer
-    DataCollatorForLanguageModeling
+    Trainer
 )
 
 from accelerate import FullyShardedDataParallelPlugin, Accelerator
@@ -399,7 +399,7 @@ def getTokenizedDataset(dataset, tokenizer):
         return dataset
 
     return dataset.map(
-        lambda element : tokenize(element, tokenizer)
+        lambda element : tokenize(element, tokenizer),
         batched = True,
         remove_columns = dataset["train"].column_names
     )
@@ -497,8 +497,7 @@ def configAndRunTraining(basemodel, dataset, eval_dataset, tokenizer):
     else:
         tokenizer.pad_token = tokenizer.eos_token
     data_collator_pretrain = DataCollatorForLanguageModeling(tokenizer, mlm = False)
-
-    training_args = transformers.TrainingArguments(
+    training_args = TrainingArguments(
         output_dir=output_dir,
         push_to_hub = True,
         hub_private_repo = False,
@@ -524,7 +523,7 @@ def configAndRunTraining(basemodel, dataset, eval_dataset, tokenizer):
         bf16=False
     )
 
-    trainer = 
+    trainer = Trainer(
         model= basemodel,
         train_dataset = dataset['train'],
         eval_dataset = eval_dataset,