# Spaces: Sleeping  (page-status residue from the web capture; not part of the module)
from dataclasses import dataclass, field
from typing import Optional
@dataclass
class DataTrainingArguments:
    """
    Arguments pertaining to what data we are going to input our model for training and eval.

    Every attribute is declared with ``dataclasses.field`` and carries a
    ``metadata["help"]`` string describing it. The ``@dataclass`` decorator is
    required: without it the attributes remain raw ``Field`` objects and no
    ``__init__`` accepting these names is generated.

    NOTE(review): the ``help`` metadata is presumably consumed by an
    argument parser (e.g. ``transformers.HfArgumentParser``) -- confirm
    against the caller.
    """

    # --- Task / dataset selection -------------------------------------
    task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."})
    dataset_name: Optional[str] = field(
        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
    )
    dataset_config_name: Optional[str] = field(
        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
    )

    # --- Local data files (csv or JSON) -------------------------------
    train_file: Optional[str] = field(
        default=None, metadata={"help": "The input training data file (a csv or JSON file)."}
    )
    validation_file: Optional[str] = field(
        default=None,
        metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."},
    )
    test_file: Optional[str] = field(
        default=None,
        metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."},
    )

    # --- Preprocessing behaviour --------------------------------------
    overwrite_cache: bool = field(
        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
    )
    preprocessing_num_workers: Optional[int] = field(
        default=None,
        metadata={"help": "The number of processes to use for the preprocessing."},
    )
    # Defaults to True (static padding); see the help text for the trade-off.
    pad_to_max_length: bool = field(
        default=True,
        metadata={
            "help": "Whether to pad all samples to model maximum sentence length. "
            "If False, will pad the samples dynamically when batching to the maximum length in the batch. More "
            "efficient on GPU but very bad for TPU."
        },
    )

    # --- Optional per-split sample caps (None means no cap is set) ----
    max_train_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of training examples to this "
            "value if set."
        },
    )
    max_val_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of validation examples to this "
            "value if set."
        },
    )
    max_test_samples: Optional[int] = field(
        default=None,
        metadata={
            "help": "For debugging purposes or quicker training, truncate the number of test examples to this "
            "value if set."
        },
    )

    # --- Labelling / metric reporting ---------------------------------
    label_all_tokens: bool = field(
        default=False,
        metadata={
            "help": "Whether to put the label for one word on all tokens of generated by that word or just on the "
            "one (in which case the other tokens will have a padding index)."
        },
    )
    return_entity_level_metrics: bool = field(
        default=False,
        metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."},
    )
@dataclass
class XFUNDataTrainingArguments(DataTrainingArguments):
    """
    Data arguments extending ``DataTrainingArguments`` with language selection.

    Inherits every field of the parent class and adds two optional string
    fields. ``@dataclass`` must be applied here as well, otherwise ``lang``
    and ``additional_langs`` would not become ``__init__`` parameters.
    """

    # Defaults to "en". NOTE(review): presumably a language code selecting
    # the dataset split/language to use -- confirm against the caller.
    lang: Optional[str] = field(default="en")
    # No additional languages by default. NOTE(review): the expected format
    # (single code vs. delimiter-separated list) is not visible here -- confirm.
    additional_langs: Optional[str] = field(default=None)