# Data paths and run options for training on the EleutherAI cluster
{
  # Dataset locations. All three splits reference the same dataset prefix.
  # NOTE(review): confirm the train/valid/test division is configured
  # elsewhere, otherwise evaluation runs on the training data.
  "train-data-paths": ["/fsx/pile/pile_20B_tokenizer_text_document"],
  "valid-data-paths": ["/fsx/pile/pile_20B_tokenizer_text_document"],
  "test-data-paths": ["/fsx/pile/pile_20B_tokenizer_text_document"],

  # Tokenizer selection and the vocabulary file it reads.
  "tokenizer_type": "HFTokenizer",
  "vocab-file": "/fsx/pile/20B_tokenizer.json",

  # Checkpoint directories. "save" and "load" are the same path.
  # NOTE(review): presumably this lets a restarted run resume from its own
  # checkpoints; verify against the launcher.
  "save": "/fsx/shiv/gpt-neox/runs/checkpoints/pythia/13b",
  "load": "/fsx/shiv/gpt-neox/runs/checkpoints/pythia/13b",

  # Output directories for TensorBoard data and logs.
  "tensorboard-dir": "/fsx/shiv/gpt-neox/runs/tensorboard",
  "log-dir": "/fsx/shiv/gpt-neox/runs/logs",

  # Weights & Biases team / project / group labels for this run.
  "wandb_team": "eleutherai",
  "wandb_project": "pythia",
  "wandb_group": "13B Decay",

  # Launcher settings.
  # NOTE(review): keys in this file mix hyphen and underscore styles; they are
  # left unchanged because the consumer's key handling is not visible here.
  "launcher": "openmpi",
  "deepspeed_mpi": true
}