craffel HF Staff committed on
Commit
5f8478c
·
verified ·
1 Parent(s): 6b52a27

Upload checkpoints/0000100000/consolidated/params.json with huggingface_hub

Browse files
checkpoints/0000100000/consolidated/params.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name": "comma_v0p1_yolooooo", "dump_dir": "/fsx/craffel/lingua_logs/comma_v0p1/", "seed": 777, "grad_acc_steps": 4, "gc_collect_freq": 1000, "probe_freq": null, "steps": 500000, "data": {"root_dir": "/scratch/craffel/lingua/data/", "sources": {"peS2o": 0.274065475510351, "stackexchange": 0.134617935796937, "stackv2_edu": 0.127770669195666, "cccc": 0.0871992270000557, "wikimedia": 0.0861800315862719, "github_archive": 0.0606452345122248, "uspto": 0.0413469377516883, "pubmed": 0.0367902799837971, "arxiv_papers": 0.0292395449667613, "caselaw_access_project": 0.0193875362722656, "wikiteam": 0.0137485410839637, "doab": 0.0180439781895451, "uk_hansard": 0.0144498535570883, "pre_1929_books": 0.0115755547988338, "ubuntu_irc": 0.00794254267719456, "regulations": 0.00762583706405442, "data_provenance_initiative": 0.00512264496834867, "project_gutenberg": 0.00502100654070129, "youtube": 0.00465917165839394, "arxiv_abstracts": 0.00359635066160403, "stackv2_html": 0.00225924255952781, "usgpo": 0.00226024581728848, "library_of_congress": 0.00222469340783564, "biodiversity_heritage_library": 0.00221737524370278, "pressbooks": 0.000865101033213598, "libretexts": 0.00054149556727006, "news": 0.000372716196818104, "foodista": 0.000125363443065615, "oercommons": 7.78696843693821e-05, "python_enhancement_proposals": 1.69983991984805e-05, "public_domain_review": 1.05448719635173e-05}, "batch_size": 2, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 4096, "tokenizer": {"name": "tiktoken", "path": "/fsx/craffel/lingua/tokenizers/common-pile-tokenizer.tiktoken"}}, "optim": {"lr": 0.001, "weight_decay": 0.2, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 2000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 4096, "n_layers": 32, "head_dim": null, "n_heads": 32, "n_kv_heads": 
null, "ffn_dim_multiplier": 1.0, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 100000.0, "init_base_std": null, "init_std_factor": "disabled", "max_seqlen": 4096, "seed": 42, "vocab_size": 64256, "weight_tying": false, "sliding_window": null}, "distributed": {"dp_shard": 1, "dp_replicate": 64, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": false, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 10000, "keep": -1}, "eval": {"every": 2000, "keep": 3}, "path": "/fsx/craffel/lingua_logs/comma_v0p1/checkpoints", "init_ckpt_path": null, "continue_training_from_init": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 8, "eval": {"harness": {"tasks": ["hellaswag", {"task": "boolq", "dataset_kwargs": {"trust_remote_code": true}}, "piqa", {"task": "social_iqa", "dataset_kwargs": {"trust_remote_code": true}}, "winogrande", "openbookqa", "arc_easy", "arc_challenge", "race", "commonsense_qa", {"task": "copa", "dataset_kwargs": {"trust_remote_code": true}}, "mmlu", "mmlu_pro"]}, "generator": {"max_tokens": 8192, "dtype": "bf16"}}}