codefuse-admin's picture
upload model weights files
cc0729a verified
raw
history blame contribute delete
1.22 kB
{
  "activation_dropout": 0.0,
  "attention_dropout": 0.0,
  "attn_cfg": {
    "ffn_expand_ratio": 1.3333333333333333,
    "num_heads": 16,
    "num_heads_k": 1,
    "num_heads_v": 16,
    "rotary_emb_dim": -1,
    "rotary_emb_scale_base": null,
    "window_size": 4096
  },
  "d_model": 2048,
  "dropout": 0.0,
  "initializer_range": 0.02,
  "max_position_embeddings": 4096,
  "mixer_cfg": {
    "expand_ratio": 2.0,
    "input_gate_low_rank": "auto",
    "mem_size": 64,
    "mode": "fused_chunk",
    "normalize_epsilon": 1e-05
  },
  "model_type": "rodimus",
  "n_layer": 24,
  "norm_epsilon": 1e-05,
  "residual_in_fp32": true,
  "tie_word_embeddings": true,
  "transformers_version": "4.37.2",
  "use_cache": true,
  "use_fast_path": true,
  "vocab_size": 126464,
  "eos_token_id": 126081,
  "bos_token_id": 126080,
  "pad_token_id": null,
  "block_type": "rodimus_plus",
  "auto_map": {
    "AutoConfig": "configuration_rodimus.RodimusConfig",
    "AutoModel": "modeling_rodimus.RodimusForCausalLM",
    "AutoModelForCausalLM": "modeling_rodimus.RodimusForCausalLM"
  },
  "torch_dtype": "float16"
}