diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..c4cb545ce9c9bbbff6607aa2f92b4a57a0851402 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 022e1f3d2e222a6cccf8e3df51522259af995e52..2d0989b13d4b055aa2db8e3b6bb471470df0c4ca 100644 --- a/README.md +++ b/README.md @@ -107,3 +107,4 @@ For reasoning evaluations, we estimate pass@1 based on 10 runs with different se | MATH-500
pass@1 | 99.84 | 97.24 | 97.08 | | GPQA Diamond
pass@1 | 98.01 | 73.38 | 71.92 | | **Reasoning
Average Score** | **98.81** | **82.99** | **82.00** | + diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..af3145f4e827dd4bda40db2ccf82bf0183585d93 --- /dev/null +++ b/config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89147ca38cc1d65ae38e72cd7dca9906fb7f5436dba18de1b150f88196594cb1 +size 2538 diff --git a/configuration_deepseek.py b/configuration_deepseek.py new file mode 100644 index 0000000000000000000000000000000000000000..f2a42479fd055c4fe236178953965f6353d16b7f --- /dev/null +++ b/configuration_deepseek.py @@ -0,0 +1,210 @@ +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + +logger = logging.get_logger(__name__) + +DEEPSEEK_PRETRAINED_CONFIG_ARCHIVE_MAP = {} +class DeepseekV3Config(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`DeepseekV3Model`]. It is used to instantiate a DeepSeek + model according to the specified arguments, defining the model architecture. Instantiating a configuration with the + defaults will yield a similar configuration to that of the DeepSeek-V3. + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + + Args: + vocab_size (`int`, *optional*, defaults to 129280): + Vocabulary size of the DeepSeek model. Defines the number of different tokens that can be represented by the + `input_ids` passed when calling [`DeepseekV3Model`] + hidden_size (`int`, *optional*, defaults to 7168): + Dimension of the hidden representations. + intermediate_size (`int`, *optional*, defaults to 18432): + Dimension of the MLP representations. + moe_intermediate_size (`int`, *optional*, defaults to 2048): + Dimension of the MoE representations.
+ num_hidden_layers (`int`, *optional*, defaults to 61): + Number of hidden layers in the Transformer decoder. + num_nextn_predict_layers (`int`, *optional*, defaults to 1): + Number of nextn predict layers in the DeepSeekV3 Model. + num_attention_heads (`int`, *optional*, defaults to 128): + Number of attention heads for each attention layer in the Transformer decoder. + n_shared_experts (`int`, *optional*, defaults to 1): + Number of shared experts, None means dense model. + n_routed_experts (`int`, *optional*, defaults to 256): + Number of routed experts, None means dense model. + routed_scaling_factor (`float`, *optional*, defaults to 2.5): + Scaling factor of routed experts. + topk_method (`str`, *optional*, defaults to `noaux_tc`): + Topk method used in routed gate. + n_group (`int`, *optional*, defaults to 8): + Number of groups for routed experts. + topk_group (`int`, *optional*, defaults to 4): + Number of selected groups for each token (for each token, ensuring the selected experts are only within `topk_group` groups). + num_experts_per_tok (`int`, *optional*, defaults to 8): + Number of selected experts, None means dense model. + moe_layer_freq (`int`, *optional*, defaults to 1): + The frequency of the MoE layer: one expert layer for every `moe_layer_freq - 1` dense layers. + first_k_dense_replace (`int`, *optional*, defaults to 3): + Number of dense layers in shallow layers(embed->dense->dense->...->dense->moe->moe...->lm_head). + \--k dense layers--/ + norm_topk_prob (`bool`, *optional*, defaults to True): + Whether to normalize the weights of the routed experts. + scoring_func (`str`, *optional*, defaults to 'sigmoid'): + Method of computing expert weights. + aux_loss_alpha (`float`, *optional*, defaults to 0.001): + Auxiliary loss weight coefficient. + seq_aux (`bool`, *optional*, defaults to True): + Whether to compute the auxiliary loss for each individual sample.
+ num_key_value_heads (`int`, *optional*): + This is the number of key_value heads that should be used to implement Grouped Query Attention. If + `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if + `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When + converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed + by meanpooling all the original heads within that group. For more details check out [this + paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to + `num_attention_heads`. + hidden_act (`str` or `function`, *optional*, defaults to `"silu"`): + The non-linear activation function (function or string) in the decoder. + max_position_embeddings (`int`, *optional*, defaults to 4096): + The maximum sequence length that this model might ever be used with. + initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + rms_norm_eps (`float`, *optional*, defaults to 1e-06): + The epsilon used by the rms normalization layers. + use_cache (`bool`, *optional*, defaults to `True`): + Whether or not the model should return the last key/values attentions (not used by all models). Only + relevant if `config.is_decoder=True`. + pad_token_id (`int`, *optional*): + Padding token id. + bos_token_id (`int`, *optional*, defaults to 0): + Beginning of stream token id. + eos_token_id (`int`, *optional*, defaults to 1): + End of stream token id. + pretraining_tp (`int`, *optional*, defaults to 1): + Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this + document](https://huggingface.co/docs/transformers/parallelism) to understand more about it. This value is + necessary to ensure exact reproducibility of the pretraining results.
Please refer to [this + issue](https://github.com/pytorch/pytorch/issues/76232). + tie_word_embeddings (`bool`, *optional*, defaults to `False`): + Whether to tie weight embeddings + rope_theta (`float`, *optional*, defaults to 10000.0): + The base period of the RoPE embeddings. + rope_scaling (`Dict`, *optional*): + Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling + strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format is + `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update + `max_position_embeddings` to the expected new maximum. + attention_bias (`bool`, *optional*, defaults to `False`): + Whether to use a bias in the query, key, value and output projection layers during self-attention. + attention_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. + + ```python + >>> from transformers import DeepseekV3Model, DeepseekV3Config + + >>> # Initializing a Deepseek-V3 style configuration + >>> configuration = DeepseekV3Config() + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + + model_type = "deepseek_v3" + keys_to_ignore_at_inference = ["past_key_values"] + + def __init__( + self, + vocab_size=129280, + hidden_size=7168, + intermediate_size=18432, + moe_intermediate_size = 2048, + num_hidden_layers=61, + num_nextn_predict_layers=1, + num_attention_heads=128, + num_key_value_heads=128, + n_shared_experts = 1, + n_routed_experts = 256, + ep_size = 1, + routed_scaling_factor = 2.5, + kv_lora_rank = 512, + q_lora_rank = 1536, + qk_rope_head_dim = 64, + v_head_dim = 128, + qk_nope_head_dim = 128, + topk_method = 'noaux_tc', + n_group = 8, + topk_group = 4, + num_experts_per_tok = 8, + moe_layer_freq = 1, + first_k_dense_replace = 3, + norm_topk_prob = True, + scoring_func = 'sigmoid', + aux_loss_alpha = 0.001, + seq_aux = 
True, + hidden_act="silu", + max_position_embeddings=4096, + initializer_range=0.02, + rms_norm_eps=1e-6, + use_cache=True, + pad_token_id=None, + bos_token_id=0, + eos_token_id=1, + pretraining_tp=1, + tie_word_embeddings=False, + rope_theta=10000.0, + rope_scaling=None, + attention_bias=False, + attention_dropout=0.0, + **kwargs, + ): + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.moe_intermediate_size = moe_intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_nextn_predict_layers = num_nextn_predict_layers + self.num_attention_heads = num_attention_heads + self.n_shared_experts = n_shared_experts + self.n_routed_experts = n_routed_experts + self.ep_size = ep_size + self.routed_scaling_factor = routed_scaling_factor + self.kv_lora_rank = kv_lora_rank + self.q_lora_rank = q_lora_rank + self.qk_rope_head_dim = qk_rope_head_dim + self.v_head_dim = v_head_dim + self.qk_nope_head_dim = qk_nope_head_dim + self.topk_method = topk_method + self.n_group = n_group + self.topk_group = topk_group + self.num_experts_per_tok = num_experts_per_tok + self.moe_layer_freq = moe_layer_freq + self.first_k_dense_replace = first_k_dense_replace + self.norm_topk_prob = norm_topk_prob + self.scoring_func = scoring_func + self.aux_loss_alpha = aux_loss_alpha + self.seq_aux = seq_aux + # for backward compatibility + if num_key_value_heads is None: + num_key_value_heads = num_attention_heads + + self.num_key_value_heads = num_key_value_heads + self.hidden_act = hidden_act + self.initializer_range = initializer_range + self.rms_norm_eps = rms_norm_eps + self.pretraining_tp = pretraining_tp + self.use_cache = use_cache + self.rope_theta = rope_theta + self.rope_scaling = rope_scaling + self.attention_bias = attention_bias + self.attention_dropout = attention_dropout + + super().__init__( + pad_token_id=pad_token_id, + bos_token_id=bos_token_id, + 
eos_token_id=eos_token_id, + tie_word_embeddings=tie_word_embeddings, + **kwargs, + ) \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5e6ab72f83b7c8322e8b91cf1155dc6335d5c36d --- /dev/null +++ b/generation_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4dc13c25dceacef9ce0c0b19b75457dad191a8caa2d1af4126cb3c90737bc02 +size 116 diff --git a/model-00001-of-00063.safetensors b/model-00001-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b98d82732537b1b50e4286a244397769c2401c86 --- /dev/null +++ b/model-00001-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98085a9e8ab03f341f7e7c50fd336a20302f0471e2f2e4856f84a388678ef0a6 +size 1853358192 diff --git a/model-00002-of-00063.safetensors b/model-00002-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35b8caaf066e402db51465aefc8823deacb7d020 --- /dev/null +++ b/model-00002-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1cd5dfbad06ac95a25c25c1b53f21681a69a142410894058d1701b093b060b5 +size 1166968184 diff --git a/model-00003-of-00063.safetensors b/model-00003-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7ad847aae9d251ef591dd8250e1902e9a9ef12f --- /dev/null +++ b/model-00003-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53d7a5a08819a4bd479a4793f3d90b376e12e4e05a7dbf859c1016cffaa12fce +size 1166968184 diff --git a/model-00004-of-00063.safetensors b/model-00004-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69ccc689eeaf40c4bd5d4679b973695ffdbb41ae --- /dev/null +++ b/model-00004-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b1ecacb40ffed3402032a4b8322f0b2e5a8d68203875ae636e89b4ade370f0b9 +size 1166968184 diff --git a/model-00005-of-00063.safetensors b/model-00005-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..576c8e34c4f4e89c392bdec683868b4cd17069da --- /dev/null +++ b/model-00005-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0493180d7a0c8cd75985366572657bac561ff0823e06076d791793e0d7f11d7 +size 6279597088 diff --git a/model-00006-of-00063.safetensors b/model-00006-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2942ee878c848ab18b34ff60c463e31e487a7090 --- /dev/null +++ b/model-00006-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8b5a2fd8e75d17bf4f6869204e6cc252a61eef25a3efe33b2ae9ce42dbf7d66 +size 6279597088 diff --git a/model-00007-of-00063.safetensors b/model-00007-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..926f236206cdc924fe05bc841b66ababde9766dc --- /dev/null +++ b/model-00007-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edfcbb6b866b75e43097b30ac1588d3a3827d00cec5397aeb1aad6c6ab04778f +size 6279597088 diff --git a/model-00008-of-00063.safetensors b/model-00008-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c7f050d25067b2676bd953983395109848e566e --- /dev/null +++ b/model-00008-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e7634d5036dc47d2abdecac2a8cce840afea7c71414d36b20d08e8ca2679a05 +size 6279597088 diff --git a/model-00009-of-00063.safetensors b/model-00009-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82c2df1f3649b13797ea270b8daa485ba87def61 --- /dev/null +++ b/model-00009-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:de951a1183be03b4e0ddd0d111143d64afd5a7a18a45e0569811468a2966568f +size 6279597088 diff --git a/model-00010-of-00063.safetensors b/model-00010-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b864f9f83684f7a7bb5b101d7e03852a34d13635 --- /dev/null +++ b/model-00010-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea9a50c5e643bd7836f57402a323bb11ea23101ce2d168931c49c7fd298cb66 +size 6279597088 diff --git a/model-00011-of-00063.safetensors b/model-00011-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..670e6852335a7964994a44c12ee7e05c8cfba562 --- /dev/null +++ b/model-00011-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ec9b2894868a280bf41e5c9964a5c3f94ec5ee4a1a79ac485ae615126fb76d +size 6279597088 diff --git a/model-00012-of-00063.safetensors b/model-00012-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34684c704b96393b1d61ad19573b94d5489f60d8 --- /dev/null +++ b/model-00012-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa9863a21489b255eeedd74a11c1748c4d5520c208f6620ceb7a59a7d462134f +size 6279599400 diff --git a/model-00013-of-00063.safetensors b/model-00013-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de3f317012612aee4269500ae94d4e6756862010 --- /dev/null +++ b/model-00013-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0647150721ad56b601702cfa76a09fe0522e289932f280ddd8a79c29a64b91ba +size 6279599400 diff --git a/model-00014-of-00063.safetensors b/model-00014-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e81c79dc2f227d5a5b0de4de46b8240a06857238 --- /dev/null +++ b/model-00014-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e3aef734a598f720695756944e667d7cee0d55210a9f7c954886a1931cff2889 +size 6279599400 diff --git a/model-00015-of-00063.safetensors b/model-00015-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4c0306e9ae2f063795118f3daf34ee2c75416bc --- /dev/null +++ b/model-00015-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8957dd866b7dd004993a936a4b245e717a4af8183a9cc4902ecec76d3985a4c +size 6279599400 diff --git a/model-00016-of-00063.safetensors b/model-00016-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c89984ae8647f454723656ef27262e996991848 --- /dev/null +++ b/model-00016-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50a736b24210ec6bb32c468aba4f09a10104baae31e73aff821e15585e8f1f8a +size 6279599400 diff --git a/model-00017-of-00063.safetensors b/model-00017-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aea8e3d9efb7403ccee1e0c39b908ef1a1e39fba --- /dev/null +++ b/model-00017-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8f1007f3316373946e364cd21fb4a369fb0d17d995cb9a3424bb195bef13c5 +size 6279599400 diff --git a/model-00018-of-00063.safetensors b/model-00018-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2ab929542d8ecc9258f89083db927047d711b30 --- /dev/null +++ b/model-00018-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df2fbd53182e2de3f35a3383e1cb7c6bd9228d02f786f5c346e224d052cbf68 +size 6279599400 diff --git a/model-00019-of-00063.safetensors b/model-00019-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b998bcfd2facddd2dac12ed54208dab93c3558d --- /dev/null +++ b/model-00019-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4b1bfaca47ad55fec26f6af7f00d2de40363a739c959242bae96c9b19f7b19f8 +size 6279599400 diff --git a/model-00020-of-00063.safetensors b/model-00020-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8e289005aef446603e9246a063c49347ecbe40e8 --- /dev/null +++ b/model-00020-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4924b983beb6ad50526adf2774890e80094f4e17c00faac7db41ab29892fa732 +size 6279599400 diff --git a/model-00021-of-00063.safetensors b/model-00021-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdb9f8e020e35325d9d8cd64122b3cac65a29d71 --- /dev/null +++ b/model-00021-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5af594938caccf7c31ce5bc723e313b75b6286f50e0c6123ee113b9005c982a +size 6279599400 diff --git a/model-00022-of-00063.safetensors b/model-00022-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3810ed6a58155fc5efe9e1fa99d536d12af1bc84 --- /dev/null +++ b/model-00022-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:219cad5f6fadb9311cfd16be12c3ea05f5c44bcd9b56df27db0cd62a97c5e2ab +size 6279599400 diff --git a/model-00023-of-00063.safetensors b/model-00023-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2a8b38fb84f6e249cd0b592c3882fe3028e2991 --- /dev/null +++ b/model-00023-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4373dd11dcfabb9a4681a95646733516d767b328563adfd8bcf96cf6025d82b5 +size 6279599400 diff --git a/model-00024-of-00063.safetensors b/model-00024-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca4d84f753211e02c050e5349d20ce5e5971aabe --- /dev/null +++ b/model-00024-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ac72d89955894dab890beed82d7fdc236bf49a43e0ecaf00df0f44d899d72062 +size 6279599400 diff --git a/model-00025-of-00063.safetensors b/model-00025-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69ad123a6f1bf39a8132af85827411a7b285bcc0 --- /dev/null +++ b/model-00025-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:853389366cb042a8602d7123e51b8d3c29f62ab1df8e17cea75bea73e5fef7b0 +size 6279599400 diff --git a/model-00026-of-00063.safetensors b/model-00026-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03eed943f70037f727fe2bdb9cf33286c2644a68 --- /dev/null +++ b/model-00026-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf8637deeeedb7e77e3fc3570722b77579e17cc9411b6239f9995a04597ee651 +size 6279599400 diff --git a/model-00027-of-00063.safetensors b/model-00027-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..675421a587bd825e0e6787baf989fa98d7397b6e --- /dev/null +++ b/model-00027-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c90a3d59dc9fad25081585a61c2f5ff6010cccaaaa5534439d559230302f01f +size 6279599400 diff --git a/model-00028-of-00063.safetensors b/model-00028-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfc569a0c384dac26baad262530fc8f94145802a --- /dev/null +++ b/model-00028-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f50b51ff6865024dc39926ba9d2b0a6e350b994c523234e149b9c03c4a4967 +size 6279599400 diff --git a/model-00029-of-00063.safetensors b/model-00029-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9eec3353c89fc07b21c33ce0ee32d70dfef75f55 --- /dev/null +++ b/model-00029-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:50b7d1b905987135922b18fcb6a0e1af95cdc4832048dc36eb0277629952929f +size 6279599400 diff --git a/model-00030-of-00063.safetensors b/model-00030-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c905c38438f5f255a9a206681fa54f957c3d6fb --- /dev/null +++ b/model-00030-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56c69856a131de4746d380bc62a02c1683a1113b10f2fedf022fed13fae21ec8 +size 6279599400 diff --git a/model-00031-of-00063.safetensors b/model-00031-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0108c51c963f0481830304e2f964a7c1ae1f4046 --- /dev/null +++ b/model-00031-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f93a90071b1437a496581bbb38113dcac08a0abfaf9b3ce52d968f7cabce56d7 +size 6279599400 diff --git a/model-00032-of-00063.safetensors b/model-00032-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87f01446dcbf526be56197674e3008ba2c6eaa75 --- /dev/null +++ b/model-00032-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78c22db9a73146c1d43a93efe74e72666dcbfaa0ffce37d02827190fdbe1f5b0 +size 6279599400 diff --git a/model-00033-of-00063.safetensors b/model-00033-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a2f279c84621d0ffebced4968c05ec4caf644c0 --- /dev/null +++ b/model-00033-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e478f05fd43bf1657f5486bf3f9b84fe8e784f274e0530759d792eb16f59a882 +size 6279599400 diff --git a/model-00034-of-00063.safetensors b/model-00034-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..570e824e608c904c1067e034d7b41bbf73a6918b --- /dev/null +++ b/model-00034-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3eb686bef2ff9447a18a209835e1a790e1c65749f3df3fade0128e74160c944e +size 6279599400 diff --git a/model-00035-of-00063.safetensors b/model-00035-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..744b4a5ef1cbeeb209ae96a5260291402234f5aa --- /dev/null +++ b/model-00035-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8096a491e46486dda641557c97ad8a2900c76434846a60809721a1cb24c7a91d +size 6279599400 diff --git a/model-00036-of-00063.safetensors b/model-00036-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..509507e12f8c7c9a53ca7149633d37709186be9e --- /dev/null +++ b/model-00036-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3890af877d40506a3792ee2f9c27440530667de92367f064178d2ecd66844928 +size 6279599400 diff --git a/model-00037-of-00063.safetensors b/model-00037-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2545aa7811feff45f37b7a348ec64721b458166 --- /dev/null +++ b/model-00037-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0afb70926f44c34f1cbbf1ec92229d4681b76080fc4c591901455498e4160109 +size 6279599400 diff --git a/model-00038-of-00063.safetensors b/model-00038-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc569895412ecd6dd315852acaae267098df7c3d --- /dev/null +++ b/model-00038-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:905f03cd5a0020926c1525d06813f9fb2cb3f90d974b2be878753c89df8a1ca3 +size 6279599400 diff --git a/model-00039-of-00063.safetensors b/model-00039-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06a67f1099be331888dcfc6d819a874d271992ee --- /dev/null +++ b/model-00039-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b6a78573b9fa18b0151391fbf111aa859d78830218d880b53891f6c6251bb31c +size 6279599400 diff --git a/model-00040-of-00063.safetensors b/model-00040-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6522420e12caab2e53ad6df5961817f81b4b7499 --- /dev/null +++ b/model-00040-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ac420fa5cd28c319c344d1bafbfd0db8434b89cfb1645d0b2a8c3a601f3343 +size 6279599400 diff --git a/model-00041-of-00063.safetensors b/model-00041-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..928cb7ed0ef5aa7a85f36d2c53d031d525709787 --- /dev/null +++ b/model-00041-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:012e1416b39d6420711004ef7200d90a80b2e29a2d6641554a66c87da1938661 +size 6279599400 diff --git a/model-00042-of-00063.safetensors b/model-00042-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1467b651a179078175fa9e10afb98ee274447ae9 --- /dev/null +++ b/model-00042-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:502067d17fb1e04765d834c89bc0d375d730cc2ace49c821c8795554793ecd0b +size 6279599400 diff --git a/model-00043-of-00063.safetensors b/model-00043-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a40abf6fb6969147c6581777fd1b805bda2c48a2 --- /dev/null +++ b/model-00043-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f7d97bce66095e6960bfba82764e06c8fdba1747ed6909c3df5dd737dca428 +size 6279599400 diff --git a/model-00044-of-00063.safetensors b/model-00044-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..093c9c02a97a9226867b051a12daaa020e0c06e0 --- /dev/null +++ b/model-00044-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bbd5fcfe4afc31e60dbbea8d54696f13343ffef9470add0c54d3f0df05e68588 +size 6279599400 diff --git a/model-00045-of-00063.safetensors b/model-00045-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5254c8dd7aad1010f9632c7f41f1b8435c274a7c --- /dev/null +++ b/model-00045-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b828a5fc91e75561540d96d5706b4f93241a42c47e117b7aa80fcda1b6d60ed +size 6279599400 diff --git a/model-00046-of-00063.safetensors b/model-00046-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..899be5562f6341cb24888490a482c747e4c65d2d --- /dev/null +++ b/model-00046-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43418dcdfabbade82986be9b4f1b34fa486f9aa85ced3821d4652108274fbd2c +size 6279599400 diff --git a/model-00047-of-00063.safetensors b/model-00047-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6af33d2f63cc5cf2e96bd839a936fce057325b73 --- /dev/null +++ b/model-00047-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab6bce3256a20967ca2d79eef586f3e82ee52bc18c3047e84b26b0614c6b96a0 +size 6279599400 diff --git a/model-00048-of-00063.safetensors b/model-00048-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4fa9324a82215e753f13708b80c8432087d02d9 --- /dev/null +++ b/model-00048-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca9464965f6a261651e97bfaf705c875d4c6fa269eb801e67222e309b85a2f1 +size 6279599400 diff --git a/model-00049-of-00063.safetensors b/model-00049-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad199d8dfae812c0e9ce130d87c3174fab83ee2c --- /dev/null +++ b/model-00049-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:83cbd125de96e42279d67f9bcb9ec6c6d203fa02ab9f9fe6071960321b7bdd17 +size 6279599400 diff --git a/model-00050-of-00063.safetensors b/model-00050-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cdee4740390cfde88577420c96e240a0f9b2351 --- /dev/null +++ b/model-00050-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeef34a0b7d4321f14062368d4d0ec14a422aec26d52d7f2353674b7ff49c103 +size 6279599400 diff --git a/model-00051-of-00063.safetensors b/model-00051-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ba303c63e22eb03adb1c7844d45344127e3e66d --- /dev/null +++ b/model-00051-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f2bae86e16d035030110fbee788c75b1a8d9ffebcbf441d583e7777550fc992 +size 6279599400 diff --git a/model-00052-of-00063.safetensors b/model-00052-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1c64f3b899ef3ac141c23dbe8551eb0c864abc5 --- /dev/null +++ b/model-00052-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e8d9cf9c858ccba4fc2dd37330c86ee7d7393b2e177c1f1e7bebb8475f4c25 +size 6279599400 diff --git a/model-00053-of-00063.safetensors b/model-00053-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2abe682b20cbc0526fa53659af3e5ad873368012 --- /dev/null +++ b/model-00053-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6baa17baa93269cbd89a3c024a4f0083410d95bcca1eca085811d48e17cffc7d +size 6279599400 diff --git a/model-00054-of-00063.safetensors b/model-00054-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..677b51f53805754f59c2037328d85d2174291c6d --- /dev/null +++ b/model-00054-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:fb3685337da66e03a5d36546514e64523a81e9160576a72bdeeab9919bac3599 +size 6279599400 diff --git a/model-00055-of-00063.safetensors b/model-00055-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42ce4cd02bd1236b75bdeade6012eb94a8fcb065 --- /dev/null +++ b/model-00055-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3122391f691c3711884c2af9330e30962cc2ed927ea18ed6792c82ccdd42077b +size 6279599400 diff --git a/model-00056-of-00063.safetensors b/model-00056-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..250acd06bfcf6a3166a668db48db65d12423642f --- /dev/null +++ b/model-00056-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ea4705f92cdf032972417f588b4ea0ac24dfb7ded61846751cfb71428a0a825 +size 6279599400 diff --git a/model-00057-of-00063.safetensors b/model-00057-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19e44a577c8d0791fa3a65a626e363fcdbfbeb28 --- /dev/null +++ b/model-00057-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:831feda60b83fae136c5f7d0a83b9d5d8eebe5d27f79695a5f25b374270bd706 +size 6279599400 diff --git a/model-00058-of-00063.safetensors b/model-00058-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1842a3a0af3f76b7e12bac3d58d1c6cc0cd8b42 --- /dev/null +++ b/model-00058-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d783bcbcf4736ca2d0ef0904d03cf086c114e83a34a0b621a7809523f851c93a +size 6279599400 diff --git a/model-00059-of-00063.safetensors b/model-00059-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6ea8c777db05e50975510669bceb5ba09cbaca1 --- /dev/null +++ b/model-00059-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:af5c4b32ceb1f82e4a9d2b858714b51020e8c3446c2a0cf96c98b0416bbeecad +size 6279599400 diff --git a/model-00060-of-00063.safetensors b/model-00060-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e73ee75008213d99b1d4d1513636fec32e49bf1 --- /dev/null +++ b/model-00060-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e8903fb25a9a51e6f0e316049bc4ae1ec3fe4444a69c16745103c22e81eb60d +size 6279599400 diff --git a/model-00061-of-00063.safetensors b/model-00061-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac748167e2bc980f667ad59defc7d84f7cae7871 --- /dev/null +++ b/model-00061-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6cf9719043eaa9a1bfcba4b43c055df0b9f407e12a812d1742e8a2d089ed833 +size 6279599400 diff --git a/model-00062-of-00063.safetensors b/model-00062-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca7dd4d69db4a31af01833b7034f7b7209da163c --- /dev/null +++ b/model-00062-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab07ed28341d22668d8bb78c07ba25ca4cf4794b5722cce1247a32db5579210 +size 6279599400 diff --git a/model-00063-of-00063.safetensors b/model-00063-of-00063.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8f2a48bd647f5f73495bffbdf8f26bd554048a3 --- /dev/null +++ b/model-00063-of-00063.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3249c5a692d201d6c58d1089090b695d41002c5d66c14690f4f4168dc7d1427 +size 1853372608 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..a9e5c5534567d9e2b320c50115aa1ee290be6f02 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f6b8788a9f302c5abf6439fb6f4a14d3cdae288d4ffa4a2892b533aef0560f9c +size 12514880 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..55004e9eddc866f8c7786057b895e52f619cecfd --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59cda48bbe8bab9d61ffb410e6e3c07b6d98bff73cee7c88ff8b51f95f21ab1c +size 485 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..b8069d141d985224799b817e12d5e5139cc4a111 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce20a7877bec9454dd611bc4e9116b7db765594f78524bd94edbaab422eddf02 +size 9977280 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..52889810072e93283be8034d4eb1d46598003952 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c941e9f95aaad2fb02f707a549169913f8fb273f6f779bd386249b1d813920e5 +size 166520