dmeck committed on
Commit
8e9e598
·
verified ·
1 Parent(s): a535625

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [gMASK]<sop>
2
+ {%- if tools -%}
3
+ <|system|>
4
+ # 可用工具
5
+ {% for tool in tools %}
6
+ {%- set function = tool.function if tool.get("function") else tool %}
7
+
8
+ ## {{ function.name }}
9
+
10
+ {{ function | tojson(indent=4, ensure_ascii=False) }}
11
+ 在调用上述函数时,请使用 Json 格式表示调用的参数。
12
+ {%- endfor %}
13
+ {%- endif -%}
14
+
15
+ {%- for msg in messages %}
16
+ {%- if msg.role == 'system' %}
17
+ <|system|>
18
+ {{ msg.content }}
19
+ {%- endif %}
20
+ {%- endfor %}
21
+
22
+ {%- for message in messages if message.role != 'system' %}
23
+ {%- set role = message['role'] %}
24
+ {%- set content = message['content'] %}
25
+ {%- set meta = message.get("metadata", "") %}
26
+
27
+ {%- if role == 'user' %}
28
+ <|user|>
29
+ {{ content }}
30
+ {%- elif role == 'assistant' and not meta %}
31
+ <|assistant|>
32
+ {{ content }}
33
+ {%- elif role == 'assistant' and meta %}
34
+ <|assistant|>{{ meta }}
35
+ {{ content }}
36
+ {%- elif role == 'observation' %}
37
+ <|observation|>
38
+ {{ content }}
39
+ {%- endif %}
40
+ {%- endfor %}
41
+ {% if add_generation_prompt %}<|assistant|>{% endif %}
config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Glm4ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": [
8
+ 151329,
9
+ 151336,
10
+ 151338
11
+ ],
12
+ "head_dim": 128,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 6144,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 23040,
17
+ "max_position_embeddings": 32768,
18
+ "model_type": "glm4",
19
+ "num_attention_heads": 48,
20
+ "num_hidden_layers": 61,
21
+ "num_key_value_heads": 2,
22
+ "pad_token_id": 151329,
23
+ "partial_rotary_factor": 0.5,
24
+ "quantization_config": {
25
+ "bits": 4,
26
+ "checkpoint_format": "gptq",
27
+ "desc_act": true,
28
+ "group_size": 128,
29
+ "lm_head": false,
30
+ "meta": {
31
+ "damp_auto_increment": 0.0025,
32
+ "damp_percent": 0.01,
33
+ "mse": 0.0,
34
+ "quantizer": [
35
+ "gptqmodel:3.0.0-dev"
36
+ ],
37
+ "static_groups": false,
38
+ "true_sequential": true,
39
+ "uri": "https://github.com/modelcloud/gptqmodel",
40
+ "v2": false,
41
+ "v2_alpha": 0.25
42
+ },
43
+ "pack_dtype": "int32",
44
+ "quant_method": "gptq",
45
+ "sym": true
46
+ },
47
+ "rms_norm_eps": 1e-05,
48
+ "rope_theta": 10000.0,
49
+ "tie_word_embeddings": false,
50
+ "torch_dtype": "bfloat16",
51
+ "transformers_version": "4.52.0.dev0",
52
+ "use_cache": true,
53
+ "vocab_size": 151552
54
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": [
4
+ 151329,
5
+ 151336,
6
+ 151338
7
+ ],
8
+ "pad_token_id": 151329,
9
+ "transformers_version": "4.52.0.dev0"
10
+ }
model-00001-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77846d4c7ab459e0ae08f7fd90870d30bf7027032b9aa320722f6b10a64d3f2b
3
+ size 3997276712
model-00002-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59e461aeb61f79a9ce44d2f78fd3801f19d62c12182f3484d87ed13d5360d4e8
3
+ size 3926340328
model-00003-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc0c6ac50f083bc76fe6ad5954d370c1dec7df352fbd57fc1beced235a72a786
3
+ size 3926340376
model-00004-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8f183e36f9096fb0d208eaa0752d50b4f2b93a0c6f0155a4427e11213e8bca8
3
+ size 3926340376
model-00005-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6cd860745717c0f9c9f3e9bb5a1fed3fdb1512e19f49b53405998b29e64b25b
3
+ size 3915373880
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
quant_log.csv ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,damp,time
2
+ 0,self_attn.k_proj,0.0000000144,0.01000,2.929
3
+ 0,self_attn.v_proj,0.0000000149,0.01000,1.526
4
+ 0,self_attn.q_proj,0.0000003634,0.01000,1.537
5
+ 0,self_attn.o_proj,0.0001249238,0.01000,1.510
6
+ 0,mlp.gate_up_proj,0.0000759352,0.01000,1.753
7
+ 0,mlp.down_proj,4.9248185034,0.01000,7.646
8
+ 1,self_attn.k_proj,0.0000166166,0.01000,1.470
9
+ 1,self_attn.v_proj,0.0000085691,0.01000,1.496
10
+ 1,self_attn.q_proj,0.0000438481,0.01000,1.511
11
+ 1,self_attn.o_proj,0.6955548700,0.01000,1.495
12
+ 1,mlp.gate_up_proj,0.0000933231,0.01000,1.736
13
+ 1,mlp.down_proj,0.0001679857,0.01000,7.665
14
+ 2,self_attn.k_proj,0.0000002660,0.01000,1.464
15
+ 2,self_attn.v_proj,0.0000003256,0.01000,1.489
16
+ 2,self_attn.q_proj,0.0000034409,0.01000,1.507
17
+ 2,self_attn.o_proj,0.0000137586,0.01000,1.494
18
+ 2,mlp.gate_up_proj,0.0001726024,0.01000,1.738
19
+ 2,mlp.down_proj,0.0003692205,0.01000,7.631
20
+ 3,self_attn.k_proj,0.0000004319,0.01000,1.468
21
+ 3,self_attn.v_proj,0.0000004455,0.01000,1.494
22
+ 3,self_attn.q_proj,0.0000043855,0.01000,1.515
23
+ 3,self_attn.o_proj,0.0000306009,0.01000,1.495
24
+ 3,mlp.gate_up_proj,0.0001733597,0.01000,1.763
25
+ 3,mlp.down_proj,0.0001968489,0.01000,7.650
26
+ 4,self_attn.k_proj,0.0000004558,0.01000,1.472
27
+ 4,self_attn.v_proj,0.0000003785,0.01000,1.492
28
+ 4,self_attn.q_proj,0.0000057011,0.01000,1.520
29
+ 4,self_attn.o_proj,0.0000150201,0.01000,1.497
30
+ 4,mlp.gate_up_proj,0.0002348814,0.01000,1.757
31
+ 4,mlp.down_proj,0.0001950488,0.01000,7.644
32
+ 5,self_attn.k_proj,0.0000007762,0.01000,1.486
33
+ 5,self_attn.v_proj,0.0000006402,0.01000,1.513
34
+ 5,self_attn.q_proj,0.0000093449,0.01000,1.535
35
+ 5,self_attn.o_proj,0.0000127081,0.01000,1.519
36
+ 5,mlp.gate_up_proj,0.0004476085,0.01000,1.776
37
+ 5,mlp.down_proj,0.0002605490,0.01000,7.720
38
+ 6,self_attn.k_proj,0.0000010243,0.01000,1.478
39
+ 6,self_attn.v_proj,0.0000008929,0.01000,1.504
40
+ 6,self_attn.q_proj,0.0000120158,0.01000,1.522
41
+ 6,self_attn.o_proj,0.0000048679,0.01000,1.512
42
+ 6,mlp.gate_up_proj,0.0004828231,0.01000,1.774
43
+ 6,mlp.down_proj,0.0002191994,0.01000,7.708
44
+ 7,self_attn.k_proj,0.0000009690,0.01000,1.513
45
+ 7,self_attn.v_proj,0.0000008925,0.01000,1.516
46
+ 7,self_attn.q_proj,0.0000119501,0.01000,1.528
47
+ 7,self_attn.o_proj,0.0000063818,0.01000,1.517
48
+ 7,mlp.gate_up_proj,0.0005537864,0.01000,1.761
49
+ 7,mlp.down_proj,0.0002386200,0.01000,7.716
50
+ 8,self_attn.k_proj,0.0000013532,0.01000,1.484
51
+ 8,self_attn.v_proj,0.0000010706,0.01000,1.516
52
+ 8,self_attn.q_proj,0.0000189551,0.01000,1.548
53
+ 8,self_attn.o_proj,0.0000090043,0.01000,1.511
54
+ 8,mlp.gate_up_proj,0.0005882618,0.01000,1.758
55
+ 8,mlp.down_proj,0.0002215634,0.01000,7.721
56
+ 9,self_attn.k_proj,0.0000015445,0.01000,1.475
57
+ 9,self_attn.v_proj,0.0000011883,0.01000,1.508
58
+ 9,self_attn.q_proj,0.0000195079,0.01000,1.534
59
+ 9,self_attn.o_proj,0.0000100017,0.01000,1.519
60
+ 9,mlp.gate_up_proj,0.0005764399,0.01000,1.756
61
+ 9,mlp.down_proj,0.0002461467,0.01000,7.688
62
+ 10,self_attn.k_proj,0.0000012949,0.01000,1.484
63
+ 10,self_attn.v_proj,0.0000012752,0.01000,1.510
64
+ 10,self_attn.q_proj,0.0000177911,0.01000,1.524
65
+ 10,self_attn.o_proj,0.0000069640,0.01000,1.512
66
+ 10,mlp.gate_up_proj,0.0006654446,0.01000,1.757
67
+ 10,mlp.down_proj,0.0002258848,0.01000,7.713
68
+ 11,self_attn.k_proj,0.0000015689,0.01000,1.485
69
+ 11,self_attn.v_proj,0.0000013155,0.01000,1.518
70
+ 11,self_attn.q_proj,0.0000224711,0.01000,1.542
71
+ 11,self_attn.o_proj,0.0000176500,0.01000,1.514
72
+ 11,mlp.gate_up_proj,0.0005532406,0.01000,1.758
73
+ 11,mlp.down_proj,0.0002012330,0.01000,7.737
74
+ 12,self_attn.k_proj,0.0000019306,0.01000,1.484
75
+ 12,self_attn.v_proj,0.0000012937,0.01000,1.514
76
+ 12,self_attn.q_proj,0.0000255264,0.01000,1.570
77
+ 12,self_attn.o_proj,0.0000130199,0.01000,1.513
78
+ 12,mlp.gate_up_proj,0.0004080150,0.01000,1.756
79
+ 12,mlp.down_proj,0.0001199784,0.01000,7.747
80
+ 13,self_attn.k_proj,0.0000016401,0.01000,1.471
81
+ 13,self_attn.v_proj,0.0000015008,0.01000,1.494
82
+ 13,self_attn.q_proj,0.0000262780,0.01000,1.512
83
+ 13,self_attn.o_proj,0.0000124686,0.01000,1.498
84
+ 13,mlp.gate_up_proj,0.0003443256,0.01000,1.752
85
+ 13,mlp.down_proj,0.0000911502,0.01000,7.650
86
+ 14,self_attn.k_proj,0.0000019886,0.01000,1.474
87
+ 14,self_attn.v_proj,0.0000016479,0.01000,1.509
88
+ 14,self_attn.q_proj,0.0000326347,0.01000,1.519
89
+ 14,self_attn.o_proj,0.0000130200,0.01000,1.510
90
+ 14,mlp.gate_up_proj,0.0002595201,0.01000,1.760
91
+ 14,mlp.down_proj,0.0000595815,0.01000,7.650
92
+ 15,self_attn.k_proj,0.0000011081,0.01000,1.476
93
+ 15,self_attn.v_proj,0.0000010541,0.01000,1.504
94
+ 15,self_attn.q_proj,0.0000183640,0.01000,1.575
95
+ 15,self_attn.o_proj,0.0000130012,0.01000,1.498
96
+ 15,mlp.gate_up_proj,0.0003749305,0.01000,1.745
97
+ 15,mlp.down_proj,0.0001039048,0.01000,7.664
98
+ 16,self_attn.k_proj,0.0000019731,0.01000,1.486
99
+ 16,self_attn.v_proj,0.0000017980,0.01000,1.510
100
+ 16,self_attn.q_proj,0.0000333973,0.01000,1.530
101
+ 16,self_attn.o_proj,0.0000107102,0.01000,1.514
102
+ 16,mlp.gate_up_proj,0.0004214915,0.01000,1.753
103
+ 16,mlp.down_proj,0.0001009013,0.01000,7.693
104
+ 17,self_attn.k_proj,0.0000016455,0.01000,1.489
105
+ 17,self_attn.v_proj,0.0000015577,0.01000,1.521
106
+ 17,self_attn.q_proj,0.0000259446,0.01000,1.543
107
+ 17,self_attn.o_proj,0.0000083092,0.01000,1.516
108
+ 17,mlp.gate_up_proj,0.0003165086,0.01000,1.763
109
+ 17,mlp.down_proj,0.0000607031,0.01000,7.700
110
+ 18,self_attn.k_proj,0.0000020781,0.01000,1.476
111
+ 18,self_attn.v_proj,0.0000018589,0.01000,1.507
112
+ 18,self_attn.q_proj,0.0000341080,0.01000,1.532
113
+ 18,self_attn.o_proj,0.0000055664,0.01000,1.497
114
+ 18,mlp.gate_up_proj,0.0004160402,0.01000,1.759
115
+ 18,mlp.down_proj,0.0000883801,0.01000,7.659
116
+ 19,self_attn.k_proj,0.0000016065,0.01000,1.477
117
+ 19,self_attn.v_proj,0.0000014296,0.01000,1.498
118
+ 19,self_attn.q_proj,0.0000263949,0.01000,1.508
119
+ 19,self_attn.o_proj,0.0000075932,0.01000,1.498
120
+ 19,mlp.gate_up_proj,0.0002235646,0.01000,1.757
121
+ 19,mlp.down_proj,0.0000297038,0.01000,7.687
122
+ 20,self_attn.k_proj,0.0000018283,0.01000,1.484
123
+ 20,self_attn.v_proj,0.0000016944,0.01000,1.516
124
+ 20,self_attn.q_proj,0.0000298715,0.01000,1.529
125
+ 20,self_attn.o_proj,0.0000067423,0.01000,1.527
126
+ 20,mlp.gate_up_proj,0.0003179047,0.01000,1.757
127
+ 20,mlp.down_proj,0.0000497135,0.01000,7.741
128
+ 21,self_attn.k_proj,0.0000012393,0.01000,1.496
129
+ 21,self_attn.v_proj,0.0000014096,0.01000,1.529
130
+ 21,self_attn.q_proj,0.0000161398,0.01000,1.562
131
+ 21,self_attn.o_proj,0.0000086058,0.01000,1.520
132
+ 21,mlp.gate_up_proj,0.0004175537,0.01000,1.765
133
+ 21,mlp.down_proj,0.0000624529,0.01000,7.735
134
+ 22,self_attn.k_proj,0.0000019124,0.01000,1.488
135
+ 22,self_attn.v_proj,0.0000016545,0.01000,1.514
136
+ 22,self_attn.q_proj,0.0000287724,0.01000,1.529
137
+ 22,self_attn.o_proj,0.0000045201,0.01000,1.519
138
+ 22,mlp.gate_up_proj,0.0003837140,0.01000,1.759
139
+ 22,mlp.down_proj,0.0000529506,0.01000,7.719
140
+ 23,self_attn.k_proj,0.0000017570,0.01000,1.494
141
+ 23,self_attn.v_proj,0.0000016161,0.01000,1.520
142
+ 23,self_attn.q_proj,0.0000270148,0.01000,1.550
143
+ 23,self_attn.o_proj,0.0000016576,0.01000,1.524
144
+ 23,mlp.gate_up_proj,0.0004301090,0.01000,1.760
145
+ 23,mlp.down_proj,0.0000556484,0.01000,7.735
146
+ 24,self_attn.k_proj,0.0000016560,0.01000,1.496
147
+ 24,self_attn.v_proj,0.0000021923,0.01000,1.521
148
+ 24,self_attn.q_proj,0.0000179768,0.01000,1.544
149
+ 24,self_attn.o_proj,0.0000042039,0.01000,1.523
150
+ 24,mlp.gate_up_proj,0.0003848278,0.01000,1.782
151
+ 24,mlp.down_proj,0.0000444143,0.01000,7.769
152
+ 25,self_attn.k_proj,0.0000020776,0.01000,1.499
153
+ 25,self_attn.v_proj,0.0000018399,0.01000,1.518
154
+ 25,self_attn.q_proj,0.0000304697,0.01000,1.535
155
+ 25,self_attn.o_proj,0.0000057855,0.01000,1.560
156
+ 25,mlp.gate_up_proj,0.0003310795,0.01000,1.787
157
+ 25,mlp.down_proj,0.0000374100,0.01000,7.721
158
+ 26,self_attn.k_proj,0.0000021007,0.01000,1.501
159
+ 26,self_attn.v_proj,0.0000018366,0.01000,1.553
160
+ 26,self_attn.q_proj,0.0000314466,0.01000,1.560
161
+ 26,self_attn.o_proj,0.0000047819,0.01000,1.520
162
+ 26,mlp.gate_up_proj,0.0003340909,0.01000,1.763
163
+ 26,mlp.down_proj,0.0000360316,0.01000,7.740
164
+ 27,self_attn.k_proj,0.0000021561,0.01000,1.495
165
+ 27,self_attn.v_proj,0.0000018368,0.01000,1.526
166
+ 27,self_attn.q_proj,0.0000330897,0.01000,1.543
167
+ 27,self_attn.o_proj,0.0000045089,0.01000,1.522
168
+ 27,mlp.gate_up_proj,0.0002663189,0.01000,1.770
169
+ 27,mlp.down_proj,0.0000259201,0.01000,7.749
170
+ 28,self_attn.k_proj,0.0000026158,0.01000,1.498
171
+ 28,self_attn.v_proj,0.0000022798,0.01000,1.520
172
+ 28,self_attn.q_proj,0.0000386177,0.01000,1.552
173
+ 28,self_attn.o_proj,0.0000052657,0.01000,1.522
174
+ 28,mlp.gate_up_proj,0.0003065639,0.01000,1.794
175
+ 28,mlp.down_proj,0.0000314570,0.01000,7.720
176
+ 29,self_attn.k_proj,0.0000023923,0.01000,1.489
177
+ 29,self_attn.v_proj,0.0000022201,0.01000,1.515
178
+ 29,self_attn.q_proj,0.0000375617,0.01000,1.533
179
+ 29,self_attn.o_proj,0.0000060325,0.01000,1.526
180
+ 29,mlp.gate_up_proj,0.0002587825,0.01000,1.764
181
+ 29,mlp.down_proj,0.0000264966,0.01000,7.725
182
+ 30,self_attn.k_proj,0.0000029710,0.01000,1.491
183
+ 30,self_attn.v_proj,0.0000028491,0.01000,1.541
184
+ 30,self_attn.q_proj,0.0000480602,0.01000,1.540
185
+ 30,self_attn.o_proj,0.0000023055,0.01000,1.523
186
+ 30,mlp.gate_up_proj,0.0002376178,0.01000,1.758
187
+ 30,mlp.down_proj,0.0000210452,0.01000,7.753
188
+ 31,self_attn.k_proj,0.0000027275,0.01000,1.496
189
+ 31,self_attn.v_proj,0.0000028166,0.01000,1.516
190
+ 31,self_attn.q_proj,0.0000448323,0.01000,1.542
191
+ 31,self_attn.o_proj,0.0000031633,0.01000,1.537
192
+ 31,mlp.gate_up_proj,0.0002244233,0.01000,1.758
193
+ 31,mlp.down_proj,0.0000184050,0.01000,7.731
194
+ 32,self_attn.k_proj,0.0000028851,0.01000,1.491
195
+ 32,self_attn.v_proj,0.0000028860,0.01000,1.524
196
+ 32,self_attn.q_proj,0.0000488739,0.01000,1.544
197
+ 32,self_attn.o_proj,0.0000038245,0.01000,1.522
198
+ 32,mlp.gate_up_proj,0.0002103205,0.01000,1.765
199
+ 32,mlp.down_proj,0.0000162122,0.01000,7.755
200
+ 33,self_attn.k_proj,0.0000018585,0.01000,1.493
201
+ 33,self_attn.v_proj,0.0000018939,0.01000,1.534
202
+ 33,self_attn.q_proj,0.0000318633,0.01000,1.554
203
+ 33,self_attn.o_proj,0.0000035201,0.01000,1.554
204
+ 33,mlp.gate_up_proj,0.0001825548,0.01000,1.782
205
+ 33,mlp.down_proj,0.0000126821,0.01000,7.742
206
+ 34,self_attn.k_proj,0.0000019409,0.01000,1.504
207
+ 34,self_attn.v_proj,0.0000017755,0.01000,1.529
208
+ 34,self_attn.q_proj,0.0000312464,0.01000,1.569
209
+ 34,self_attn.o_proj,0.0000106473,0.01000,1.523
210
+ 34,mlp.gate_up_proj,0.0001308567,0.01000,1.761
211
+ 34,mlp.down_proj,0.0000084482,0.01000,7.757
212
+ 35,self_attn.k_proj,0.0000020172,0.01000,1.502
213
+ 35,self_attn.v_proj,0.0000017908,0.01000,1.526
214
+ 35,self_attn.q_proj,0.0000337882,0.01000,1.550
215
+ 35,self_attn.o_proj,0.0000087794,0.01000,1.528
216
+ 35,mlp.gate_up_proj,0.0001125164,0.01000,1.797
217
+ 35,mlp.down_proj,0.0000068781,0.01000,7.763
218
+ 36,self_attn.k_proj,0.0000021642,0.01000,1.475
219
+ 36,self_attn.v_proj,0.0000020408,0.01000,1.504
220
+ 36,self_attn.q_proj,0.0000388233,0.01000,1.540
221
+ 36,self_attn.o_proj,0.0000083437,0.01000,1.503
222
+ 36,mlp.gate_up_proj,0.0001143253,0.01000,1.747
223
+ 36,mlp.down_proj,0.0000070406,0.01000,7.668
224
+ 37,self_attn.k_proj,0.0000018855,0.01000,1.474
225
+ 37,self_attn.v_proj,0.0000017017,0.01000,1.498
226
+ 37,self_attn.q_proj,0.0000330188,0.01000,1.517
227
+ 37,self_attn.o_proj,0.0000095858,0.01000,1.521
228
+ 37,mlp.gate_up_proj,0.0000955174,0.01000,1.788
229
+ 37,mlp.down_proj,0.0000051665,0.01000,7.658
230
+ 38,self_attn.k_proj,0.0000026216,0.01000,1.474
231
+ 38,self_attn.v_proj,0.0000024581,0.01000,1.500
232
+ 38,self_attn.q_proj,0.0000449249,0.01000,1.515
233
+ 38,self_attn.o_proj,0.0000140097,0.01000,1.498
234
+ 38,mlp.gate_up_proj,0.0000959132,0.01000,1.748
235
+ 38,mlp.down_proj,0.0000055332,0.01000,7.754
236
+ 39,self_attn.k_proj,0.0000023764,0.01000,1.505
237
+ 39,self_attn.v_proj,0.0000021691,0.01000,1.514
238
+ 39,self_attn.q_proj,0.0000410026,0.01000,1.538
239
+ 39,self_attn.o_proj,0.0000092969,0.01000,1.514
240
+ 39,mlp.gate_up_proj,0.0001086052,0.01000,1.762
241
+ 39,mlp.down_proj,0.0000062391,0.01000,7.746
242
+ 40,self_attn.k_proj,0.0000026411,0.01000,1.513
243
+ 40,self_attn.v_proj,0.0000024685,0.01000,1.507
244
+ 40,self_attn.q_proj,0.0000463585,0.01000,1.522
245
+ 40,self_attn.o_proj,0.0000110130,0.01000,1.514
246
+ 40,mlp.gate_up_proj,0.0000871354,0.01000,1.754
247
+ 40,mlp.down_proj,0.0000039779,0.01000,7.759
248
+ 41,self_attn.k_proj,0.0000020181,0.01000,1.486
249
+ 41,self_attn.v_proj,0.0000021271,0.01000,1.507
250
+ 41,self_attn.q_proj,0.0000409969,0.01000,1.547
251
+ 41,self_attn.o_proj,0.0000054175,0.01000,1.539
252
+ 41,mlp.gate_up_proj,0.0000830821,0.01000,1.766
253
+ 41,mlp.down_proj,0.0000035916,0.01000,7.751
254
+ 42,self_attn.k_proj,0.0000024863,0.01000,1.483
255
+ 42,self_attn.v_proj,0.0000025053,0.01000,1.510
256
+ 42,self_attn.q_proj,0.0000468063,0.01000,1.528
257
+ 42,self_attn.o_proj,0.0000105437,0.01000,1.514
258
+ 42,mlp.gate_up_proj,0.0000811200,0.01000,1.754
259
+ 42,mlp.down_proj,0.0000035646,0.01000,7.721
260
+ 43,self_attn.k_proj,0.0000023590,0.01000,1.483
261
+ 43,self_attn.v_proj,0.0000022048,0.01000,1.507
262
+ 43,self_attn.q_proj,0.0000429752,0.01000,1.530
263
+ 43,self_attn.o_proj,0.0000107138,0.01000,1.521
264
+ 43,mlp.gate_up_proj,0.0000761639,0.01000,1.764
265
+ 43,mlp.down_proj,0.0000033740,0.01000,7.753
266
+ 44,self_attn.k_proj,0.0000022945,0.01000,1.484
267
+ 44,self_attn.v_proj,0.0000024551,0.01000,1.512
268
+ 44,self_attn.q_proj,0.0000470545,0.01000,1.537
269
+ 44,self_attn.o_proj,0.0000083398,0.01000,1.520
270
+ 44,mlp.gate_up_proj,0.0000717656,0.01000,1.758
271
+ 44,mlp.down_proj,0.0000031455,0.01000,7.725
272
+ 45,self_attn.k_proj,0.0000026021,0.01000,1.494
273
+ 45,self_attn.v_proj,0.0000024987,0.01000,1.522
274
+ 45,self_attn.q_proj,0.0000503206,0.01000,1.538
275
+ 45,self_attn.o_proj,0.0000093540,0.01000,1.514
276
+ 45,mlp.gate_up_proj,0.0000653584,0.01000,1.777
277
+ 45,mlp.down_proj,0.0000027631,0.01000,7.734
278
+ 46,self_attn.k_proj,0.0000023923,0.01000,1.496
279
+ 46,self_attn.v_proj,0.0000025303,0.01000,1.519
280
+ 46,self_attn.q_proj,0.0000464908,0.01000,1.534
281
+ 46,self_attn.o_proj,0.0000092665,0.01000,1.522
282
+ 46,mlp.gate_up_proj,0.0000673613,0.01000,1.763
283
+ 46,mlp.down_proj,0.0000032540,0.01000,7.743
284
+ 47,self_attn.k_proj,0.0000022541,0.01000,1.482
285
+ 47,self_attn.v_proj,0.0000024099,0.01000,1.507
286
+ 47,self_attn.q_proj,0.0000424197,0.01000,1.529
287
+ 47,self_attn.o_proj,0.0000087173,0.01000,1.520
288
+ 47,mlp.gate_up_proj,0.0000696124,0.01000,1.758
289
+ 47,mlp.down_proj,0.0000032250,0.01000,7.752
290
+ 48,self_attn.k_proj,0.0000022253,0.01000,1.492
291
+ 48,self_attn.v_proj,0.0000022728,0.01000,1.514
292
+ 48,self_attn.q_proj,0.0000427026,0.01000,1.553
293
+ 48,self_attn.o_proj,0.0000073344,0.01000,1.540
294
+ 48,mlp.gate_up_proj,0.0000666436,0.01000,1.771
295
+ 48,mlp.down_proj,0.0000034516,0.01000,7.751
296
+ 49,self_attn.k_proj,0.0000023598,0.01000,1.489
297
+ 49,self_attn.v_proj,0.0000025774,0.01000,1.522
298
+ 49,self_attn.q_proj,0.0000439942,0.01000,1.545
299
+ 49,self_attn.o_proj,0.0000058884,0.01000,1.524
300
+ 49,mlp.gate_up_proj,0.0001054278,0.01000,1.762
301
+ 49,mlp.down_proj,0.0000066771,0.01000,7.784
302
+ 50,self_attn.k_proj,0.0000023662,0.01000,1.478
303
+ 50,self_attn.v_proj,0.0000027904,0.01000,1.504
304
+ 50,self_attn.q_proj,0.0000424220,0.01000,1.526
305
+ 50,self_attn.o_proj,0.0000072030,0.01000,1.516
306
+ 50,mlp.gate_up_proj,0.0001290033,0.01000,1.755
307
+ 50,mlp.down_proj,0.0000091810,0.01000,7.675
308
+ 51,self_attn.k_proj,0.0000022649,0.01000,1.489
309
+ 51,self_attn.v_proj,0.0000022110,0.01000,1.517
310
+ 51,self_attn.q_proj,0.0000381388,0.01000,1.558
311
+ 51,self_attn.o_proj,0.0000036714,0.01000,1.539
312
+ 51,mlp.gate_up_proj,0.0001260655,0.01000,1.749
313
+ 51,mlp.down_proj,0.0000091918,0.01000,7.712
314
+ 52,self_attn.k_proj,0.0000024394,0.01000,1.474
315
+ 52,self_attn.v_proj,0.0000023174,0.01000,1.500
316
+ 52,self_attn.q_proj,0.0000422424,0.01000,1.547
317
+ 52,self_attn.o_proj,0.0000042098,0.01000,1.526
318
+ 52,mlp.gate_up_proj,0.0001209586,0.01000,1.776
319
+ 52,mlp.down_proj,0.0000086012,0.01000,7.724
320
+ 53,self_attn.k_proj,0.0000020518,0.01000,1.503
321
+ 53,self_attn.v_proj,0.0000024131,0.01000,1.522
322
+ 53,self_attn.q_proj,0.0000456590,0.01000,1.566
323
+ 53,self_attn.o_proj,0.0000017261,0.01000,1.529
324
+ 53,mlp.gate_up_proj,0.0001240569,0.01000,1.771
325
+ 53,mlp.down_proj,0.0000086225,0.01000,7.725
326
+ 54,self_attn.k_proj,0.0000021914,0.01000,1.496
327
+ 54,self_attn.v_proj,0.0000026488,0.01000,1.550
328
+ 54,self_attn.q_proj,0.0000383103,0.01000,1.556
329
+ 54,self_attn.o_proj,0.0000011243,0.01000,1.553
330
+ 54,mlp.gate_up_proj,0.0001322332,0.01000,1.801
331
+ 54,mlp.down_proj,0.0000096043,0.01000,7.784
332
+ 55,self_attn.k_proj,0.0000021482,0.01000,1.485
333
+ 55,self_attn.v_proj,0.0000023087,0.01000,1.506
334
+ 55,self_attn.q_proj,0.0000335327,0.01000,1.527
335
+ 55,self_attn.o_proj,0.0000010707,0.01000,1.513
336
+ 55,mlp.gate_up_proj,0.0001262408,0.01000,1.763
337
+ 55,mlp.down_proj,0.0000091236,0.01000,7.742
338
+ 56,self_attn.k_proj,0.0000020794,0.01000,1.475
339
+ 56,self_attn.v_proj,0.0000026723,0.01000,1.510
340
+ 56,self_attn.q_proj,0.0000372959,0.01000,1.531
341
+ 56,self_attn.o_proj,0.0000023217,0.01000,1.514
342
+ 56,mlp.gate_up_proj,0.0001405949,0.01000,1.746
343
+ 56,mlp.down_proj,0.0000117001,0.01000,7.710
344
+ 57,self_attn.k_proj,0.0000021717,0.01000,1.477
345
+ 57,self_attn.v_proj,0.0000020618,0.01000,1.543
346
+ 57,self_attn.q_proj,0.0000394623,0.01000,1.537
347
+ 57,self_attn.o_proj,0.0000070200,0.01000,1.507
348
+ 57,mlp.gate_up_proj,0.0001650417,0.01000,1.744
349
+ 57,mlp.down_proj,0.0000167601,0.01000,7.679
350
+ 58,self_attn.k_proj,0.0000021242,0.01000,1.501
351
+ 58,self_attn.v_proj,0.0000021589,0.01000,1.528
352
+ 58,self_attn.q_proj,0.0000392971,0.01000,1.574
353
+ 58,self_attn.o_proj,0.0000022084,0.01000,1.542
354
+ 58,mlp.gate_up_proj,0.0002288546,0.01000,1.770
355
+ 58,mlp.down_proj,0.0000346537,0.01000,7.762
356
+ 59,self_attn.k_proj,0.0000021596,0.01000,1.490
357
+ 59,self_attn.v_proj,0.0000019384,0.01000,1.514
358
+ 59,self_attn.q_proj,0.0000360824,0.01000,1.533
359
+ 59,self_attn.o_proj,0.0000036960,0.01000,1.520
360
+ 59,mlp.gate_up_proj,0.0002469802,0.01000,1.763
361
+ 59,mlp.down_proj,0.0000465371,0.01000,7.804
362
+ 60,self_attn.k_proj,0.0000019967,0.01000,1.491
363
+ 60,self_attn.v_proj,0.0000018081,0.01000,1.523
364
+ 60,self_attn.q_proj,0.0000359417,0.01000,1.548
365
+ 60,self_attn.o_proj,0.0000057281,0.01000,1.521
366
+ 60,mlp.gate_up_proj,0.0002825791,0.01000,1.766
367
+ 60,mlp.down_proj,0.0000614542,0.01000,7.746
quantize_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "desc_act": true,
5
+ "sym": true,
6
+ "lm_head": false,
7
+ "quant_method": "gptq",
8
+ "checkpoint_format": "gptq",
9
+ "pack_dtype": "int32",
10
+ "meta": {
11
+ "quantizer": [
12
+ "gptqmodel:3.0.0-dev"
13
+ ],
14
+ "uri": "https://github.com/modelcloud/gptqmodel",
15
+ "damp_percent": 0.01,
16
+ "damp_auto_increment": 0.0025,
17
+ "static_groups": false,
18
+ "true_sequential": true,
19
+ "mse": 0.0,
20
+ "v2": false,
21
+ "v2_alpha": 0.25
22
+ }
23
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|endoftext|>",
4
+ "[MASK]",
5
+ "[gMASK]",
6
+ "[sMASK]",
7
+ "<sop>",
8
+ "<eop>",
9
+ "<|system|>",
10
+ "<|user|>",
11
+ "<|assistant|>",
12
+ "<|observation|>",
13
+ "<|begin_of_image|>",
14
+ "<|end_of_image|>",
15
+ "<|begin_of_video|>",
16
+ "<|end_of_video|>"
17
+ ],
18
+ "eos_token": {
19
+ "content": "<|user|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": "<|endoftext|>"
26
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76ebeac0d8bd7879ead7b43c16b44981f277e47225de2bd7de9ae1a6cc664a8c
3
+ size 19966496
tokenizer_config.json ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "151329": {
4
+ "content": "<|endoftext|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "151330": {
12
+ "content": "[MASK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "151331": {
20
+ "content": "[gMASK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "151332": {
28
+ "content": "[sMASK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "151333": {
36
+ "content": "<sop>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "151334": {
44
+ "content": "<eop>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "151335": {
52
+ "content": "<|system|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "151336": {
60
+ "content": "<|user|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "151337": {
68
+ "content": "<|assistant|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "151338": {
76
+ "content": "<|observation|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "151339": {
84
+ "content": "<|begin_of_image|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "151340": {
92
+ "content": "<|end_of_image|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "151341": {
100
+ "content": "<|begin_of_video|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "151342": {
108
+ "content": "<|end_of_video|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ }
115
+ },
116
+ "additional_special_tokens": [
117
+ "<|endoftext|>",
118
+ "[MASK]",
119
+ "[gMASK]",
120
+ "[sMASK]",
121
+ "<sop>",
122
+ "<eop>",
123
+ "<|system|>",
124
+ "<|user|>",
125
+ "<|assistant|>",
126
+ "<|observation|>",
127
+ "<|begin_of_image|>",
128
+ "<|end_of_image|>",
129
+ "<|begin_of_video|>",
130
+ "<|end_of_video|>"
131
+ ],
132
+ "clean_up_tokenization_spaces": false,
133
+ "do_lower_case": false,
134
+ "eos_token": "<|user|>",
135
+ "extra_special_tokens": {},
136
+ "model_input_names": [
137
+ "input_ids",
138
+ "attention_mask"
139
+ ],
140
+ "model_max_length": 128000,
141
+ "pad_token": "<|endoftext|>",
142
+ "padding_side": "left",
143
+ "remove_space": false,
144
+ "tokenizer_class": "PreTrainedTokenizerFast",
145
+ "_commit_hash": null
146
+ }