dmeck committed on
Commit
a59a079
·
verified ·
1 Parent(s): 3fc4c84

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [gMASK]<sop>{% for item in messages %}{% if item['tools'] is defined %}<|system|>
2
+ 你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。
3
+
4
+ # 可用工具{% set tools = item['tools'] %}{% for tool in tools %}{% if tool['type'] == 'function' %}
5
+
6
+ ## {{ tool['function']['name'] }}
7
+
8
+ {{ tool['function'] | tojson(indent=4) }}
9
+ 在调用上述函数时,请使用 Json 格式表示调用的参数。{% elif tool['type'] == 'python' %}
10
+
11
+ ## python
12
+
13
+ 当你向 `python` 发送包含 Python 代码的消息时,该代码将会在一个有状态的 Jupyter notebook 环境中执行。
14
+ `python` 返回代码执行的输出,或在执行 60 秒后返回超时。
15
+ `/mnt/data` 将会持久化存储你的文件。在此会话中,`python` 无法访问互联网。不要使用 `python` 进行任何网络请求或者在线 API 调用,这些在线内容的访问将不会成功。{% elif tool['type'] == 'simple_browser' %}
16
+
17
+ ## simple_browser
18
+
19
+ 你可以使用 `simple_browser` 工具。该工具支持以下函数:
20
+ `search(query: str, recency_days: int)`:使用搜索引擎进行查询并显示结果,可以使用 `recency_days` 参数控制搜索内容的时效性。
21
+ `mclick(ids: list[int])`:获取一系列指定 id 的页面内容。每次调用时,须选择3-10个页面。选择多个角度的页面,同时尽可能选择可信任的信息来源。考虑到部分页面是无法加载的,你也可以多打开一些可能有用的页面而不用担心内容过多。
22
+ `open_url(url: str)`:打开指定的 URL。
23
+
24
+ 使用 `【{引用 id}†{引用文本}】` 来引用内容。
25
+
26
+ 操作步骤:1. 使用 `search` 来获得信息列表; 2. 使用 `mclick` 来获取指定 ID 页面的内容; 3. 根据获得的内容进行回复。在回复中应当引用信息来源。
27
+ 如果用户提供了 URL,也可以用 `open_url` 直接打开页面。
28
+ 如果初次搜索结果没有找到合适的信息,也可以再次使用 `search` 进行搜索。{% elif tool['type'] == 'cogview' %}
29
+
30
+ ## cogview
31
+
32
+ 如果用户的请求中包含了对图像的描述,你可以使用 `cogview` 来生成图像并展示给用户。你需要向 `cogview` 发送图像描述,规则:
33
+ - 发送给 `cogview` 的消息必须使用英语。用户的中文描述必须完全翻译为英语。
34
+ - 应当尽可能详细地描述图像生成的需求,需求描述约 100 英文单词。
35
+ - 保持用户原始描述的意图。不要虚构内容或者没见过的人物。
36
+ - 如无特殊说明,所在地为中国,持有中国立场并遵循中国社会主义价值观。{% endif %}{% endfor %}{% endif %}{% if item['content'] %}<|{{ item['role'] }}|>{{ item['metadata'] }}
37
+ {{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}
config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Glm4ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": [
8
+ 151329,
9
+ 151336,
10
+ 151338
11
+ ],
12
+ "head_dim": 128,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 6144,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 23040,
17
+ "max_position_embeddings": 32768,
18
+ "model_type": "glm4",
19
+ "num_attention_heads": 48,
20
+ "num_hidden_layers": 61,
21
+ "num_key_value_heads": 2,
22
+ "pad_token_id": 151329,
23
+ "partial_rotary_factor": 0.5,
24
+ "quantization_config": {
25
+ "bits": 4,
26
+ "checkpoint_format": "gptq",
27
+ "desc_act": true,
28
+ "group_size": 128,
29
+ "lm_head": false,
30
+ "meta": {
31
+ "damp_auto_increment": 0.0025,
32
+ "damp_percent": 0.01,
33
+ "mse": 0.0,
34
+ "quantizer": [
35
+ "gptqmodel:3.0.0-dev"
36
+ ],
37
+ "static_groups": false,
38
+ "true_sequential": true,
39
+ "uri": "https://github.com/modelcloud/gptqmodel",
40
+ "v2": false,
41
+ "v2_alpha": 0.25
42
+ },
43
+ "pack_dtype": "int32",
44
+ "quant_method": "gptq",
45
+ "sym": true
46
+ },
47
+ "rms_norm_eps": 1e-05,
48
+ "rope_theta": 10000.0,
49
+ "tie_word_embeddings": false,
50
+ "torch_dtype": "bfloat16",
51
+ "transformers_version": "4.52.0.dev0",
52
+ "use_cache": true,
53
+ "vocab_size": 151552
54
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": [
4
+ 151329,
5
+ 151336,
6
+ 151338
7
+ ],
8
+ "pad_token_id": 151329,
9
+ "transformers_version": "4.52.0.dev0"
10
+ }
model-00001-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1df30b3d3612d5257f4609a4c4c93654530516aedb4bd7c4b36bbdf10cbc45a
3
+ size 3997276712
model-00002-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3372a9b78373f84424e26162d1d2bf679636bfa2098efdba1bebb99664fa59b4
3
+ size 3926340328
model-00003-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43685e350295cd61ce8cdb9578b98ed232100e8d590f8784156c28a8b21671f2
3
+ size 3926340376
model-00004-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b946170e6f01b8484c88144d78005fc3c9dca0bb1857663fd264b0e0987a8b4f
3
+ size 3926340376
model-00005-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40c4c8a078e61bc2f45557095db892507f59f07be4bd0e0e2a6637f9bef6634d
3
+ size 3915373880
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
quant_log.csv ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,damp,time
2
+ 0,self_attn.k_proj,0.0000000145,0.01000,3.232
3
+ 0,self_attn.v_proj,0.0000000149,0.01000,1.482
4
+ 0,self_attn.q_proj,0.0000003653,0.01000,1.487
5
+ 0,self_attn.o_proj,0.0001262193,0.01000,1.503
6
+ 0,mlp.gate_up_proj,0.0000764948,0.01000,1.758
7
+ 0,mlp.down_proj,5.0148693478,0.01000,7.667
8
+ 1,self_attn.k_proj,0.0000168193,0.01000,1.475
9
+ 1,self_attn.v_proj,0.0000085385,0.01000,1.469
10
+ 1,self_attn.q_proj,0.0000442424,0.01000,1.494
11
+ 1,self_attn.o_proj,0.7176740298,0.01000,1.492
12
+ 1,mlp.gate_up_proj,0.0000949941,0.01000,1.718
13
+ 1,mlp.down_proj,0.0001703329,0.01000,7.627
14
+ 2,self_attn.k_proj,0.0000002483,0.01000,1.463
15
+ 2,self_attn.v_proj,0.0000003033,0.01000,1.463
16
+ 2,self_attn.q_proj,0.0000032021,0.01000,1.491
17
+ 2,self_attn.o_proj,0.0000128015,0.01000,1.495
18
+ 2,mlp.gate_up_proj,0.0001703651,0.01000,1.708
19
+ 2,mlp.down_proj,0.0004005801,0.01000,7.607
20
+ 3,self_attn.k_proj,0.0000004315,0.01000,1.455
21
+ 3,self_attn.v_proj,0.0000004436,0.01000,1.454
22
+ 3,self_attn.q_proj,0.0000043756,0.01000,1.480
23
+ 3,self_attn.o_proj,0.0000316553,0.01000,1.482
24
+ 3,mlp.gate_up_proj,0.0001707317,0.01000,1.702
25
+ 3,mlp.down_proj,0.0001986987,0.01000,7.533
26
+ 4,self_attn.k_proj,0.0000004457,0.01000,1.449
27
+ 4,self_attn.v_proj,0.0000003737,0.01000,1.466
28
+ 4,self_attn.q_proj,0.0000056278,0.01000,1.490
29
+ 4,self_attn.o_proj,0.0000149709,0.01000,1.481
30
+ 4,mlp.gate_up_proj,0.0002327465,0.01000,1.697
31
+ 4,mlp.down_proj,0.0001969735,0.01000,7.541
32
+ 5,self_attn.k_proj,0.0000007596,0.01000,1.456
33
+ 5,self_attn.v_proj,0.0000006272,0.01000,1.459
34
+ 5,self_attn.q_proj,0.0000091453,0.01000,1.482
35
+ 5,self_attn.o_proj,0.0000126290,0.01000,1.489
36
+ 5,mlp.gate_up_proj,0.0004402362,0.01000,1.698
37
+ 5,mlp.down_proj,0.0002617813,0.01000,7.548
38
+ 6,self_attn.k_proj,0.0000010017,0.01000,1.457
39
+ 6,self_attn.v_proj,0.0000008704,0.01000,1.446
40
+ 6,self_attn.q_proj,0.0000117208,0.01000,1.477
41
+ 6,self_attn.o_proj,0.0000048481,0.01000,1.486
42
+ 6,mlp.gate_up_proj,0.0004765442,0.01000,1.702
43
+ 6,mlp.down_proj,0.0002266615,0.01000,7.543
44
+ 7,self_attn.k_proj,0.0000009495,0.01000,1.472
45
+ 7,self_attn.v_proj,0.0000008748,0.01000,1.459
46
+ 7,self_attn.q_proj,0.0000117135,0.01000,1.486
47
+ 7,self_attn.o_proj,0.0000061737,0.01000,1.496
48
+ 7,mlp.gate_up_proj,0.0005468666,0.01000,1.721
49
+ 7,mlp.down_proj,0.0002463890,0.01000,7.569
50
+ 8,self_attn.k_proj,0.0000013329,0.01000,1.467
51
+ 8,self_attn.v_proj,0.0000010509,0.01000,1.467
52
+ 8,self_attn.q_proj,0.0000186501,0.01000,1.496
53
+ 8,self_attn.o_proj,0.0000087684,0.01000,1.501
54
+ 8,mlp.gate_up_proj,0.0005815079,0.01000,1.718
55
+ 8,mlp.down_proj,0.0002314437,0.01000,7.613
56
+ 9,self_attn.k_proj,0.0000015154,0.01000,1.480
57
+ 9,self_attn.v_proj,0.0000011645,0.01000,1.472
58
+ 9,self_attn.q_proj,0.0000191247,0.01000,1.496
59
+ 9,self_attn.o_proj,0.0000096682,0.01000,1.501
60
+ 9,mlp.gate_up_proj,0.0005639126,0.01000,1.720
61
+ 9,mlp.down_proj,0.0002467135,0.01000,7.607
62
+ 10,self_attn.k_proj,0.0000012634,0.01000,1.465
63
+ 10,self_attn.v_proj,0.0000012471,0.01000,1.462
64
+ 10,self_attn.q_proj,0.0000173527,0.01000,1.488
65
+ 10,self_attn.o_proj,0.0000069165,0.01000,1.517
66
+ 10,mlp.gate_up_proj,0.0006487251,0.01000,1.716
67
+ 10,mlp.down_proj,0.0002332547,0.01000,7.633
68
+ 11,self_attn.k_proj,0.0000015278,0.01000,1.466
69
+ 11,self_attn.v_proj,0.0000012755,0.01000,1.465
70
+ 11,self_attn.q_proj,0.0000217987,0.01000,1.492
71
+ 11,self_attn.o_proj,0.0000166530,0.01000,1.497
72
+ 11,mlp.gate_up_proj,0.0005381580,0.01000,1.714
73
+ 11,mlp.down_proj,0.0002000954,0.01000,7.587
74
+ 12,self_attn.k_proj,0.0000018726,0.01000,1.470
75
+ 12,self_attn.v_proj,0.0000012596,0.01000,1.462
76
+ 12,self_attn.q_proj,0.0000248075,0.01000,1.518
77
+ 12,self_attn.o_proj,0.0000123625,0.01000,1.496
78
+ 12,mlp.gate_up_proj,0.0003960925,0.01000,1.709
79
+ 12,mlp.down_proj,0.0001173956,0.01000,7.654
80
+ 13,self_attn.k_proj,0.0000015829,0.01000,1.510
81
+ 13,self_attn.v_proj,0.0000014553,0.01000,1.466
82
+ 13,self_attn.q_proj,0.0000254443,0.01000,1.498
83
+ 13,self_attn.o_proj,0.0000117004,0.01000,1.505
84
+ 13,mlp.gate_up_proj,0.0003372296,0.01000,1.753
85
+ 13,mlp.down_proj,0.0000930237,0.01000,7.634
86
+ 14,self_attn.k_proj,0.0000019177,0.01000,1.467
87
+ 14,self_attn.v_proj,0.0000015900,0.01000,1.478
88
+ 14,self_attn.q_proj,0.0000314218,0.01000,1.509
89
+ 14,self_attn.o_proj,0.0000123227,0.01000,1.494
90
+ 14,mlp.gate_up_proj,0.0002548239,0.01000,1.709
91
+ 14,mlp.down_proj,0.0000611669,0.01000,7.577
92
+ 15,self_attn.k_proj,0.0000010581,0.01000,1.472
93
+ 15,self_attn.v_proj,0.0000010090,0.01000,1.463
94
+ 15,self_attn.q_proj,0.0000175309,0.01000,1.490
95
+ 15,self_attn.o_proj,0.0000123919,0.01000,1.516
96
+ 15,mlp.gate_up_proj,0.0003601774,0.01000,1.714
97
+ 15,mlp.down_proj,0.0001024444,0.01000,7.662
98
+ 16,self_attn.k_proj,0.0000018965,0.01000,1.471
99
+ 16,self_attn.v_proj,0.0000017374,0.01000,1.469
100
+ 16,self_attn.q_proj,0.0000321455,0.01000,1.501
101
+ 16,self_attn.o_proj,0.0000101969,0.01000,1.492
102
+ 16,mlp.gate_up_proj,0.0004014389,0.01000,1.705
103
+ 16,mlp.down_proj,0.0000987502,0.01000,7.579
104
+ 17,self_attn.k_proj,0.0000015646,0.01000,1.469
105
+ 17,self_attn.v_proj,0.0000014850,0.01000,1.470
106
+ 17,self_attn.q_proj,0.0000247416,0.01000,1.489
107
+ 17,self_attn.o_proj,0.0000078919,0.01000,1.493
108
+ 17,mlp.gate_up_proj,0.0002989558,0.01000,1.708
109
+ 17,mlp.down_proj,0.0000594292,0.01000,7.575
110
+ 18,self_attn.k_proj,0.0000019812,0.01000,1.468
111
+ 18,self_attn.v_proj,0.0000017710,0.01000,1.464
112
+ 18,self_attn.q_proj,0.0000324620,0.01000,1.494
113
+ 18,self_attn.o_proj,0.0000050908,0.01000,1.510
114
+ 18,mlp.gate_up_proj,0.0003917195,0.01000,1.714
115
+ 18,mlp.down_proj,0.0000869121,0.01000,7.620
116
+ 19,self_attn.k_proj,0.0000015210,0.01000,1.496
117
+ 19,self_attn.v_proj,0.0000013525,0.01000,1.472
118
+ 19,self_attn.q_proj,0.0000250286,0.01000,1.480
119
+ 19,self_attn.o_proj,0.0000067775,0.01000,1.485
120
+ 19,mlp.gate_up_proj,0.0002115343,0.01000,1.720
121
+ 19,mlp.down_proj,0.0000295554,0.01000,7.577
122
+ 20,self_attn.k_proj,0.0000016902,0.01000,1.484
123
+ 20,self_attn.v_proj,0.0000015647,0.01000,1.463
124
+ 20,self_attn.q_proj,0.0000275892,0.01000,1.478
125
+ 20,self_attn.o_proj,0.0000061421,0.01000,1.481
126
+ 20,mlp.gate_up_proj,0.0002926806,0.01000,1.729
127
+ 20,mlp.down_proj,0.0000471520,0.01000,7.562
128
+ 21,self_attn.k_proj,0.0000011719,0.01000,1.461
129
+ 21,self_attn.v_proj,0.0000013321,0.01000,1.463
130
+ 21,self_attn.q_proj,0.0000152681,0.01000,1.484
131
+ 21,self_attn.o_proj,0.0000083127,0.01000,1.489
132
+ 21,mlp.gate_up_proj,0.0003848628,0.01000,1.740
133
+ 21,mlp.down_proj,0.0000595929,0.01000,7.660
134
+ 22,self_attn.k_proj,0.0000017807,0.01000,1.454
135
+ 22,self_attn.v_proj,0.0000015420,0.01000,1.451
136
+ 22,self_attn.q_proj,0.0000268292,0.01000,1.478
137
+ 22,self_attn.o_proj,0.0000041537,0.01000,1.484
138
+ 22,mlp.gate_up_proj,0.0003500559,0.01000,1.730
139
+ 22,mlp.down_proj,0.0000500698,0.01000,7.605
140
+ 23,self_attn.k_proj,0.0000016139,0.01000,1.464
141
+ 23,self_attn.v_proj,0.0000014808,0.01000,1.459
142
+ 23,self_attn.q_proj,0.0000248134,0.01000,1.486
143
+ 23,self_attn.o_proj,0.0000014779,0.01000,1.489
144
+ 23,mlp.gate_up_proj,0.0003885686,0.01000,1.712
145
+ 23,mlp.down_proj,0.0000528654,0.01000,7.631
146
+ 24,self_attn.k_proj,0.0000015169,0.01000,1.470
147
+ 24,self_attn.v_proj,0.0000020076,0.01000,1.457
148
+ 24,self_attn.q_proj,0.0000164926,0.01000,1.486
149
+ 24,self_attn.o_proj,0.0000039185,0.01000,1.492
150
+ 24,mlp.gate_up_proj,0.0003459592,0.01000,1.705
151
+ 24,mlp.down_proj,0.0000413718,0.01000,7.560
152
+ 25,self_attn.k_proj,0.0000018769,0.01000,1.464
153
+ 25,self_attn.v_proj,0.0000016746,0.01000,1.461
154
+ 25,self_attn.q_proj,0.0000276867,0.01000,1.488
155
+ 25,self_attn.o_proj,0.0000052767,0.01000,1.502
156
+ 25,mlp.gate_up_proj,0.0002978532,0.01000,1.712
157
+ 25,mlp.down_proj,0.0000347387,0.01000,7.590
158
+ 26,self_attn.k_proj,0.0000019465,0.01000,1.476
159
+ 26,self_attn.v_proj,0.0000017029,0.01000,1.474
160
+ 26,self_attn.q_proj,0.0000291495,0.01000,1.490
161
+ 26,self_attn.o_proj,0.0000044357,0.01000,1.492
162
+ 26,mlp.gate_up_proj,0.0002969015,0.01000,1.711
163
+ 26,mlp.down_proj,0.0000329189,0.01000,7.628
164
+ 27,self_attn.k_proj,0.0000020213,0.01000,1.470
165
+ 27,self_attn.v_proj,0.0000017203,0.01000,1.471
166
+ 27,self_attn.q_proj,0.0000309315,0.01000,1.506
167
+ 27,self_attn.o_proj,0.0000044748,0.01000,1.511
168
+ 27,mlp.gate_up_proj,0.0002350978,0.01000,1.715
169
+ 27,mlp.down_proj,0.0000232943,0.01000,7.569
170
+ 28,self_attn.k_proj,0.0000023604,0.01000,1.462
171
+ 28,self_attn.v_proj,0.0000020458,0.01000,1.462
172
+ 28,self_attn.q_proj,0.0000347518,0.01000,1.491
173
+ 28,self_attn.o_proj,0.0000045274,0.01000,1.485
174
+ 28,mlp.gate_up_proj,0.0002729119,0.01000,1.701
175
+ 28,mlp.down_proj,0.0000285306,0.01000,7.618
176
+ 29,self_attn.k_proj,0.0000021586,0.01000,1.470
177
+ 29,self_attn.v_proj,0.0000019867,0.01000,1.478
178
+ 29,self_attn.q_proj,0.0000337302,0.01000,1.503
179
+ 29,self_attn.o_proj,0.0000056753,0.01000,1.520
180
+ 29,mlp.gate_up_proj,0.0002244616,0.01000,1.725
181
+ 29,mlp.down_proj,0.0000236669,0.01000,7.561
182
+ 30,self_attn.k_proj,0.0000027204,0.01000,1.476
183
+ 30,self_attn.v_proj,0.0000026023,0.01000,1.467
184
+ 30,self_attn.q_proj,0.0000439284,0.01000,1.501
185
+ 30,self_attn.o_proj,0.0000022253,0.01000,1.510
186
+ 30,mlp.gate_up_proj,0.0002042476,0.01000,1.730
187
+ 30,mlp.down_proj,0.0000183864,0.01000,7.612
188
+ 31,self_attn.k_proj,0.0000024369,0.01000,1.468
189
+ 31,self_attn.v_proj,0.0000025132,0.01000,1.463
190
+ 31,self_attn.q_proj,0.0000401313,0.01000,1.493
191
+ 31,self_attn.o_proj,0.0000029085,0.01000,1.502
192
+ 31,mlp.gate_up_proj,0.0001916675,0.01000,1.708
193
+ 31,mlp.down_proj,0.0000159329,0.01000,7.587
194
+ 32,self_attn.k_proj,0.0000025381,0.01000,1.481
195
+ 32,self_attn.v_proj,0.0000025244,0.01000,1.462
196
+ 32,self_attn.q_proj,0.0000429584,0.01000,1.493
197
+ 32,self_attn.o_proj,0.0000034857,0.01000,1.528
198
+ 32,mlp.gate_up_proj,0.0001787998,0.01000,1.716
199
+ 32,mlp.down_proj,0.0000138510,0.01000,7.573
200
+ 33,self_attn.k_proj,0.0000016484,0.01000,1.476
201
+ 33,self_attn.v_proj,0.0000016832,0.01000,1.470
202
+ 33,self_attn.q_proj,0.0000283093,0.01000,1.497
203
+ 33,self_attn.o_proj,0.0000032370,0.01000,1.507
204
+ 33,mlp.gate_up_proj,0.0001561947,0.01000,1.715
205
+ 33,mlp.down_proj,0.0000110007,0.01000,7.638
206
+ 34,self_attn.k_proj,0.0000017145,0.01000,1.467
207
+ 34,self_attn.v_proj,0.0000015622,0.01000,1.462
208
+ 34,self_attn.q_proj,0.0000274940,0.01000,1.498
209
+ 34,self_attn.o_proj,0.0000095224,0.01000,1.510
210
+ 34,mlp.gate_up_proj,0.0001135982,0.01000,1.709
211
+ 34,mlp.down_proj,0.0000074105,0.01000,7.623
212
+ 35,self_attn.k_proj,0.0000017571,0.01000,1.468
213
+ 35,self_attn.v_proj,0.0000015543,0.01000,1.462
214
+ 35,self_attn.q_proj,0.0000294392,0.01000,1.496
215
+ 35,self_attn.o_proj,0.0000078946,0.01000,1.511
216
+ 35,mlp.gate_up_proj,0.0000980800,0.01000,1.709
217
+ 35,mlp.down_proj,0.0000060432,0.01000,7.589
218
+ 36,self_attn.k_proj,0.0000019153,0.01000,1.471
219
+ 36,self_attn.v_proj,0.0000018007,0.01000,1.468
220
+ 36,self_attn.q_proj,0.0000344370,0.01000,1.484
221
+ 36,self_attn.o_proj,0.0000069584,0.01000,1.484
222
+ 36,mlp.gate_up_proj,0.0001005755,0.01000,1.703
223
+ 36,mlp.down_proj,0.0000062395,0.01000,7.587
224
+ 37,self_attn.k_proj,0.0000016588,0.01000,1.481
225
+ 37,self_attn.v_proj,0.0000014910,0.01000,1.457
226
+ 37,self_attn.q_proj,0.0000289659,0.01000,1.488
227
+ 37,self_attn.o_proj,0.0000086094,0.01000,1.483
228
+ 37,mlp.gate_up_proj,0.0000839967,0.01000,1.702
229
+ 37,mlp.down_proj,0.0000045383,0.01000,7.603
230
+ 38,self_attn.k_proj,0.0000023016,0.01000,1.474
231
+ 38,self_attn.v_proj,0.0000021658,0.01000,1.464
232
+ 38,self_attn.q_proj,0.0000395194,0.01000,1.508
233
+ 38,self_attn.o_proj,0.0000117600,0.01000,1.522
234
+ 38,mlp.gate_up_proj,0.0000848223,0.01000,1.713
235
+ 38,mlp.down_proj,0.0000048769,0.01000,7.654
236
+ 39,self_attn.k_proj,0.0000020998,0.01000,1.465
237
+ 39,self_attn.v_proj,0.0000018999,0.01000,1.480
238
+ 39,self_attn.q_proj,0.0000360059,0.01000,1.479
239
+ 39,self_attn.o_proj,0.0000082790,0.01000,1.483
240
+ 39,mlp.gate_up_proj,0.0000955068,0.01000,1.719
241
+ 39,mlp.down_proj,0.0000054320,0.01000,7.572
242
+ 40,self_attn.k_proj,0.0000023224,0.01000,1.453
243
+ 40,self_attn.v_proj,0.0000021529,0.01000,1.453
244
+ 40,self_attn.q_proj,0.0000407022,0.01000,1.501
245
+ 40,self_attn.o_proj,0.0000090580,0.01000,1.482
246
+ 40,mlp.gate_up_proj,0.0000766918,0.01000,1.723
247
+ 40,mlp.down_proj,0.0000034165,0.01000,7.562
248
+ 41,self_attn.k_proj,0.0000017955,0.01000,1.459
249
+ 41,self_attn.v_proj,0.0000018613,0.01000,1.483
250
+ 41,self_attn.q_proj,0.0000364734,0.01000,1.486
251
+ 41,self_attn.o_proj,0.0000047276,0.01000,1.476
252
+ 41,mlp.gate_up_proj,0.0000732840,0.01000,1.784
253
+ 41,mlp.down_proj,0.0000031072,0.01000,7.595
254
+ 42,self_attn.k_proj,0.0000021973,0.01000,1.491
255
+ 42,self_attn.v_proj,0.0000021920,0.01000,1.475
256
+ 42,self_attn.q_proj,0.0000412202,0.01000,1.503
257
+ 42,self_attn.o_proj,0.0000086394,0.01000,1.505
258
+ 42,mlp.gate_up_proj,0.0000708551,0.01000,1.755
259
+ 42,mlp.down_proj,0.0000030684,0.01000,7.644
260
+ 43,self_attn.k_proj,0.0000020708,0.01000,1.477
261
+ 43,self_attn.v_proj,0.0000019036,0.01000,1.463
262
+ 43,self_attn.q_proj,0.0000374847,0.01000,1.517
263
+ 43,self_attn.o_proj,0.0000087857,0.01000,1.527
264
+ 43,mlp.gate_up_proj,0.0000665897,0.01000,1.733
265
+ 43,mlp.down_proj,0.0000029445,0.01000,7.620
266
+ 44,self_attn.k_proj,0.0000020261,0.01000,1.493
267
+ 44,self_attn.v_proj,0.0000021684,0.01000,1.470
268
+ 44,self_attn.q_proj,0.0000415638,0.01000,1.503
269
+ 44,self_attn.o_proj,0.0000071209,0.01000,1.503
270
+ 44,mlp.gate_up_proj,0.0000632317,0.01000,1.741
271
+ 44,mlp.down_proj,0.0000027826,0.01000,7.630
272
+ 45,self_attn.k_proj,0.0000023057,0.01000,1.465
273
+ 45,self_attn.v_proj,0.0000022242,0.01000,1.497
274
+ 45,self_attn.q_proj,0.0000447638,0.01000,1.490
275
+ 45,self_attn.o_proj,0.0000077642,0.01000,1.488
276
+ 45,mlp.gate_up_proj,0.0000578301,0.01000,1.703
277
+ 45,mlp.down_proj,0.0000024634,0.01000,7.574
278
+ 46,self_attn.k_proj,0.0000021371,0.01000,1.480
279
+ 46,self_attn.v_proj,0.0000022514,0.01000,1.468
280
+ 46,self_attn.q_proj,0.0000415123,0.01000,1.501
281
+ 46,self_attn.o_proj,0.0000079321,0.01000,1.522
282
+ 46,mlp.gate_up_proj,0.0000597282,0.01000,1.716
283
+ 46,mlp.down_proj,0.0000029196,0.01000,7.602
284
+ 47,self_attn.k_proj,0.0000020188,0.01000,1.474
285
+ 47,self_attn.v_proj,0.0000021556,0.01000,1.475
286
+ 47,self_attn.q_proj,0.0000380187,0.01000,1.495
287
+ 47,self_attn.o_proj,0.0000078174,0.01000,1.496
288
+ 47,mlp.gate_up_proj,0.0000619655,0.01000,1.710
289
+ 47,mlp.down_proj,0.0000029080,0.01000,7.562
290
+ 48,self_attn.k_proj,0.0000020118,0.01000,1.463
291
+ 48,self_attn.v_proj,0.0000020413,0.01000,1.454
292
+ 48,self_attn.q_proj,0.0000384828,0.01000,1.488
293
+ 48,self_attn.o_proj,0.0000065230,0.01000,1.483
294
+ 48,mlp.gate_up_proj,0.0000597506,0.01000,1.701
295
+ 48,mlp.down_proj,0.0000031472,0.01000,7.555
296
+ 49,self_attn.k_proj,0.0000021438,0.01000,1.467
297
+ 49,self_attn.v_proj,0.0000023447,0.01000,1.474
298
+ 49,self_attn.q_proj,0.0000399775,0.01000,1.490
299
+ 49,self_attn.o_proj,0.0000052020,0.01000,1.519
300
+ 49,mlp.gate_up_proj,0.0000955224,0.01000,1.709
301
+ 49,mlp.down_proj,0.0000061595,0.01000,7.569
302
+ 50,self_attn.k_proj,0.0000021562,0.01000,1.461
303
+ 50,self_attn.v_proj,0.0000025434,0.01000,1.453
304
+ 50,self_attn.q_proj,0.0000386181,0.01000,1.492
305
+ 50,self_attn.o_proj,0.0000068404,0.01000,1.514
306
+ 50,mlp.gate_up_proj,0.0001173386,0.01000,1.720
307
+ 50,mlp.down_proj,0.0000084913,0.01000,7.627
308
+ 51,self_attn.k_proj,0.0000020790,0.01000,1.467
309
+ 51,self_attn.v_proj,0.0000020287,0.01000,1.458
310
+ 51,self_attn.q_proj,0.0000349596,0.01000,1.489
311
+ 51,self_attn.o_proj,0.0000034838,0.01000,1.497
312
+ 51,mlp.gate_up_proj,0.0001156424,0.01000,1.712
313
+ 51,mlp.down_proj,0.0000085517,0.01000,7.662
314
+ 52,self_attn.k_proj,0.0000022620,0.01000,1.460
315
+ 52,self_attn.v_proj,0.0000021438,0.01000,1.471
316
+ 52,self_attn.q_proj,0.0000390562,0.01000,1.493
317
+ 52,self_attn.o_proj,0.0000040099,0.01000,1.506
318
+ 52,mlp.gate_up_proj,0.0001115195,0.01000,1.702
319
+ 52,mlp.down_proj,0.0000080531,0.01000,7.579
320
+ 53,self_attn.k_proj,0.0000018874,0.01000,1.480
321
+ 53,self_attn.v_proj,0.0000022225,0.01000,1.470
322
+ 53,self_attn.q_proj,0.0000420450,0.01000,1.520
323
+ 53,self_attn.o_proj,0.0000016867,0.01000,1.496
324
+ 53,mlp.gate_up_proj,0.0001137422,0.01000,1.720
325
+ 53,mlp.down_proj,0.0000079909,0.01000,7.583
326
+ 54,self_attn.k_proj,0.0000020107,0.01000,1.466
327
+ 54,self_attn.v_proj,0.0000024297,0.01000,1.462
328
+ 54,self_attn.q_proj,0.0000351269,0.01000,1.527
329
+ 54,self_attn.o_proj,0.0000011870,0.01000,1.489
330
+ 54,mlp.gate_up_proj,0.0001209137,0.01000,1.714
331
+ 54,mlp.down_proj,0.0000088745,0.01000,7.595
332
+ 55,self_attn.k_proj,0.0000019664,0.01000,1.507
333
+ 55,self_attn.v_proj,0.0000021142,0.01000,1.462
334
+ 55,self_attn.q_proj,0.0000306443,0.01000,1.487
335
+ 55,self_attn.o_proj,0.0000011229,0.01000,1.484
336
+ 55,mlp.gate_up_proj,0.0001148883,0.01000,1.742
337
+ 55,mlp.down_proj,0.0000083420,0.01000,7.584
338
+ 56,self_attn.k_proj,0.0000018952,0.01000,1.471
339
+ 56,self_attn.v_proj,0.0000024311,0.01000,1.468
340
+ 56,self_attn.q_proj,0.0000339106,0.01000,1.495
341
+ 56,self_attn.o_proj,0.0000021875,0.01000,1.501
342
+ 56,mlp.gate_up_proj,0.0001284902,0.01000,1.720
343
+ 56,mlp.down_proj,0.0000107869,0.01000,7.647
344
+ 57,self_attn.k_proj,0.0000020037,0.01000,1.472
345
+ 57,self_attn.v_proj,0.0000019004,0.01000,1.464
346
+ 57,self_attn.q_proj,0.0000364774,0.01000,1.512
347
+ 57,self_attn.o_proj,0.0000063478,0.01000,1.494
348
+ 57,mlp.gate_up_proj,0.0001514510,0.01000,1.712
349
+ 57,mlp.down_proj,0.0000157364,0.01000,7.578
350
+ 58,self_attn.k_proj,0.0000019591,0.01000,1.485
351
+ 58,self_attn.v_proj,0.0000019817,0.01000,1.478
352
+ 58,self_attn.q_proj,0.0000361585,0.01000,1.492
353
+ 58,self_attn.o_proj,0.0000021275,0.01000,1.508
354
+ 58,mlp.gate_up_proj,0.0002106334,0.01000,1.716
355
+ 58,mlp.down_proj,0.0000329273,0.01000,7.616
356
+ 59,self_attn.k_proj,0.0000019843,0.01000,1.457
357
+ 59,self_attn.v_proj,0.0000017783,0.01000,1.459
358
+ 59,self_attn.q_proj,0.0000330942,0.01000,1.514
359
+ 59,self_attn.o_proj,0.0000035600,0.01000,1.507
360
+ 59,mlp.gate_up_proj,0.0002280282,0.01000,1.699
361
+ 59,mlp.down_proj,0.0000447744,0.01000,7.568
362
+ 60,self_attn.k_proj,0.0000018077,0.01000,1.471
363
+ 60,self_attn.v_proj,0.0000016396,0.01000,1.456
364
+ 60,self_attn.q_proj,0.0000325550,0.01000,1.487
365
+ 60,self_attn.o_proj,0.0000054739,0.01000,1.485
366
+ 60,mlp.gate_up_proj,0.0002405127,0.01000,1.713
367
+ 60,mlp.down_proj,0.0000497696,0.01000,7.569
quantize_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "desc_act": true,
5
+ "sym": true,
6
+ "lm_head": false,
7
+ "quant_method": "gptq",
8
+ "checkpoint_format": "gptq",
9
+ "pack_dtype": "int32",
10
+ "meta": {
11
+ "quantizer": [
12
+ "gptqmodel:3.0.0-dev"
13
+ ],
14
+ "uri": "https://github.com/modelcloud/gptqmodel",
15
+ "damp_percent": 0.01,
16
+ "damp_auto_increment": 0.0025,
17
+ "static_groups": false,
18
+ "true_sequential": true,
19
+ "mse": 0.0,
20
+ "v2": false,
21
+ "v2_alpha": 0.25
22
+ }
23
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|endoftext|>",
4
+ "[MASK]",
5
+ "[gMASK]",
6
+ "[sMASK]",
7
+ "<sop>",
8
+ "<eop>",
9
+ "<|system|>",
10
+ "<|user|>",
11
+ "<|assistant|>",
12
+ "<|observation|>",
13
+ "<|begin_of_image|>",
14
+ "<|end_of_image|>",
15
+ "<|begin_of_video|>",
16
+ "<|end_of_video|>"
17
+ ],
18
+ "eos_token": {
19
+ "content": "<|endoftext|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": "<|endoftext|>"
26
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76ebeac0d8bd7879ead7b43c16b44981f277e47225de2bd7de9ae1a6cc664a8c
3
+ size 19966496
tokenizer_config.json ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "151329": {
4
+ "content": "<|endoftext|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "151330": {
12
+ "content": "[MASK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "151331": {
20
+ "content": "[gMASK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "151332": {
28
+ "content": "[sMASK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "151333": {
36
+ "content": "<sop>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "151334": {
44
+ "content": "<eop>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "151335": {
52
+ "content": "<|system|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "151336": {
60
+ "content": "<|user|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "151337": {
68
+ "content": "<|assistant|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "151338": {
76
+ "content": "<|observation|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "151339": {
84
+ "content": "<|begin_of_image|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "151340": {
92
+ "content": "<|end_of_image|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "151341": {
100
+ "content": "<|begin_of_video|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "151342": {
108
+ "content": "<|end_of_video|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ }
115
+ },
116
+ "additional_special_tokens": [
117
+ "<|endoftext|>",
118
+ "[MASK]",
119
+ "[gMASK]",
120
+ "[sMASK]",
121
+ "<sop>",
122
+ "<eop>",
123
+ "<|system|>",
124
+ "<|user|>",
125
+ "<|assistant|>",
126
+ "<|observation|>",
127
+ "<|begin_of_image|>",
128
+ "<|end_of_image|>",
129
+ "<|begin_of_video|>",
130
+ "<|end_of_video|>"
131
+ ],
132
+ "clean_up_tokenization_spaces": false,
133
+ "do_lower_case": false,
134
+ "eos_token": "<|endoftext|>",
135
+ "extra_special_tokens": {},
136
+ "model_input_names": [
137
+ "input_ids",
138
+ "attention_mask"
139
+ ],
140
+ "model_max_length": 128000,
141
+ "pad_token": "<|endoftext|>",
142
+ "padding_side": "left",
143
+ "remove_space": false,
144
+ "tokenizer_class": "PreTrainedTokenizerFast",
145
+ "_commit_hash": null
146
+ }