basic-go committed on
Commit
c4b83e2
·
verified ·
1 Parent(s): ff51e6e

Upload tokenizer

Browse files
added_tokens.json ADDED
The diff for this file is too large to render. See raw diff
 
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_99>",
4
+ "<extra_id_98>",
5
+ "<extra_id_97>",
6
+ "<extra_id_96>",
7
+ "<extra_id_95>",
8
+ "<extra_id_94>",
9
+ "<extra_id_93>",
10
+ "<extra_id_92>",
11
+ "<extra_id_91>",
12
+ "<extra_id_90>",
13
+ "<extra_id_89>",
14
+ "<extra_id_88>",
15
+ "<extra_id_87>",
16
+ "<extra_id_86>",
17
+ "<extra_id_85>",
18
+ "<extra_id_84>",
19
+ "<extra_id_83>",
20
+ "<extra_id_82>",
21
+ "<extra_id_81>",
22
+ "<extra_id_80>",
23
+ "<extra_id_79>",
24
+ "<extra_id_78>",
25
+ "<extra_id_77>",
26
+ "<extra_id_76>",
27
+ "<extra_id_75>",
28
+ "<extra_id_74>",
29
+ "<extra_id_73>",
30
+ "<extra_id_72>",
31
+ "<extra_id_71>",
32
+ "<extra_id_70>",
33
+ "<extra_id_69>",
34
+ "<extra_id_68>",
35
+ "<extra_id_67>",
36
+ "<extra_id_66>",
37
+ "<extra_id_65>",
38
+ "<extra_id_64>",
39
+ "<extra_id_63>",
40
+ "<extra_id_62>",
41
+ "<extra_id_61>",
42
+ "<extra_id_60>",
43
+ "<extra_id_59>",
44
+ "<extra_id_58>",
45
+ "<extra_id_57>",
46
+ "<extra_id_56>",
47
+ "<extra_id_55>",
48
+ "<extra_id_54>",
49
+ "<extra_id_53>",
50
+ "<extra_id_52>",
51
+ "<extra_id_51>",
52
+ "<extra_id_50>",
53
+ "<extra_id_49>",
54
+ "<extra_id_48>",
55
+ "<extra_id_47>",
56
+ "<extra_id_46>",
57
+ "<extra_id_45>",
58
+ "<extra_id_44>",
59
+ "<extra_id_43>",
60
+ "<extra_id_42>",
61
+ "<extra_id_41>",
62
+ "<extra_id_40>",
63
+ "<extra_id_39>",
64
+ "<extra_id_38>",
65
+ "<extra_id_37>",
66
+ "<extra_id_36>",
67
+ "<extra_id_35>",
68
+ "<extra_id_34>",
69
+ "<extra_id_33>",
70
+ "<extra_id_32>",
71
+ "<extra_id_31>",
72
+ "<extra_id_30>",
73
+ "<extra_id_29>",
74
+ "<extra_id_28>",
75
+ "<extra_id_27>",
76
+ "<extra_id_26>",
77
+ "<extra_id_25>",
78
+ "<extra_id_24>",
79
+ "<extra_id_23>",
80
+ "<extra_id_22>",
81
+ "<extra_id_21>",
82
+ "<extra_id_20>",
83
+ "<extra_id_19>",
84
+ "<extra_id_18>",
85
+ "<extra_id_17>",
86
+ "<extra_id_16>",
87
+ "<extra_id_15>",
88
+ "<extra_id_14>",
89
+ "<extra_id_13>",
90
+ "<extra_id_12>",
91
+ "<extra_id_11>",
92
+ "<extra_id_10>",
93
+ "<extra_id_9>",
94
+ "<extra_id_8>",
95
+ "<extra_id_7>",
96
+ "<extra_id_6>",
97
+ "<extra_id_5>",
98
+ "<extra_id_4>",
99
+ "<extra_id_3>",
100
+ "<extra_id_2>",
101
+ "<extra_id_1>",
102
+ "<extra_id_0>"
103
+ ],
104
+ "bos_token": {
105
+ "content": "<s>",
106
+ "lstrip": false,
107
+ "normalized": true,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "cls_token": {
112
+ "content": "<s>",
113
+ "lstrip": false,
114
+ "normalized": true,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "eos_token": {
119
+ "content": "</s>",
120
+ "lstrip": false,
121
+ "normalized": true,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ },
125
+ "mask_token": {
126
+ "content": "<mask>",
127
+ "lstrip": true,
128
+ "normalized": true,
129
+ "rstrip": false,
130
+ "single_word": false
131
+ },
132
+ "pad_token": {
133
+ "content": "<pad>",
134
+ "lstrip": false,
135
+ "normalized": true,
136
+ "rstrip": false,
137
+ "single_word": false
138
+ },
139
+ "sep_token": {
140
+ "content": "</s>",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false
145
+ },
146
+ "unk_token": {
147
+ "content": "<unk>",
148
+ "lstrip": false,
149
+ "normalized": true,
150
+ "rstrip": false,
151
+ "single_word": false
152
+ }
153
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff