Spaces:
Runtime error
Runtime error
Update superposed/llama/tokenizer.py
Browse files
superposed/llama/tokenizer.py
CHANGED
@@ -35,7 +35,7 @@ class Tokenizer:
|
|
35 |
)
|
36 |
assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
|
37 |
|
38 |
-
def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
|
39 |
"""
|
40 |
Encodes a string into a list of token IDs.
|
41 |
|
@@ -47,15 +47,16 @@ class Tokenizer:
|
|
47 |
Returns:
|
48 |
List[int]: A list of token IDs.
|
49 |
"""
|
50 |
-
assert type(s) is str
|
51 |
t = self.sp_model.encode(s)
|
52 |
if bos:
|
53 |
-
t = [self.bos_id] + t
|
54 |
if eos:
|
55 |
-
t = t + [self.eos_id]
|
56 |
return t
|
57 |
|
58 |
-
def decode(self, t: List[int]) -> str:
|
59 |
"""
|
60 |
Decodes a list of token IDs into a string.
|
61 |
|
|
|
35 |
)
|
36 |
assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
|
37 |
|
38 |
+
def encode(self, s, bos: bool, eos: bool):
|
39 |
"""
|
40 |
Encodes a string into a list of token IDs.
|
41 |
|
|
|
47 |
Returns:
|
48 |
List[int]: A list of token IDs.
|
49 |
"""
|
|
|
50 |
t = self.sp_model.encode(s)
|
51 |
if bos:
|
52 |
+
for i in range(len(t)):
|
53 |
+
t[i] = [self.bos_id] + t[i]
|
54 |
if eos:
|
55 |
+
for i in range(len(t)):
|
56 |
+
t[i] = t[i] + [self.eos_id]
|
57 |
return t
|
58 |
|
59 |
+
def decode(self, t):
|
60 |
"""
|
61 |
Decodes a list of token IDs into a string.
|
62 |
|