Spaces:

ethanlshen
/

SuperposedDecoding

Runtime error

ethanlshen committed on Jun 25, 2024

Commit

c57b12c

verified ·

1 Parent(s): 86a0d38

Update superposed/llama/tokenizer.py

Files changed (1) hide show

superposed/llama/tokenizer.py CHANGED Viewed

@@ -35,7 +35,7 @@ class Tokenizer:
         )
         assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
-    def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
         """
         Encodes a string into a list of token IDs.
@@ -47,15 +47,16 @@ class Tokenizer:
         Returns:
             List[int]: A list of token IDs.
         """
-        assert type(s) is str
         t = self.sp_model.encode(s)
         if bos:
-            t = [self.bos_id] + t
         if eos:
-            t = t + [self.eos_id]
         return t
-    def decode(self, t: List[int]) -> str:
         """
         Decodes a list of token IDs into a string.

         )
         assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
+    def encode(self, s, bos: bool, eos: bool):
         """
         Encodes a string into a list of token IDs.
         Returns:
             List[int]: A list of token IDs.
         """
         t = self.sp_model.encode(s)
         if bos:
+            for i in range(len(t)):
+                t[i] = [self.bos_id] + t[i]
         if eos:
+            for i in range(len(t)):
+                t[i] = t[i] + [self.eos_id]
         return t
+    def decode(self, t):
         """
         Decodes a list of token IDs into a string.