# File size: 507 Bytes
# 12f548d
from idiomify.fetchers import fetch_tokenizer
def main() -> None:
    """Fetch the 't-1-1' tokenizer and print each of its special tokens.

    Used as a quick sanity check that the custom idiom-marker tokens
    were registered on the tokenizer.
    """
    tokenizer = fetch_tokenizer("t-1-1")
    # The seven standard special-token attributes, printed one per line.
    for attr_name in (
        "bos_token",
        "cls_token",
        "eos_token",
        "sep_token",
        "mask_token",
        "pad_token",
        "unk_token",
    ):
        print(getattr(tokenizer, attr_name))
    print(tokenizer.additional_special_tokens)  # this should have been added
    """
<s>
<s>
</s>
</s>
<mask>
<pad>
<unk>
['<idiom>', '</idiom>']
"""
# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|