Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import AutoTokenizer
|
3 |
import random
|
4 |
|
5 |
# List of available tokenizers
|
@@ -54,7 +54,10 @@ def generate_colored_html(tokens, decoded_tokens):
|
|
54 |
return html_output
|
55 |
|
56 |
def tokenize_text(text, tokenizer_name):
|
57 |
-
|
|
|
|
|
|
|
58 |
tokens = tokenizer.encode(text, add_special_tokens=True)
|
59 |
decoded_tokens = [tokenizer.decode(token) for token in tokens]
|
60 |
html_output = generate_colored_html(tokens, decoded_tokens)
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoTokenizer, GPT2TokenizerFast
|
3 |
import random
|
4 |
|
5 |
# List of available tokenizers
|
|
|
54 |
return html_output
|
55 |
|
56 |
def tokenize_text(text, tokenizer_name):
|
57 |
+
if tokenizer_name.split("/")[0]=="Xenova":
|
58 |
+
tokenizer = GPT2TokenizerFast.from_pretrained(tokenizer_name)
|
59 |
+
else :
|
60 |
+
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
|
61 |
tokens = tokenizer.encode(text, add_special_tokens=True)
|
62 |
decoded_tokens = [tokenizer.decode(token) for token in tokens]
|
63 |
html_output = generate_colored_html(tokens, decoded_tokens)
|