AlGe committed on
Commit
4fac050
·
verified ·
1 Parent(s): 8f0428b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer
3
  import random
4
 
5
  # List of available tokenizers
@@ -54,7 +54,10 @@ def generate_colored_html(tokens, decoded_tokens):
54
  return html_output
55
 
56
  def tokenize_text(text, tokenizer_name):
57
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
 
 
 
58
  tokens = tokenizer.encode(text, add_special_tokens=True)
59
  decoded_tokens = [tokenizer.decode(token) for token in tokens]
60
  html_output = generate_colored_html(tokens, decoded_tokens)
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, GPT2TokenizerFast
3
  import random
4
 
5
  # List of available tokenizers
 
54
  return html_output
55
 
56
  def tokenize_text(text, tokenizer_name):
57
+ if tokenizer_name.split("/")[0]=="Xenova":
58
+ tokenizer = GPT2TokenizerFast.from_pretrained(tokenizer_name)
59
+ else :
60
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
61
  tokens = tokenizer.encode(text, add_special_tokens=True)
62
  decoded_tokens = [tokenizer.decode(token) for token in tokens]
63
  html_output = generate_colored_html(tokens, decoded_tokens)