Spaces:
Running
on
Zero
Running
on
Zero
File size: 638 Bytes
8db7949 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
import warnings
from transformers import AutoTokenizer
def assert_tokenizer_consistency(model_id_1, model_id_2):
identical_tokenizers = (
AutoTokenizer.from_pretrained(model_id_1).vocab
== AutoTokenizer.from_pretrained(model_id_2).vocab
)
if not identical_tokenizers:
warnings.warn(
f"Warning: Tokenizers for models '{model_id_1}' and '{model_id_2}' have different vocabularies. "
f"This may lead to inconsistent results when comparing these models. "
f"Consider using models with compatible tokenizers.",
UserWarning
)
|