Spaces:

hadheedo
/

Summary

Running

hadheedo committed 22 days ago

Commit

0a99a64

verified ·

1 Parent(s): fc91025

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -84,12 +84,17 @@ try:
     # Check if the tokenizer is saved, if not, add custom tokens
     if not os.path.exists(tokenizer_path):
         tokenizer = T5Tokenizer.from_pretrained("t5-small")
-        tokenizer.add_tokens(['<extra_id_99>'])  # Add custom token if required
         tokenizer.save_pretrained(tokenizer_path)
     else:
         tokenizer = T5Tokenizer.from_pretrained(tokenizer_path, local_files_only=True)
     model = T5ForConditionalGeneration.from_pretrained(model_path, local_files_only=True, ignore_mismatched_sizes=True)
     device = torch.device("cpu")
     model.to(device)
     model_loaded = True

     # Check if the tokenizer is saved, if not, add custom tokens
     if not os.path.exists(tokenizer_path):
         tokenizer = T5Tokenizer.from_pretrained("t5-small")
+        # Add the custom token and ensure the tokenizer is saved properly
+        added_tokens = tokenizer.add_tokens(['<extra_id_99>'])  # Add custom token if required
+        # Ensure the tokenization process handles the vocabulary correctly
         tokenizer.save_pretrained(tokenizer_path)
     else:
         tokenizer = T5Tokenizer.from_pretrained(tokenizer_path, local_files_only=True)
     model = T5ForConditionalGeneration.from_pretrained(model_path, local_files_only=True, ignore_mismatched_sizes=True)
+    model.resize_token_embeddings(len(tokenizer))  # Resize the embeddings to match tokenizer size
     device = torch.device("cpu")
     model.to(device)
     model_loaded = True