hadheedo committed on
Commit
0a99a64
·
verified ·
1 Parent(s): fc91025

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -84,12 +84,17 @@ try:
84
  # Check if the tokenizer is saved, if not, add custom tokens
85
  if not os.path.exists(tokenizer_path):
86
  tokenizer = T5Tokenizer.from_pretrained("t5-small")
87
- tokenizer.add_tokens(['<extra_id_99>']) # Add custom token if required
 
 
 
 
88
  tokenizer.save_pretrained(tokenizer_path)
89
  else:
90
  tokenizer = T5Tokenizer.from_pretrained(tokenizer_path, local_files_only=True)
91
 
92
  model = T5ForConditionalGeneration.from_pretrained(model_path, local_files_only=True, ignore_mismatched_sizes=True)
 
93
  device = torch.device("cpu")
94
  model.to(device)
95
  model_loaded = True
 
84
  # Check if the tokenizer is saved, if not, add custom tokens
85
  if not os.path.exists(tokenizer_path):
86
  tokenizer = T5Tokenizer.from_pretrained("t5-small")
87
+
88
+ # Add the custom token and ensure the tokenizer is saved properly
89
+ added_tokens = tokenizer.add_tokens(['<extra_id_99>']) # Add custom token if required
90
+
91
+ # Ensure the tokenization process handles the vocabulary correctly
92
  tokenizer.save_pretrained(tokenizer_path)
93
  else:
94
  tokenizer = T5Tokenizer.from_pretrained(tokenizer_path, local_files_only=True)
95
 
96
  model = T5ForConditionalGeneration.from_pretrained(model_path, local_files_only=True, ignore_mismatched_sizes=True)
97
+ model.resize_token_embeddings(len(tokenizer)) # Resize the embeddings to match tokenizer size
98
  device = torch.device("cpu")
99
  model.to(device)
100
  model_loaded = True