Update app.py
app.py
CHANGED
@@ -84,12 +84,17 @@ try:
     # Check if the tokenizer is saved, if not, add custom tokens
     if not os.path.exists(tokenizer_path):
        tokenizer = T5Tokenizer.from_pretrained("t5-small")
-
+
+        # Add the custom token and ensure the tokenizer is saved properly
+        added_tokens = tokenizer.add_tokens(['<extra_id_99>'])  # Add custom token if required
+
+        # Ensure the tokenization process handles the vocabulary correctly
         tokenizer.save_pretrained(tokenizer_path)
     else:
         tokenizer = T5Tokenizer.from_pretrained(tokenizer_path, local_files_only=True)
 
     model = T5ForConditionalGeneration.from_pretrained(model_path, local_files_only=True, ignore_mismatched_sizes=True)
+    model.resize_token_embeddings(len(tokenizer))  # Resize the embeddings to match tokenizer size
     device = torch.device("cpu")
     model.to(device)
     model_loaded = True
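
For context, the resulting load path looks roughly like the sketch below. This is a minimal, self-contained reading of the changed hunk, not the full app.py: the `model_path` and `tokenizer_path` values here are hypothetical placeholders, and `add_tokens` only grows the vocabulary if the token is not already present. The key point of the commit is that `resize_token_embeddings(len(tokenizer))` keeps the model's embedding matrix in sync with the (possibly extended) tokenizer.

import os
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

model_path = "./model"          # hypothetical local checkpoint directory
tokenizer_path = "./tokenizer"  # hypothetical local tokenizer directory

if not os.path.exists(tokenizer_path):
    # First run: start from the base tokenizer, register the custom token,
    # then persist it so later runs load the extended vocabulary directly.
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    tokenizer.add_tokens(["<extra_id_99>"])
    tokenizer.save_pretrained(tokenizer_path)
else:
    tokenizer = T5Tokenizer.from_pretrained(tokenizer_path, local_files_only=True)

model = T5ForConditionalGeneration.from_pretrained(
    model_path, local_files_only=True, ignore_mismatched_sizes=True
)
# The embedding matrix must cover every tokenizer id, including added tokens,
# otherwise lookups for those ids go out of range at inference time.
model.resize_token_embeddings(len(tokenizer))
model.to(torch.device("cpu"))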