Spaces: Running on Zero
Fix MASK token noising
Browse files
app.py
CHANGED
@@ -115,7 +115,7 @@ def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clust
|
|
115 |
noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
|
116 |
for idx, val in zip(noised_indices, noise):
|
117 |
noised[idx] = val
|
118 |
-
noised[idx] = tokenizer.encode('MASK', add_special_tokens = False)
|
119 |
|
120 |
return noised, noised_indices
|
121 |
|
|
|
115 |
noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
|
116 |
for idx, val in zip(noised_indices, noise):
|
117 |
noised[idx] = val
|
118 |
+
noised[idx] = tokenizer.encode('MASK', add_special_tokens = False)[0]
|
119 |
|
120 |
return noised, noised_indices
|
121 |
|