Spaces:
Running on Zero

Ruurd committed on
Commit
a73a13e
·
verified ·
1 Parent(s): ea86b58

Fix MASK token noising

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -115,7 +115,7 @@ def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clust
115
  noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
116
  for idx, val in zip(noised_indices, noise):
117
  noised[idx] = val
118
- noised[idx] = tokenizer.encode('MASK', add_special_tokens = False)
119
 
120
  return noised, noised_indices
121
 
 
115
  noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
116
  for idx, val in zip(noised_indices, noise):
117
  noised[idx] = val
118
+ noised[idx] = tokenizer.encode('MASK', add_special_tokens = False)[0]
119
 
120
  return noised, noised_indices
121