Spaces:
Running on Zero

Ruurd committed on
Commit
a86f3af
·
verified ·
1 Parent(s): 7d7b6d7

Add MASK slider

Browse files
Files changed (1) hide show
  1. app.py +31 -17
app.py CHANGED
@@ -89,17 +89,24 @@ def get_noising_schedule(i, max_it, sharpness=5.0):
89
  x = i / max_it
90
  return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
91
 
92
- def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clustering=0.5, noise_start = 1.0):
93
  noised = input_ids.copy()
94
  answer_len = len(noised) - answer_start
95
  num_to_noise = int(threshold * answer_len * noise_start)
 
96
 
97
  if num_to_noise == 0:
98
  return noised, []
99
 
100
  mixed_probs = token_probabilities.copy()
101
- mixed_probs[eot_token_id] *= eot_weight
102
- mixed_probs /= mixed_probs.sum()
 
 
 
 
 
 
103
 
104
  num_clusters = max(1, int((1 - clustering) * num_to_noise))
105
  cluster_size = max(1, int(num_to_noise / num_clusters))
@@ -116,17 +123,17 @@ def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clust
116
  noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
117
  for idx, val in zip(noised_indices, noise):
118
  noised[idx] = val
119
- noised[idx] = tokenizer.encode('MASK', add_special_tokens = False)[0]
120
 
121
  return noised, noised_indices
122
 
123
 
124
  # Add new noising function
125
- def confidence_guided_noising(input_ids, answer_start, confidences, noise_clipping, threshold=1.0, eot_weight = 1.0, noise_start = 1.0):
126
  noised = input_ids.copy()
127
  answer_len = len(input_ids) - answer_start
128
  num_to_noise = int(threshold * answer_len * noise_start)
129
-
 
130
  if num_to_noise == 0:
131
  return noised
132
 
@@ -151,8 +158,14 @@ def confidence_guided_noising(input_ids, answer_start, confidences, noise_clippi
151
  )
152
 
153
  mixed_probs = token_probabilities.copy()
154
- mixed_probs[eot_token_id] *= eot_weight
155
- mixed_probs /= mixed_probs.sum()
 
 
 
 
 
 
156
 
157
  noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
158
  for idx, val in zip(indices, noise):
@@ -177,7 +190,7 @@ def generate_diffusion_text(input_ids):
177
  return sampled, conf
178
 
179
  # --- Inference Wrapper ---
180
- def diffusion_chat(question, eot_weight, max_it, pause_length, sharpness, clustering, noise_start, use_confidence_noising, noise_clipping):
181
  placeholder = "What do you know about the city of New York?"
182
  if question.strip() == "":
183
  question = placeholder
@@ -197,7 +210,7 @@ def diffusion_chat(question, eot_weight, max_it, pause_length, sharpness, cluste
197
 
198
  ori_input_tokens = input_ids
199
  current_tokens, just_noised_indices = noisify_answer(
200
- input_ids, answer_start, threshold=1.0, eot_weight=eot_weight, clustering=clustering, noise_start = 1.0,
201
  )
202
  yield f"<b>Iteration 0 (initial noise):</b><br>" + tokenizer.decode(current_tokens[answer_start:], skip_special_tokens=True).replace('\n', '<br>')
203
  time.sleep(pause_length)
@@ -244,19 +257,19 @@ def diffusion_chat(question, eot_weight, max_it, pause_length, sharpness, cluste
244
  threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
245
  if use_confidence_noising:
246
  noised_answer = confidence_guided_noising(
247
- current_tokens, answer_start, confidences, noise_clipping, threshold=threshold, eot_weight=eot_weight, noise_start=noise_start
248
  )
249
  just_noised_indices = []
250
  else:
251
  noised_answer, just_noised_indices = noisify_answer(
252
- current_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, clustering=clustering, noise_start = noise_start,
253
  )
254
 
255
  # Compose full input again: prompt + noised answer
256
  current_tokens = ori_input_tokens[:answer_start] + noised_answer[answer_start:]
257
 
258
  # --- RED HIGHLIGHT ---
259
- decoded_tokens = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
260
  highlighted = []
261
  for j, tok in enumerate(decoded_tokens):
262
  tok_id = tokenizer.convert_tokens_to_ids(tok)
@@ -288,14 +301,15 @@ demo = gr.Interface(
288
  fn=diffusion_chat,
289
  inputs=[
290
  gr.Textbox(label="User Question", lines=2, placeholder="What do you know about the city of New York?"),
291
- gr.Slider(0, 1, value=0.4, step=0.05, label="↓ = longer answers (EOT weight)"),
292
- gr.Slider(1, 512, value=64, step=1, label=" = more iterations"),
 
293
  gr.Slider(0.01, 5, value=0.01, step=0.01, label="↑ = longer pause (for visualization)"),
294
  gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="↓ = more noising (sharpness)"),
295
- gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="↑ = more clustered noising (fewer, larger edits)"),
296
  gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="↑ = more noise (noise start)"),
297
  gr.Checkbox(value=False, label="Use confidence-guided noising"),
298
- gr.Slider(0.01, 1.0, value=0.05, step=0.01, label="↓ = more confidence guidance (noise clipping)"),
299
 
300
  ],
301
  outputs=[gr.HTML(label="Diffusion Output")],
 
89
  x = i / max_it
90
  return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
91
 
92
+ def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, mask_weight=0.0, clustering=0.5, noise_start = 1.0):
93
  noised = input_ids.copy()
94
  answer_len = len(noised) - answer_start
95
  num_to_noise = int(threshold * answer_len * noise_start)
96
+ mask_token_id = tokenizer.encode('MASK', add_special_tokens = False)[0]
97
 
98
  if num_to_noise == 0:
99
  return noised, []
100
 
101
  mixed_probs = token_probabilities.copy()
102
+
103
+ # Scale all other probabilities so they sum to 1 - mask_weight
104
+ total_other = mixed_probs.sum() - mixed_probs[mask_token_id]
105
+ scale = (1.0 - mask_weight) / total_other
106
+ mixed_probs *= scale
107
+
108
+ # Set mask_token_id to mask_weight explicitly
109
+ mixed_probs[mask_token_id] = mask_weight
110
 
111
  num_clusters = max(1, int((1 - clustering) * num_to_noise))
112
  cluster_size = max(1, int(num_to_noise / num_clusters))
 
123
  noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
124
  for idx, val in zip(noised_indices, noise):
125
  noised[idx] = val
 
126
 
127
  return noised, noised_indices
128
 
129
 
130
  # Add new noising function
131
+ def confidence_guided_noising(input_ids, answer_start, confidences, noise_clipping, threshold=1.0, eot_weight = 1.0, mask_weight = 0.0, noise_start = 1.0):
132
  noised = input_ids.copy()
133
  answer_len = len(input_ids) - answer_start
134
  num_to_noise = int(threshold * answer_len * noise_start)
135
+ mask_token_id = tokenizer.encode('MASK', add_special_tokens = False)[0]
136
+
137
  if num_to_noise == 0:
138
  return noised
139
 
 
158
  )
159
 
160
  mixed_probs = token_probabilities.copy()
161
+
162
+ # Scale all other probabilities so they sum to 1 - mask_weight
163
+ total_other = mixed_probs.sum() - mixed_probs[mask_token_id]
164
+ scale = (1.0 - mask_weight) / total_other
165
+ mixed_probs *= scale
166
+
167
+ # Set mask_token_id to mask_weight explicitly
168
+ mixed_probs[mask_token_id] = mask_weight
169
 
170
  noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
171
  for idx, val in zip(indices, noise):
 
190
  return sampled, conf
191
 
192
  # --- Inference Wrapper ---
193
+ def diffusion_chat(question, eot_weight, mask_weight, max_it, pause_length, sharpness, clustering, noise_start, use_confidence_noising, noise_clipping):
194
  placeholder = "What do you know about the city of New York?"
195
  if question.strip() == "":
196
  question = placeholder
 
210
 
211
  ori_input_tokens = input_ids
212
  current_tokens, just_noised_indices = noisify_answer(
213
+ input_ids, answer_start, threshold=1.0, eot_weight=eot_weight, mask_weight=mask_weight, clustering=clustering, noise_start = 1.0,
214
  )
215
  yield f"<b>Iteration 0 (initial noise):</b><br>" + tokenizer.decode(current_tokens[answer_start:], skip_special_tokens=True).replace('\n', '<br>')
216
  time.sleep(pause_length)
 
257
  threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
258
  if use_confidence_noising:
259
  noised_answer = confidence_guided_noising(
260
+ current_tokens, answer_start, confidences, noise_clipping, threshold=threshold, eot_weight=eot_weight, mask_weight=mask_weight, noise_start=noise_start
261
  )
262
  just_noised_indices = []
263
  else:
264
  noised_answer, just_noised_indices = noisify_answer(
265
+ current_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, mask_weight=mask_weight, clustering=clustering, noise_start = noise_start,
266
  )
267
 
268
  # Compose full input again: prompt + noised answer
269
  current_tokens = ori_input_tokens[:answer_start] + noised_answer[answer_start:]
270
 
271
  # --- RED HIGHLIGHT ---
272
+ decoded_tokens = tokenizer.convert_ids_to_tokens(previous_tokens[answer_start:])
273
  highlighted = []
274
  for j, tok in enumerate(decoded_tokens):
275
  tok_id = tokenizer.convert_tokens_to_ids(tok)
 
301
  fn=diffusion_chat,
302
  inputs=[
303
  gr.Textbox(label="User Question", lines=2, placeholder="What do you know about the city of New York?"),
304
+ gr.Slider(0, 1, value=0.5, step=0.05, label="↓ = longer answers (EOT weight)"),
305
+ gr.Slider(0, 1, value=0.5, step=0.05, label="↑ = more random answers (MASK weight)"),
306
+ gr.Slider(1, 512, value=32, step=1, label="↑ = more iterations"),
307
  gr.Slider(0.01, 5, value=0.01, step=0.01, label="↑ = longer pause (for visualization)"),
308
  gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="↓ = more noising (sharpness)"),
309
+ gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="↑ = more clustered noising (fewer, larger edits)"),
310
  gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="↑ = more noise (noise start)"),
311
  gr.Checkbox(value=False, label="Use confidence-guided noising"),
312
+ gr.Slider(0.01, 1.0, value=0.01, step=0.01, label="↓ = more confidence guidance (noise clipping)"),
313
 
314
  ],
315
  outputs=[gr.HTML(label="Diffusion Output")],