Ruurd committed on
Commit
13b1370
·
1 Parent(s): 7c2923c

Add insert noising

Browse files
Files changed (1) hide show
  1. app.py +27 -18
app.py CHANGED
@@ -4,7 +4,6 @@ import numpy as np
4
  import json
5
  import time
6
  from transformers import AutoTokenizer
7
- from llama_diffusion_model import disable_dropout
8
  import os
9
  import importlib
10
  from huggingface_hub import hf_hub_download
@@ -57,22 +56,32 @@ def get_noising_schedule(i, max_it, sharpness=5.0):
57
  x = i / max_it
58
  return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
59
 
60
- def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0):
61
  noised = input_ids.copy()
62
- answer_len = len(input_ids) - answer_start
63
  num_to_noise = int(threshold * answer_len)
 
 
 
 
 
 
 
 
64
  if num_to_noise > 0:
65
- indices = rng.choice(np.arange(answer_start, len(input_ids)), size=num_to_noise, replace=False)
66
 
67
  mixed_probs = token_probabilities.copy()
68
  mixed_probs[eot_token_id] *= eot_weight
69
  mixed_probs /= mixed_probs.sum()
70
 
71
- noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
72
  for idx, val in zip(indices, noise):
73
  noised[idx] = val
 
74
  return noised
75
 
 
76
  # Add new noising function
77
  def confidence_guided_noising(input_ids, answer_start, confidences, threshold, eot_weight, noise_clipping):
78
  noised = input_ids.copy()
@@ -82,9 +91,14 @@ def confidence_guided_noising(input_ids, answer_start, confidences, threshold, e
82
  if num_to_noise == 0:
83
  return noised
84
 
85
- # Avoid zero-probability weights for selection
86
  raw_weights = 1.0 - np.array(confidences[answer_start:])
87
- raw_weights = np.clip(raw_weights, noise_clipping, None) # avoid 0s
 
 
 
 
 
88
  weights = raw_weights / raw_weights.sum()
89
 
90
  if num_to_noise > len(weights):
@@ -97,10 +111,8 @@ def confidence_guided_noising(input_ids, answer_start, confidences, threshold, e
97
  p=weights
98
  )
99
 
100
- # Avoid zero-probability for token sampling
101
  mixed_probs = token_probabilities.copy()
102
  mixed_probs[eot_token_id] *= eot_weight
103
- # mixed_probs = np.clip(mixed_probs, 1e-6, None) # fix for EOT weight near 0
104
  mixed_probs /= mixed_probs.sum()
105
 
106
  noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
@@ -123,13 +135,10 @@ def generate_diffusion_text(input_ids, answer_start):
123
 
124
  # Extract confidence of selected tokens
125
  conf = probs[range(len(sampled)), sampled].cpu().numpy()
126
- return sampled, conf # ✅ NEW: Return confidence
127
 
128
  # --- Inference Wrapper ---
129
-
130
-
131
- # Modify diffusion_chat to use new noising conditionally
132
- def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_confidence_noising):
133
  placeholder = "What do you know about the city of New York?"
134
  if question.strip() == "":
135
  question = placeholder
@@ -186,7 +195,7 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
186
  if use_confidence_noising:
187
  current_tokens = confidence_guided_noising(generated_tokens, answer_start, confidences, threshold, eot_weight, noise_clipping)
188
  else:
189
- current_tokens = noisify_answer(generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight)
190
 
191
  time.sleep(0.01)
192
 
@@ -197,7 +206,6 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
197
  yield f"<b>Final Output (after {i+1} iterations):</b><br>" + final_output.replace('\n', '<br>')
198
 
199
  # --- Gradio Interface ---
200
-
201
  print("Loading model...")
202
  model = load_model()
203
  print("✅ Model loaded.")
@@ -209,8 +217,9 @@ demo = gr.Interface(
209
  gr.Slider(0, 1, value=0.4, step=0.05, label="↓ = longer answers (EOT weight)"),
210
  gr.Slider(1, 512, value=64, step=1, label="↑ = more iterations"),
211
  gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="↓ = more noising (sharpness)"),
212
- gr.Slider(0.01, 1.0, value=0.05, step=0.01, label=" = more confidence guidance (noise clipping)"),
213
- gr.Checkbox(value=False, label="Use confidence-guided noising") # ✅ NEW
 
214
  ],
215
  outputs=[gr.HTML(label="Diffusion Output")],
216
  title="Diffusion Language Model Chat",
 
4
  import json
5
  import time
6
  from transformers import AutoTokenizer
 
7
  import os
8
  import importlib
9
  from huggingface_hub import hf_hub_download
 
56
  x = i / max_it
57
  return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
58
 
59
def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, insert_prob=0.05):
    """Return a noised copy of ``input_ids``; the input itself is not modified.

    Two kinds of noise are applied to the span ``[answer_start, len(input_ids))``:

    1. With probability ``insert_prob`` a single token sampled from
       ``token_probabilities`` is inserted at a random position strictly after
       ``answer_start``. The sequence is then truncated back to its original
       length, so the last token is dropped.
    2. ``int(threshold * answer_len)`` distinct positions in the span are
       overwritten with tokens sampled from ``token_probabilities`` after the
       probability of ``eot_token_id`` has been scaled by ``eot_weight`` and
       the distribution renormalised.

    Reads the module-level names ``rng``, ``vocab_size``,
    ``token_probabilities`` and ``eot_token_id``.

    Args:
        input_ids: Sequence of token ids supporting ``.copy()`` and slicing
            (e.g. a list or a NumPy array).
        answer_start: Index of the first position that may be noised.
        threshold: Fraction of the answer span to overwrite.
        eot_weight: Multiplier applied to the sampling probability of
            ``eot_token_id`` for the overwrite step.
        insert_prob: Probability of performing the single-token insertion.

    Returns:
        The noised sequence, the same length as ``input_ids``. When an
        insertion happens the result is a NumPy array (it is built with
        ``np.concatenate``); otherwise it has the type returned by
        ``input_ids.copy()``.
    """
    noised = input_ids.copy()
    total_len = len(input_ids)
    answer_len = total_len - answer_start
    num_to_noise = int(threshold * answer_len)

    # Randomly insert one token with probability `insert_prob`.
    # The random draw comes first so the RNG stream is consumed exactly as
    # before; the second condition skips insertion when the answer span has
    # fewer than two tokens, where rng.integers(low, high) would be called
    # with low >= high and raise ValueError.
    if rng.random() < insert_prob and answer_start + 1 < total_len:
        # Lower bound is answer_start + 1: never insert at the very start.
        insert_idx = rng.integers(answer_start + 1, total_len)
        insert_token = rng.choice(np.arange(vocab_size), p=token_probabilities)
        noised = np.concatenate([noised[:insert_idx], [insert_token], noised[insert_idx:]])
        # Keep the overall length fixed; the trailing token falls off.
        noised = noised[:total_len]

    if num_to_noise > 0:
        # Cap at the span size so sampling without replacement cannot fail
        # when threshold > 1.
        num_to_noise = min(num_to_noise, answer_len)
        indices = rng.choice(np.arange(answer_start, total_len), size=num_to_noise, replace=False)

        # Re-weight the end-of-text token, then renormalise to a distribution.
        mixed_probs = token_probabilities.copy()
        mixed_probs[eot_token_id] *= eot_weight
        mixed_probs /= mixed_probs.sum()

        noise = rng.choice(np.arange(vocab_size), size=len(indices), p=mixed_probs)
        for idx, val in zip(indices, noise):
            noised[idx] = val

    return noised
83
 
84
+
85
  # Add new noising function
86
  def confidence_guided_noising(input_ids, answer_start, confidences, threshold, eot_weight, noise_clipping):
87
  noised = input_ids.copy()
 
91
  if num_to_noise == 0:
92
  return noised
93
 
94
+
95
  raw_weights = 1.0 - np.array(confidences[answer_start:])
96
+
97
+ # Avoid zero-probability weights for selection
98
+ # If noise_clipping == 1, all tokens have an equal chance to be noised.
99
+ # If noise_clipping == 0.00001, all tokens are noised according to the confidence of the past prediction
100
+ raw_weights = np.clip(raw_weights, a_min = noise_clipping, a_max = None)
101
+
102
  weights = raw_weights / raw_weights.sum()
103
 
104
  if num_to_noise > len(weights):
 
111
  p=weights
112
  )
113
 
 
114
  mixed_probs = token_probabilities.copy()
115
  mixed_probs[eot_token_id] *= eot_weight
 
116
  mixed_probs /= mixed_probs.sum()
117
 
118
  noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
 
135
 
136
  # Extract confidence of selected tokens
137
  conf = probs[range(len(sampled)), sampled].cpu().numpy()
138
+ return sampled, conf
139
 
140
  # --- Inference Wrapper ---
141
+ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_confidence_noising, insert_prob):
 
 
 
142
  placeholder = "What do you know about the city of New York?"
143
  if question.strip() == "":
144
  question = placeholder
 
195
  if use_confidence_noising:
196
  current_tokens = confidence_guided_noising(generated_tokens, answer_start, confidences, threshold, eot_weight, noise_clipping)
197
  else:
198
+ current_tokens = noisify_answer(generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, insert_prob=insert_prob)
199
 
200
  time.sleep(0.01)
201
 
 
206
  yield f"<b>Final Output (after {i+1} iterations):</b><br>" + final_output.replace('\n', '<br>')
207
 
208
  # --- Gradio Interface ---
 
209
  print("Loading model...")
210
  model = load_model()
211
  print("✅ Model loaded.")
 
217
  gr.Slider(0, 1, value=0.4, step=0.05, label="↓ = longer answers (EOT weight)"),
218
  gr.Slider(1, 512, value=64, step=1, label="↑ = more iterations"),
219
  gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="↓ = more noising (sharpness)"),
220
+ gr.Slider(0.01, 1.0, value=0.05, step=0.01, label=" = more confidence guidance (noise clipping)"),
221
+ gr.Checkbox(value=False, label="Use confidence-guided noising"),
222
+ gr.Slider(0.0, 1.0, value=0.05, step=0.01, label="Chance to insert token (↓ = less structural change)")
223
  ],
224
  outputs=[gr.HTML(label="Diffusion Output")],
225
  title="Diffusion Language Model Chat",