Spaces:

Ruurd
/

tini

Running on Zero

App Files Files

Ruurd committed 17 days ago

Commit

a86f3af

verified ·

1 Parent(s): 7d7b6d7

Add MASK slider

Browse files

Files changed (1) hide show

app.py +31 -17

app.py CHANGED Viewed

@@ -89,17 +89,24 @@ def get_noising_schedule(i, max_it, sharpness=5.0):
     x = i / max_it
     return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
-def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clustering=0.5, noise_start = 1.0):
     noised = input_ids.copy()
     answer_len = len(noised) - answer_start
     num_to_noise = int(threshold * answer_len * noise_start)
     if num_to_noise == 0:
         return noised, []
     mixed_probs = token_probabilities.copy()
-    mixed_probs[eot_token_id] *= eot_weight
-    mixed_probs /= mixed_probs.sum()
     num_clusters = max(1, int((1 - clustering) * num_to_noise))
     cluster_size = max(1, int(num_to_noise / num_clusters))
@@ -116,17 +123,17 @@ def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clust
     noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
     for idx, val in zip(noised_indices, noise):
         noised[idx] = val
-        noised[idx] = tokenizer.encode('MASK', add_special_tokens = False)[0]
     return noised, noised_indices
 # Add new noising function
-def confidence_guided_noising(input_ids, answer_start, confidences, noise_clipping, threshold=1.0, eot_weight = 1.0, noise_start = 1.0):
     noised = input_ids.copy()
     answer_len = len(input_ids) - answer_start
     num_to_noise = int(threshold * answer_len * noise_start)
     if num_to_noise == 0:
         return noised
@@ -151,8 +158,14 @@ def confidence_guided_noising(input_ids, answer_start, confidences, noise_clippi
     )
     mixed_probs = token_probabilities.copy()
-    mixed_probs[eot_token_id] *= eot_weight
-    mixed_probs /= mixed_probs.sum()
     noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
     for idx, val in zip(indices, noise):
@@ -177,7 +190,7 @@ def generate_diffusion_text(input_ids):
     return sampled, conf
 # --- Inference Wrapper ---
-def diffusion_chat(question, eot_weight, max_it, pause_length, sharpness, clustering, noise_start, use_confidence_noising, noise_clipping):
     placeholder = "What do you know about the city of New York?"
     if question.strip() == "":
         question = placeholder
@@ -197,7 +210,7 @@ def diffusion_chat(question, eot_weight, max_it, pause_length, sharpness, cluste
     ori_input_tokens = input_ids
     current_tokens, just_noised_indices = noisify_answer(
-                input_ids, answer_start, threshold=1.0, eot_weight=eot_weight, clustering=clustering, noise_start = 1.0,
             )
     yield f"<b>Iteration 0 (initial noise):</b><br>" + tokenizer.decode(current_tokens[answer_start:], skip_special_tokens=True).replace('\n', '<br>')
     time.sleep(pause_length)
@@ -244,19 +257,19 @@ def diffusion_chat(question, eot_weight, max_it, pause_length, sharpness, cluste
         threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
         if use_confidence_noising:
             noised_answer = confidence_guided_noising(
-                current_tokens, answer_start, confidences, noise_clipping, threshold=threshold, eot_weight=eot_weight, noise_start=noise_start
             )
             just_noised_indices = []
         else:
             noised_answer, just_noised_indices = noisify_answer(
-                current_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, clustering=clustering, noise_start = noise_start,
             )
         # Compose full input again: prompt + noised answer
         current_tokens = ori_input_tokens[:answer_start] + noised_answer[answer_start:]
         # --- RED HIGHLIGHT ---
-        decoded_tokens = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
         highlighted = []
         for j, tok in enumerate(decoded_tokens):
             tok_id = tokenizer.convert_tokens_to_ids(tok)
@@ -288,14 +301,15 @@ demo = gr.Interface(
     fn=diffusion_chat,
     inputs=[
         gr.Textbox(label="User Question", lines=2, placeholder="What do you know about the city of New York?"),
-        gr.Slider(0, 1, value=0.4, step=0.05, label="↓ = longer answers (EOT weight)"),
-        gr.Slider(1, 512, value=64, step=1, label="↑ = more iterations"),
         gr.Slider(0.01, 5, value=0.01, step=0.01, label="↑ = longer pause (for visualization)"),
         gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="↓ = more noising (sharpness)"),
-        gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="↑ = more clustered noising (fewer, larger edits)"),
         gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="↑ = more noise (noise start)"),
         gr.Checkbox(value=False, label="Use confidence-guided noising"),
-        gr.Slider(0.01, 1.0, value=0.05, step=0.01, label="↓ = more confidence guidance (noise clipping)"),
     ],
     outputs=[gr.HTML(label="Diffusion Output")],

     x = i / max_it
     return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
+def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, mask_weight=0.0, clustering=0.5, noise_start = 1.0):
     noised = input_ids.copy()
     answer_len = len(noised) - answer_start
     num_to_noise = int(threshold * answer_len * noise_start)
+    mask_token_id = tokenizer.encode('MASK', add_special_tokens = False)[0]
     if num_to_noise == 0:
         return noised, []
     mixed_probs = token_probabilities.copy()
+    # Scale all other probabilities so they sum to 1 - mask_weight
+    total_other = mixed_probs.sum() - mixed_probs[mask_token_id]
+    scale = (1.0 - mask_weight) / total_other
+    mixed_probs *= scale
+    # Set mask_token_id to mask_weight explicitly
+    mixed_probs[mask_token_id] = mask_weight
     num_clusters = max(1, int((1 - clustering) * num_to_noise))
     cluster_size = max(1, int(num_to_noise / num_clusters))
     noise = rng.choice(np.arange(vocab_size), size=len(noised_indices), p=mixed_probs)
     for idx, val in zip(noised_indices, noise):
         noised[idx] = val
     return noised, noised_indices
 # Add new noising function
+def confidence_guided_noising(input_ids, answer_start, confidences, noise_clipping, threshold=1.0, eot_weight = 1.0, mask_weight = 0.0, noise_start = 1.0):
     noised = input_ids.copy()
     answer_len = len(input_ids) - answer_start
     num_to_noise = int(threshold * answer_len * noise_start)
+    mask_token_id = tokenizer.encode('MASK', add_special_tokens = False)[0]
     if num_to_noise == 0:
         return noised
     )
     mixed_probs = token_probabilities.copy()
+    # Scale all other probabilities so they sum to 1 - mask_weight
+    total_other = mixed_probs.sum() - mixed_probs[mask_token_id]
+    scale = (1.0 - mask_weight) / total_other
+    mixed_probs *= scale
+    # Set mask_token_id to mask_weight explicitly
+    mixed_probs[mask_token_id] = mask_weight
     noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
     for idx, val in zip(indices, noise):
     return sampled, conf
 # --- Inference Wrapper ---
+def diffusion_chat(question, eot_weight, mask_weight, max_it, pause_length, sharpness, clustering, noise_start, use_confidence_noising, noise_clipping):
     placeholder = "What do you know about the city of New York?"
     if question.strip() == "":
         question = placeholder
     ori_input_tokens = input_ids
     current_tokens, just_noised_indices = noisify_answer(
+                input_ids, answer_start, threshold=1.0, eot_weight=eot_weight, mask_weight=mask_weight, clustering=clustering, noise_start = 1.0,
             )
     yield f"<b>Iteration 0 (initial noise):</b><br>" + tokenizer.decode(current_tokens[answer_start:], skip_special_tokens=True).replace('\n', '<br>')
     time.sleep(pause_length)
         threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
         if use_confidence_noising:
             noised_answer = confidence_guided_noising(
+                current_tokens, answer_start, confidences, noise_clipping, threshold=threshold, eot_weight=eot_weight, mask_weight=mask_weight, noise_start=noise_start
             )
             just_noised_indices = []
         else:
             noised_answer, just_noised_indices = noisify_answer(
+                current_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, mask_weight=mask_weight, clustering=clustering, noise_start = noise_start,
             )
         # Compose full input again: prompt + noised answer
         current_tokens = ori_input_tokens[:answer_start] + noised_answer[answer_start:]
         # --- RED HIGHLIGHT ---
+        decoded_tokens = tokenizer.convert_ids_to_tokens(previous_tokens[answer_start:])
         highlighted = []
         for j, tok in enumerate(decoded_tokens):
             tok_id = tokenizer.convert_tokens_to_ids(tok)
     fn=diffusion_chat,
     inputs=[
         gr.Textbox(label="User Question", lines=2, placeholder="What do you know about the city of New York?"),
+        gr.Slider(0, 1, value=0.5, step=0.05, label="↓ = longer answers (EOT weight)"),
+        gr.Slider(0, 1, value=0.5, step=0.05, label="↓ = more random answers (MASK weight)"),
+        gr.Slider(1, 512, value=32, step=1, label="↑ = more iterations"),
         gr.Slider(0.01, 5, value=0.01, step=0.01, label="↑ = longer pause (for visualization)"),
         gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="↓ = more noising (sharpness)"),
+        gr.Slider(0.0, 1.0, value=0.0, step=0.05, label="↑ = more clustered noising (fewer, larger edits)"),
         gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="↑ = more noise (noise start)"),
         gr.Checkbox(value=False, label="Use confidence-guided noising"),
+        gr.Slider(0.01, 1.0, value=0.01, step=0.01, label="↓ = more confidence guidance (noise clipping)"),
     ],
     outputs=[gr.HTML(label="Diffusion Output")],