Spaces:
Sleeping
Sleeping
Add insert noising
Browse files
app.py
CHANGED
@@ -4,7 +4,6 @@ import numpy as np
|
|
4 |
import json
|
5 |
import time
|
6 |
from transformers import AutoTokenizer
|
7 |
-
from llama_diffusion_model import disable_dropout
|
8 |
import os
|
9 |
import importlib
|
10 |
from huggingface_hub import hf_hub_download
|
@@ -57,22 +56,32 @@ def get_noising_schedule(i, max_it, sharpness=5.0):
|
|
57 |
x = i / max_it
|
58 |
return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
|
59 |
|
60 |
-
def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0):
|
61 |
noised = input_ids.copy()
|
62 |
-
answer_len = len(
|
63 |
num_to_noise = int(threshold * answer_len)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
if num_to_noise > 0:
|
65 |
-
indices = rng.choice(np.arange(answer_start, len(
|
66 |
|
67 |
mixed_probs = token_probabilities.copy()
|
68 |
mixed_probs[eot_token_id] *= eot_weight
|
69 |
mixed_probs /= mixed_probs.sum()
|
70 |
|
71 |
-
noise = rng.choice(np.arange(vocab_size), size=
|
72 |
for idx, val in zip(indices, noise):
|
73 |
noised[idx] = val
|
|
|
74 |
return noised
|
75 |
|
|
|
76 |
# Add new noising function
|
77 |
def confidence_guided_noising(input_ids, answer_start, confidences, threshold, eot_weight, noise_clipping):
|
78 |
noised = input_ids.copy()
|
@@ -82,9 +91,14 @@ def confidence_guided_noising(input_ids, answer_start, confidences, threshold, e
|
|
82 |
if num_to_noise == 0:
|
83 |
return noised
|
84 |
|
85 |
-
|
86 |
raw_weights = 1.0 - np.array(confidences[answer_start:])
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
88 |
weights = raw_weights / raw_weights.sum()
|
89 |
|
90 |
if num_to_noise > len(weights):
|
@@ -97,10 +111,8 @@ def confidence_guided_noising(input_ids, answer_start, confidences, threshold, e
|
|
97 |
p=weights
|
98 |
)
|
99 |
|
100 |
-
# Avoid zero-probability for token sampling
|
101 |
mixed_probs = token_probabilities.copy()
|
102 |
mixed_probs[eot_token_id] *= eot_weight
|
103 |
-
# mixed_probs = np.clip(mixed_probs, 1e-6, None) # fix for EOT weight near 0
|
104 |
mixed_probs /= mixed_probs.sum()
|
105 |
|
106 |
noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
|
@@ -123,13 +135,10 @@ def generate_diffusion_text(input_ids, answer_start):
|
|
123 |
|
124 |
# Extract confidence of selected tokens
|
125 |
conf = probs[range(len(sampled)), sampled].cpu().numpy()
|
126 |
-
return sampled, conf
|
127 |
|
128 |
# --- Inference Wrapper ---
|
129 |
-
|
130 |
-
|
131 |
-
# Modify diffusion_chat to use new noising conditionally
|
132 |
-
def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_confidence_noising):
|
133 |
placeholder = "What do you know about the city of New York?"
|
134 |
if question.strip() == "":
|
135 |
question = placeholder
|
@@ -186,7 +195,7 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
|
|
186 |
if use_confidence_noising:
|
187 |
current_tokens = confidence_guided_noising(generated_tokens, answer_start, confidences, threshold, eot_weight, noise_clipping)
|
188 |
else:
|
189 |
-
current_tokens = noisify_answer(generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight)
|
190 |
|
191 |
time.sleep(0.01)
|
192 |
|
@@ -197,7 +206,6 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
|
|
197 |
yield f"<b>Final Output (after {i+1} iterations):</b><br>" + final_output.replace('\n', '<br>')
|
198 |
|
199 |
# --- Gradio Interface ---
|
200 |
-
|
201 |
print("Loading model...")
|
202 |
model = load_model()
|
203 |
print("✅ Model loaded.")
|
@@ -209,8 +217,9 @@ demo = gr.Interface(
|
|
209 |
gr.Slider(0, 1, value=0.4, step=0.05, label="↓ = longer answers (EOT weight)"),
|
210 |
gr.Slider(1, 512, value=64, step=1, label="↑ = more iterations"),
|
211 |
gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="↓ = more noising (sharpness)"),
|
212 |
-
gr.Slider(0.01, 1.0, value=0.05, step=0.01, label="
|
213 |
-
gr.Checkbox(value=False, label="Use confidence-guided noising")
|
|
|
214 |
],
|
215 |
outputs=[gr.HTML(label="Diffusion Output")],
|
216 |
title="Diffusion Language Model Chat",
|
|
|
4 |
import json
|
5 |
import time
|
6 |
from transformers import AutoTokenizer
|
|
|
7 |
import os
|
8 |
import importlib
|
9 |
from huggingface_hub import hf_hub_download
|
|
|
56 |
x = i / max_it
|
57 |
return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
|
58 |
|
59 |
+
def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, insert_prob=0.05):
    """Return a copy of ``input_ids`` with part of the answer region re-noised.

    Two kinds of noise are applied to the positions from ``answer_start`` on:

    1. With probability ``insert_prob`` one random token is inserted strictly
       after ``answer_start``; the tokens behind it shift right by one and the
       last token is dropped, so the sequence length never changes.
    2. ``int(threshold * answer_len)`` distinct answer positions are replaced
       by tokens sampled from ``token_probabilities``, with the probability of
       ``eot_token_id`` scaled by ``eot_weight`` and renormalised.

    The function reads the module-level names ``rng``, ``vocab_size``,
    ``token_probabilities`` and ``eot_token_id``.

    Args:
        input_ids: Token sequence supporting ``.copy()``, ``len()`` and
            slice/index assignment (e.g. a list or a 1-D NumPy array). It is
            not modified.
        answer_start: Index of the first answer token; earlier positions are
            never touched.
        threshold: Fraction of the answer region to replace with noise.
        eot_weight: Multiplier applied to the end-of-text token probability
            when sampling replacement tokens.
        insert_prob: Probability of inserting one extra token.

    Returns:
        The noised copy, with the same length and container type as
        ``input_ids``.
    """
    noised = input_ids.copy()
    total_len = len(noised)
    answer_len = total_len - answer_start
    num_to_noise = int(threshold * answer_len)

    # Draw unconditionally so the random stream does not depend on the
    # length of the answer region.
    wants_insert = rng.random() < insert_prob

    # Inserting needs at least one position strictly after `answer_start`;
    # rng.integers(low, high) raises ValueError when low >= high, which is
    # what happened for answers of one token or fewer.
    if wants_insert and answer_len > 1:
        insert_idx = int(rng.integers(answer_start + 1, total_len))
        # The inserted token uses the unweighted distribution (no EOT scaling).
        insert_token = rng.choice(np.arange(vocab_size), p=token_probabilities)
        # Shift the tail right by one in place (dropping the last token) rather
        # than concatenating and truncating: same result, but the container
        # type of `noised` stays that of `input_ids`.
        noised[insert_idx + 1:] = noised[insert_idx:-1].copy()
        noised[insert_idx] = insert_token

    if num_to_noise > 0:
        # Never request more distinct positions than the answer region holds.
        indices = rng.choice(
            np.arange(answer_start, total_len),
            size=min(num_to_noise, answer_len),
            replace=False,
        )

        mixed_probs = token_probabilities.copy()
        mixed_probs[eot_token_id] *= eot_weight
        mixed_probs /= mixed_probs.sum()

        noise = rng.choice(np.arange(vocab_size), size=len(indices), p=mixed_probs)
        # Element-wise assignment keeps this working for plain lists as well.
        for idx, val in zip(indices, noise):
            noised[idx] = val

    return noised
|
83 |
|
84 |
+
|
85 |
# Add new noising function
|
86 |
def confidence_guided_noising(input_ids, answer_start, confidences, threshold, eot_weight, noise_clipping):
|
87 |
noised = input_ids.copy()
|
|
|
91 |
if num_to_noise == 0:
|
92 |
return noised
|
93 |
|
94 |
+
|
95 |
raw_weights = 1.0 - np.array(confidences[answer_start:])
|
96 |
+
|
97 |
+
# Avoid zero-probability weights for selection
|
98 |
+
# If noise clipping == 1, all tokens have equal chance to be noised.
|
99 |
+
# If noise_clipping == 0.00001, all tokens are noised according to the confidence of the past prediction
|
100 |
+
raw_weights = np.clip(raw_weights, a_min = noise_clipping, a_max = None)
|
101 |
+
|
102 |
weights = raw_weights / raw_weights.sum()
|
103 |
|
104 |
if num_to_noise > len(weights):
|
|
|
111 |
p=weights
|
112 |
)
|
113 |
|
|
|
114 |
mixed_probs = token_probabilities.copy()
|
115 |
mixed_probs[eot_token_id] *= eot_weight
|
|
|
116 |
mixed_probs /= mixed_probs.sum()
|
117 |
|
118 |
noise = rng.choice(np.arange(vocab_size), size=num_to_noise, p=mixed_probs)
|
|
|
135 |
|
136 |
# Extract confidence of selected tokens
|
137 |
conf = probs[range(len(sampled)), sampled].cpu().numpy()
|
138 |
+
return sampled, conf
|
139 |
|
140 |
# --- Inference Wrapper ---
|
141 |
+
def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_confidence_noising, insert_prob):
|
|
|
|
|
|
|
142 |
placeholder = "What do you know about the city of New York?"
|
143 |
if question.strip() == "":
|
144 |
question = placeholder
|
|
|
195 |
if use_confidence_noising:
|
196 |
current_tokens = confidence_guided_noising(generated_tokens, answer_start, confidences, threshold, eot_weight, noise_clipping)
|
197 |
else:
|
198 |
+
current_tokens = noisify_answer(generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, insert_prob=insert_prob)
|
199 |
|
200 |
time.sleep(0.01)
|
201 |
|
|
|
206 |
yield f"<b>Final Output (after {i+1} iterations):</b><br>" + final_output.replace('\n', '<br>')
|
207 |
|
208 |
# --- Gradio Interface ---
|
|
|
209 |
print("Loading model...")
|
210 |
model = load_model()
|
211 |
print("✅ Model loaded.")
|
|
|
217 |
gr.Slider(0, 1, value=0.4, step=0.05, label="↓ = longer answers (EOT weight)"),
|
218 |
gr.Slider(1, 512, value=64, step=1, label="↑ = more iterations"),
|
219 |
gr.Slider(1.0, 20.0, value=5.0, step=0.5, label="↓ = more noising (sharpness)"),
|
220 |
+
gr.Slider(0.01, 1.0, value=0.05, step=0.01, label="↓ = more confidence guidance (noise clipping)"),
|
221 |
+
gr.Checkbox(value=False, label="Use confidence-guided noising"),
|
222 |
+
gr.Slider(0.0, 1.0, value=0.05, step=0.01, label="Chance to insert token (↓ = less structural change)")
|
223 |
],
|
224 |
outputs=[gr.HTML(label="Diffusion Output")],
|
225 |
title="Diffusion Language Model Chat",
|