Spaces:
Running on Zero

Ruurd committed on
Commit
d29da35
·
1 Parent(s): 5976e7b

Fix red highlighting

Browse files
Files changed (1) hide show
  1. app.py +39 -26
app.py CHANGED
@@ -165,37 +165,57 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
165
  current_tokens, just_noised_indices = noisify_answer(
166
  ori_input_tokens, answer_start, threshold=1.0, eot_weight=eot_weight, clustering=clustering
167
  )
168
- prev_decoded_tokens = []
169
  last_tokens = []
 
170
 
171
  for i in range(max_it):
172
  print('Generating output')
173
  generated_tokens, confidences = generate_diffusion_text(current_tokens, answer_start)
174
  current_tokens = generated_tokens
175
- just_noised_indices = []
 
176
  decoded_ids = current_tokens[answer_start:]
177
  decoded_tokens = tokenizer.convert_ids_to_tokens(decoded_ids)
178
  filtered_tokens = [tok for tok in decoded_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id]
179
- filtered_prev_tokens = [tok for tok in prev_decoded_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id] if prev_decoded_tokens else []
180
-
181
- if filtered_prev_tokens:
182
- highlighted = []
183
- for i, tok in enumerate(decoded_tokens):
184
- token_str = tokenizer.convert_tokens_to_string([tok])
185
-
186
- abs_idx = answer_start + i
187
- if abs_idx in just_noised_indices:
188
- highlighted.append(f'<span style="color:red">{token_str}</span>')
189
- elif prev_decoded_tokens and i < len(prev_decoded_tokens) and tok != prev_decoded_tokens[i]:
190
- highlighted.append(f'<span style="color:green">{token_str}</span>')
191
- else:
192
- highlighted.append(token_str)
 
 
 
193
  else:
194
- highlighted = [tokenizer.convert_tokens_to_string([tok]) for tok in filtered_tokens]
 
 
195
 
196
- prev_decoded_tokens = decoded_tokens
197
- yield f"<b>Iteration {i+1}/{max_it} (running):</b><br>" + "".join(highlighted).replace('\n', '<br>')
 
 
 
 
 
 
 
 
 
 
198
 
 
 
 
 
199
  last_tokens.append(generated_tokens)
200
  if len(last_tokens) > 3:
201
  last_tokens.pop(0)
@@ -203,13 +223,6 @@ def diffusion_chat(question, eot_weight, max_it, sharpness, noise_clipping, use_
203
  yield f"<b>Stopped early after {i+1} iterations.</b>"
204
  break
205
 
206
- threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
207
- if use_confidence_noising:
208
- current_tokens = confidence_guided_noising(generated_tokens, answer_start, confidences, threshold, eot_weight, noise_clipping)
209
- else:
210
- current_tokens, just_noised_indices = noisify_answer(generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, clustering=clustering)
211
-
212
- time.sleep(0.01)
213
 
214
  final_tokens = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
215
  final_tokens = [tok for tok in final_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id]
 
165
  current_tokens, just_noised_indices = noisify_answer(
166
  ori_input_tokens, answer_start, threshold=1.0, eot_weight=eot_weight, clustering=clustering
167
  )
 
168
  last_tokens = []
169
+ just_noised_indices = []
170
 
171
  for i in range(max_it):
172
  print('Generating output')
173
  generated_tokens, confidences = generate_diffusion_text(current_tokens, answer_start)
174
  current_tokens = generated_tokens
175
+
176
+ # --- Decode and highlight changed tokens in GREEN ---
177
  decoded_ids = current_tokens[answer_start:]
178
  decoded_tokens = tokenizer.convert_ids_to_tokens(decoded_ids)
179
  filtered_tokens = [tok for tok in decoded_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id]
180
+
181
+ highlighted = []
182
+ for i, tok in enumerate(decoded_tokens):
183
+ token_str = tokenizer.convert_tokens_to_string([tok])
184
+ if filtered_tokens and i < len(filtered_tokens) and tok != filtered_tokens[i]:
185
+ highlighted.append(f'<span style="color:green">{token_str}</span>')
186
+ else:
187
+ highlighted.append(token_str)
188
+
189
+ yield f"<b>Iteration {i+1}/{max_it} (after generation):</b><br>" + "".join(highlighted).replace('\n', '<br>')
190
+ time.sleep(0.1)
191
+
192
+ # --- Apply noising and highlight RED tokens ---
193
+ threshold = get_noising_schedule(i, max_it, sharpness=sharpness)
194
+ if use_confidence_noising:
195
+ current_tokens = confidence_guided_noising(generated_tokens, answer_start, confidences, threshold, eot_weight, noise_clipping)
196
+ just_noised_indices = [] # optional: could track confidence-weighted indices too
197
  else:
198
+ current_tokens, just_noised_indices = noisify_answer(
199
+ generated_tokens, answer_start, threshold=threshold, eot_weight=eot_weight, clustering=clustering
200
+ )
201
 
202
+ decoded_ids = current_tokens[answer_start:]
203
+ decoded_tokens = tokenizer.convert_ids_to_tokens(decoded_ids)
204
+ filtered_tokens = [tok for tok in decoded_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id]
205
+
206
+ highlighted = []
207
+ for i, tok in enumerate(filtered_tokens):
208
+ token_str = tokenizer.convert_tokens_to_string([tok])
209
+ abs_idx = answer_start + i
210
+ if abs_idx in just_noised_indices:
211
+ highlighted.append(f'<span style="color:red">{token_str}</span>')
212
+ else:
213
+ highlighted.append(token_str)
214
 
215
+ yield f"<b>Iteration {i+1}/{max_it} (after noising):</b><br>" + "".join(highlighted).replace('\n', '<br>')
216
+ time.sleep(0.1)
217
+
218
+ # --- Early stopping ---
219
  last_tokens.append(generated_tokens)
220
  if len(last_tokens) > 3:
221
  last_tokens.pop(0)
 
223
  yield f"<b>Stopped early after {i+1} iterations.</b>"
224
  break
225
 
 
 
 
 
 
 
 
226
 
227
  final_tokens = tokenizer.convert_ids_to_tokens(current_tokens[answer_start:])
228
  final_tokens = [tok for tok in final_tokens if tokenizer.convert_tokens_to_ids(tok) != eot_token_id]