Spaces:
Running
on
Zero
Running
on
Zero
Fix cluster to clustering
Browse files
app.py
CHANGED
@@ -56,7 +56,7 @@ def get_noising_schedule(i, max_it, sharpness=5.0):
|
|
56 |
x = i / max_it
|
57 |
return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
|
58 |
|
59 |
-
def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0,
|
60 |
noised = input_ids.copy()
|
61 |
answer_len = len(noised) - answer_start
|
62 |
num_to_noise = int(threshold * answer_len)
|
@@ -69,7 +69,7 @@ def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clust
|
|
69 |
mixed_probs /= mixed_probs.sum()
|
70 |
|
71 |
# Determine number of clusters and average cluster size
|
72 |
-
num_clusters = max(1, int((1 -
|
73 |
cluster_size = max(1, int(num_to_noise / num_clusters))
|
74 |
|
75 |
noised_indices = set()
|
|
|
56 |
x = i / max_it
|
57 |
return (np.exp(-sharpness * x) - np.exp(-sharpness)) / (1 - np.exp(-sharpness))
|
58 |
|
59 |
+
def noisify_answer(input_ids, answer_start, threshold=1.0, eot_weight=1.0, clustering=0.5):
|
60 |
noised = input_ids.copy()
|
61 |
answer_len = len(noised) - answer_start
|
62 |
num_to_noise = int(threshold * answer_len)
|
|
|
69 |
mixed_probs /= mixed_probs.sum()
|
70 |
|
71 |
# Determine number of clusters and average cluster size
|
72 |
+
num_clusters = max(1, int((1 - clustering) * num_to_noise)) # fewer clusters if more intensity
|
73 |
cluster_size = max(1, int(num_to_noise / num_clusters))
|
74 |
|
75 |
noised_indices = set()
|