feat: add filtering for low probability words based on log probability threshold
Browse files
app.py
CHANGED
@@ -76,6 +76,12 @@ for word, avg_logprob in words:
|
|
76 |
|
77 |
words = split_into_words(tokens[1:], token_log_probs)
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
#%%
|
81 |
def generate_replacements(model, tokenizer, prefix, num_samples=5):
|
|
|
76 |
|
77 |
words = split_into_words(tokens[1:], token_log_probs)
|
78 |
|
79 |
+
# Define a threshold for low probability words
|
80 |
+
log_prob_threshold = -5.0
|
81 |
+
|
82 |
+
# Filter words with log probability below the threshold
|
83 |
+
# Entries of `words` are unpacked elsewhere in this file as
# (word, avg_logprob) pairs, so read the log-probability by position rather
# than via a `.logprob` attribute, which plain tuples do not have.
# Each kept entry is the original pair, unchanged.
low_prob_words = [entry for entry in words if entry[1] < log_prob_threshold]
|
84 |
+
|
85 |
|
86 |
#%%
|
87 |
def generate_replacements(model, tokenizer, prefix, num_samples=5):
|