mebubo committed · Commit ada166c · 1 Parent(s): c6407ad

refactor: Simplify main function by removing unnecessary indentation

Files changed (1): app.py (+18 -19)
app.py CHANGED
@@ -47,27 +47,26 @@ def generate_replacements(model, tokenizer, prefix, device, num_samples=5):
         new_words.append(new_word)
     return new_words
 
-def main():
-    model_name = "mistralai/Mistral-7B-v0.1"
-    model, tokenizer, device = load_model_and_tokenizer(model_name)
-
-    input_text = "He asked me to prostrate myself before the king, but I rifused."
-    inputs, input_ids = process_input_text(input_text, tokenizer, device)
-
-    result = calculate_log_probabilities(model, tokenizer, inputs, input_ids)
-
-    words = split_into_words([token for token, _ in result], [logprob for _, logprob in result])
-    log_prob_threshold = -5.0
-    low_prob_words = [word for word in words if word.logprob < log_prob_threshold]
-
-    for word in low_prob_words:
-        prefix_index = word.first_token_index
-        prefix_tokens = [token for token, _ in result][:prefix_index + 1]
-        prefix = tokenizer.convert_tokens_to_string(prefix_tokens)
-        replacements = generate_replacements(model, tokenizer, prefix, device)
-        print(f"Original word: {word.text}, Log Probability: {word.logprob:.4f}")
-        print(f"Proposed replacements: {replacements}")
-        print()
-
-if __name__ == "__main__":
-    main()
+#%%
+model_name = "mistralai/Mistral-7B-v0.1"
+model, tokenizer, device = load_model_and_tokenizer(model_name)
+
+input_text = "He asked me to prostrate myself before the king, but I rifused."
+inputs, input_ids = process_input_text(input_text, tokenizer, device)
+
+result = calculate_log_probabilities(model, tokenizer, inputs, input_ids)
+
+words = split_into_words([token for token, _ in result], [logprob for _, logprob in result])
+log_prob_threshold = -5.0
+low_prob_words = [word for word in words if word.logprob < log_prob_threshold]
+
+#%%
+
+for word in low_prob_words:
+    prefix_index = word.first_token_index
+    prefix_tokens = [token for token, _ in result][:prefix_index + 1]
+    prefix = tokenizer.convert_tokens_to_string(prefix_tokens)
+    replacements = generate_replacements(model, tokenizer, prefix, device)
+    print(f"Original word: {word.text}, Log Probability: {word.logprob:.4f}")
+    print(f"Proposed replacements: {replacements}")
+    print()
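Note on the change: the #%% markers introduced here are cell delimiters recognized by editors such as VS Code and Spyder, so the code that previously lived inside main() can now be run cell by cell at module level. The second cell walks over the low-probability words, rebuilds the text prefix up to each one, and asks generate_replacements (defined earlier in app.py, above this hunk) for candidate substitutes. The sketch below is illustrative only, not the actual implementation from app.py: the function name and signature are taken from the hunk header, but the body (model.generate with sampling, decoding only the new tokens) is an assumption about how such a helper could work with the Hugging Face transformers API.

# Illustrative sketch only -- the real generate_replacements is defined earlier in app.py
# and is not shown in this hunk; the signature mirrors the call sites visible above.
import torch

def generate_replacements(model, tokenizer, prefix, device, num_samples=5):
    # Encode the prefix (everything up to and including the flagged word's first token).
    input_ids = tokenizer(prefix, return_tensors="pt").input_ids.to(device)
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            do_sample=True,                      # sampling yields num_samples distinct candidates
            max_new_tokens=5,
            num_return_sequences=num_samples,
            pad_token_id=tokenizer.eos_token_id,
        )
    new_words = []
    for seq in outputs:
        # Decode only the newly generated tokens, keep the first whitespace-delimited word.
        continuation = tokenizer.decode(seq[input_ids.shape[-1]:], skip_special_tokens=True).strip()
        new_words.append(continuation.split()[0] if continuation else "")
    return new_words

The deliberately misspelled "rifused" in the sample input is the kind of word the -5.0 per-word log-probability threshold is meant to flag; with mistralai/Mistral-7B-v0.1 the whole loop is GPU-bound.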