fix: Include attention mask in model input and function parameters
app.py CHANGED

@@ -14,12 +14,13 @@ def load_model_and_tokenizer(model_name):
     return model, tokenizer, device
 
 def process_input_text(input_text, tokenizer, device):
+    """Process input text to obtain input IDs and attention mask."""
     inputs = tokenizer(input_text, return_tensors="pt").to(device)
     input_ids = inputs["input_ids"]
     attention_mask = inputs["attention_mask"]
-    return inputs, input_ids
+    return inputs, input_ids, attention_mask
 
-def calculate_log_probabilities(model, tokenizer, inputs, input_ids):
+def calculate_log_probabilities(model, tokenizer, inputs, input_ids, attention_mask):
     with torch.no_grad():
         outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
     logits = outputs.logits[0, :-1, :]
@@ -56,9 +57,9 @@ model_name = "mistralai/Mistral-7B-v0.1"
 model, tokenizer, device = load_model_and_tokenizer(model_name)
 
 input_text = "He asked me to prostrate myself before the king, but I rifused."
-inputs, input_ids = process_input_text(input_text, tokenizer, device)
+inputs, input_ids, attention_mask = process_input_text(input_text, tokenizer, device)
 
-result = calculate_log_probabilities(model, tokenizer, inputs, input_ids)
+result = calculate_log_probabilities(model, tokenizer, inputs, input_ids, attention_mask)
 
 words = split_into_words([token for token, _ in result], [logprob for _, logprob in result])
 log_prob_threshold = -5.0
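For context, here is a minimal sketch of the two changed functions as they read after this commit, with the attention mask threaded from the tokenizer through to the model call. Everything past the logits slice (the per-token log-probability extraction) is an assumption about what calculate_log_probabilities goes on to do, since the rest of that function is not shown in this diff.

```python
# Sketch only: the body past the logits slice is assumed, not the file's exact code.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def process_input_text(input_text, tokenizer, device):
    """Process input text to obtain input IDs and attention mask."""
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    return inputs, inputs["input_ids"], inputs["attention_mask"]


def calculate_log_probabilities(model, tokenizer, inputs, input_ids, attention_mask):
    """Return (token, log-probability) pairs for each token after the first."""
    with torch.no_grad():
        # Passing the mask explicitly tells the model which positions are real tokens,
        # which matters once batched, padded inputs are involved.
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
    # Logits at position i predict token i + 1, so drop the last position
    # and score the ids shifted left by one.
    logits = outputs.logits[0, :-1, :]
    log_probs = torch.log_softmax(logits, dim=-1)
    target_ids = input_ids[0, 1:]
    positions = torch.arange(target_ids.shape[0], device=log_probs.device)
    token_log_probs = log_probs[positions, target_ids]
    tokens = tokenizer.convert_ids_to_tokens(target_ids.tolist())
    return list(zip(tokens, token_log_probs.tolist()))


# Usage mirrors the script above; loading Mistral-7B needs a GPU with enough memory.
# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1").to("cuda")
# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
# inputs, input_ids, attention_mask = process_input_text("some text", tokenizer, "cuda")
# result = calculate_log_probabilities(model, tokenizer, inputs, input_ids, attention_mask)
```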