mebubo committed
Commit b47d499 · 1 parent: 9029ade

fix: Include attention mask in model input and function parameters

Files changed (1): app.py (+5, -4)
app.py CHANGED
@@ -14,12 +14,13 @@ def load_model_and_tokenizer(model_name):
     return model, tokenizer, device

 def process_input_text(input_text, tokenizer, device):
+    """Process input text to obtain input IDs and attention mask."""
     inputs = tokenizer(input_text, return_tensors="pt").to(device)
     input_ids = inputs["input_ids"]
     attention_mask = inputs["attention_mask"]
-    return inputs, input_ids
+    return inputs, input_ids, attention_mask

-def calculate_log_probabilities(model, tokenizer, inputs, input_ids):
+def calculate_log_probabilities(model, tokenizer, inputs, input_ids, attention_mask):
     with torch.no_grad():
         outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
     logits = outputs.logits[0, :-1, :]
@@ -56,9 +57,9 @@ model_name = "mistralai/Mistral-7B-v0.1"
 model, tokenizer, device = load_model_and_tokenizer(model_name)

 input_text = "He asked me to prostrate myself before the king, but I rifused."
-inputs, input_ids = process_input_text(input_text, tokenizer, device)
+inputs, input_ids, attention_mask = process_input_text(input_text, tokenizer, device)

-result = calculate_log_probabilities(model, tokenizer, inputs, input_ids)
+result = calculate_log_probabilities(model, tokenizer, inputs, input_ids, attention_mask)

 words = split_into_words([token for token, _ in result], [logprob for _, logprob in result])
 log_prob_threshold = -5.0
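
Before this commit, attention_mask existed only as a local variable inside process_input_text, while the unchanged forward-pass line in calculate_log_probabilities already referred to it, so calling that function raised a NameError. The fix threads the mask through the return value and both call sites. For context, the sketch below reconstructs the fixed code path end to end. Only the function signatures and the lines visible in the diff come from the commit; the rest of calculate_log_probabilities is a standard next-token log-probability pattern assumed here, and "gpt2" stands in for mistralai/Mistral-7B-v0.1 so the sketch runs on modest hardware.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    def load_model_and_tokenizer(model_name):
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
        model.eval()
        return model, tokenizer, device

    def process_input_text(input_text, tokenizer, device):
        """Process input text to obtain input IDs and attention mask."""
        inputs = tokenizer(input_text, return_tensors="pt").to(device)
        input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]
        return inputs, input_ids, attention_mask

    def calculate_log_probabilities(model, tokenizer, inputs, input_ids, attention_mask):
        # "inputs" is unused in this sketch; kept to match the app's signature.
        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
        # Position i predicts token i+1, so drop the final position...
        logits = outputs.logits[0, :-1, :]
        log_probs = torch.log_softmax(logits, dim=-1)
        # ...and score each actual next token against the prediction for it.
        next_tokens = input_ids[0, 1:]
        token_log_probs = log_probs.gather(1, next_tokens.unsqueeze(1)).squeeze(1)
        tokens = tokenizer.convert_ids_to_tokens(next_tokens.tolist())
        return list(zip(tokens, token_log_probs.tolist()))

    model, tokenizer, device = load_model_and_tokenizer("gpt2")  # stand-in model
    input_text = "He asked me to prostrate myself before the king, but I rifused."
    inputs, input_ids, attention_mask = process_input_text(input_text, tokenizer, device)
    result = calculate_log_probabilities(model, tokenizer, inputs, input_ids, attention_mask)
    for token, logprob in result:
        marker = "  <-- below threshold" if logprob < -5.0 else ""
        print(f"{token!r}: {logprob:.2f}{marker}")

For a single unpadded sequence the mask is all ones, so the model's output is identical with or without it; the change matters because the old code crashed, and because an explicit mask stays correct if the app later scores padded batches.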