Kevin Fink committed on
Commit 143e045 · 1 Parent(s): d1a5708
Files changed (1)
app.py +10 -10
app.py CHANGED
```diff
@@ -34,7 +34,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
     rouge_metric = evaluate.load("rouge", cache_dir='/data/cache')
     def compute_metrics(eval_preds):
         preds, labels = eval_preds
-        if isinstance(preds, list):
+        if isinstance(preds, tuple):
             preds = preds[0]
 
         # Replace -100s used for padding as we can't decode them
```
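The changed line reflects how `transformers` actually delivers predictions: when the model returns several outputs, `compute_metrics` receives them as a *tuple* of arrays, so the old `isinstance(preds, list)` check never matched and `preds` stayed a raw tuple. A minimal sketch of how the surrounding function presumably continues, assuming the `tokenizer` built elsewhere in `fine_tune_model` is in scope (everything past the lines shown in the hunk is an assumption, not the Space's exact code):

```python
import numpy as np

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    # transformers passes multi-output predictions as a tuple of arrays,
    # so this (not a list check) is the branch that actually fires
    if isinstance(preds, tuple):
        preds = preds[0]

    # Replace -100s used for padding as we can't decode them
    preds = np.where(preds != -100, preds, tokenizer.pad_token_id)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # rouge_metric is the evaluate.load("rouge") instance from the hunk above
    return rouge_metric.compute(predictions=decoded_preds, references=decoded_labels)
```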
```diff
@@ -106,20 +106,20 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         # Assuming 'text' is the input and 'target' is the expected output
         model_inputs = tokenizer(
             examples['text'],
-            max_length=max_length, # Set to None for dynamic padding
-            truncation=True,
-            padding='max_length',
-            return_tensors='pt',
+            #max_length=max_length, # Set to None for dynamic padding
+            #truncation=True,
+            #padding='max_length',
+            #return_tensors='pt',
         )
 
         # Setup the decoder input IDs (shifted right)
         labels = tokenizer(
             examples['target'],
-            max_length=128, # Set to None for dynamic padding
-            truncation=True,
-            padding='max_length',
-            #text_target=examples['target'],
-            return_tensors='pt',
+            #max_length=128, # Set to None for dynamic padding
+            #truncation=True,
+            #padding='max_length',
+            ##text_target=examples['target'],
+            #return_tensors='pt',
         )
         #labels["input_ids"] = [
         #    [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
```
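Commenting out the static `max_length`, `truncation`, `padding='max_length'`, and `return_tensors='pt'` arguments moves padding out of the `map()` step: the tokenizer then returns plain Python lists, which is what `datasets.Dataset.map` expects, and each batch can be padded only to its longest member at collation time. The commented-out manual loop at the bottom of the hunk also becomes unnecessary if a `DataCollatorForSeq2Seq` handles collation, since its `label_pad_token_id=-100` performs the same pad-token-to--100 substitution. A runnable sketch of that dynamic-padding setup; the `google/flan-t5-small` checkpoint is a stand-in, not necessarily what this Space loads:

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq

# Stand-in checkpoint; app.py builds its own model/tokenizer elsewhere.
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

# With truncation/padding removed from the map() step, each example keeps its
# natural length; the collator pads every batch to its longest member and
# fills label padding with -100 so the loss ignores those positions.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, label_pad_token_id=-100)

batch = data_collator([
    {"input_ids": tokenizer("short text")["input_ids"],
     "labels": tokenizer("short target")["input_ids"]},
    {"input_ids": tokenizer("a somewhat longer piece of input text")["input_ids"],
     "labels": tokenizer("a longer target sentence")["input_ids"]},
])
print(batch["input_ids"].shape)  # both rows padded to the longer input's length
```

Batches then carry only as much padding as their longest member, which is the usual point of dropping `padding='max_length'` in favor of a collator.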
 