Kevin Fink
committed on
Commit
·
143e045
1
Parent(s):
d1a5708
deve
Browse files
app.py
CHANGED
@@ -34,7 +34,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
34 |
rouge_metric = evaluate.load("rouge", cache_dir='/data/cache')
|
35 |
def compute_metrics(eval_preds):
|
36 |
preds, labels = eval_preds
|
37 |
-
if isinstance(preds,
|
38 |
preds = preds[0]
|
39 |
|
40 |
# Replace -100s used for padding as we can't decode them
|
@@ -106,20 +106,20 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
106 |
# Assuming 'text' is the input and 'target' is the expected output
|
107 |
model_inputs = tokenizer(
|
108 |
examples['text'],
|
109 |
-
max_length=max_length, # Set to None for dynamic padding
|
110 |
-
truncation=True,
|
111 |
-
padding='max_length',
|
112 |
-
return_tensors='pt',
|
113 |
)
|
114 |
|
115 |
# Setup the decoder input IDs (shifted right)
|
116 |
labels = tokenizer(
|
117 |
examples['target'],
|
118 |
-
max_length=128, # Set to None for dynamic padding
|
119 |
-
truncation=True,
|
120 |
-
padding='max_length',
|
121 |
-
|
122 |
-
return_tensors='pt',
|
123 |
)
|
124 |
#labels["input_ids"] = [
|
125 |
# [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
|
|
|
34 |
rouge_metric = evaluate.load("rouge", cache_dir='/data/cache')
|
35 |
def compute_metrics(eval_preds):
|
36 |
preds, labels = eval_preds
|
37 |
+
if isinstance(preds, tuple):
|
38 |
preds = preds[0]
|
39 |
|
40 |
# Replace -100s used for padding as we can't decode them
|
|
|
106 |
# Assuming 'text' is the input and 'target' is the expected output
|
107 |
model_inputs = tokenizer(
|
108 |
examples['text'],
|
109 |
+
#max_length=max_length, # Set to None for dynamic padding
|
110 |
+
#truncation=True,
|
111 |
+
#padding='max_length',
|
112 |
+
#return_tensors='pt',
|
113 |
)
|
114 |
|
115 |
# Setup the decoder input IDs (shifted right)
|
116 |
labels = tokenizer(
|
117 |
examples['target'],
|
118 |
+
#max_length=128, # Set to None for dynamic padding
|
119 |
+
#truncation=True,
|
120 |
+
#padding='max_length',
|
121 |
+
##text_target=examples['target'],
|
122 |
+
#return_tensors='pt',
|
123 |
)
|
124 |
#labels["input_ids"] = [
|
125 |
# [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
|