Kevin Fink
committed on
Commit
·
143e045
1
Parent(s):
d1a5708
deve
Browse files
app.py
CHANGED
@@ -34,7 +34,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
34 |
rouge_metric = evaluate.load("rouge", cache_dir='/data/cache')
|
35 |
def compute_metrics(eval_preds):
|
36 |
preds, labels = eval_preds
|
37 |
-
if isinstance(preds,
|
38 |
preds = preds[0]
|
39 |
|
40 |
# Replace -100s used for padding as we can't decode them
|
@@ -106,20 +106,20 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
106 |
# Assuming 'text' is the input and 'target' is the expected output
|
107 |
model_inputs = tokenizer(
|
108 |
examples['text'],
|
109 |
-
max_length=max_length, # Set to None for dynamic padding
|
110 |
-
truncation=True,
|
111 |
-
padding='max_length',
|
112 |
-
return_tensors='pt',
|
113 |
)
|
114 |
|
115 |
# Setup the decoder input IDs (shifted right)
|
116 |
labels = tokenizer(
|
117 |
examples['target'],
|
118 |
-
max_length=128, # Set to None for dynamic padding
|
119 |
-
truncation=True,
|
120 |
-
padding='max_length',
|
121 |
-
|
122 |
-
return_tensors='pt',
|
123 |
)
|
124 |
#labels["input_ids"] = [
|
125 |
# [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
|
|
|
34 |
rouge_metric = evaluate.load("rouge", cache_dir='/data/cache')
|
35 |
def compute_metrics(eval_preds):
|
36 |
preds, labels = eval_preds
|
37 |
+
if isinstance(preds, tuple):
|
38 |
preds = preds[0]
|
39 |
|
40 |
# Replace -100s used for padding as we can't decode them
|
|
|
106 |
# Assuming 'text' is the input and 'target' is the expected output
|
107 |
model_inputs = tokenizer(
|
108 |
examples['text'],
|
109 |
+
#max_length=max_length, # Set to None for dynamic padding
|
110 |
+
#truncation=True,
|
111 |
+
#padding='max_length',
|
112 |
+
#return_tensors='pt',
|
113 |
)
|
114 |
|
115 |
# Setup the decoder input IDs (shifted right)
|
116 |
labels = tokenizer(
|
117 |
examples['target'],
|
118 |
+
#max_length=128, # Set to None for dynamic padding
|
119 |
+
#truncation=True,
|
120 |
+
#padding='max_length',
|
121 |
+
##text_target=examples['target'],
|
122 |
+
#return_tensors='pt',
|
123 |
)
|
124 |
#labels["input_ids"] = [
|
125 |
# [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
|