Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

adapter_model.safetensors +1 -1
optimizer.pt +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +52 -4

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bad9034785763b086b60a73ca966d0ddc01f299b32c43137a9d43513800d579
 size 536991984

 version https://git-lfs.github.com/spec/v1
+oid sha256:21ba522228845174d32cacd7669d20a73f3023311098562af2f92c68ec74141f
 size 536991984

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a0be7dc8b6c2c3408c9f271d985ac75371c63f7f6e395e7647353a0c823bf5a8
 size 273692564

 version https://git-lfs.github.com/spec/v1
+oid sha256:f89bb4f12f1bc27da4b337212804d8fbe0d4d436638a7a785f9b382734e8738c
 size 273692564

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d3e3111608832edd4c3ec5c3f7270df3317520d5f3b484383a9ebced45d72b0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:10f7b4a7087f66c43babf2b65b54d2577d3e2a7ac4870488e548b8ce38a147b3
 size 14244

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1fdaf50130575b5890af18d121235b5d095d7e70fa6a567281438c1db678114e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f9933be5dcd705040f69563dec6acc92d3f8a3165edb72320c05d24fa315f93
 size 1064

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.0,
   "eval_steps": 500,
-  "global_step": 128,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -207,6 +207,54 @@
       "loss": 1.136,
       "mean_token_accuracy": 0.7119275834411383,
       "step": 125
     }
   ],
   "logging_steps": 5,
@@ -221,12 +269,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.738212146404147e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.864,
   "eval_steps": 500,
+  "global_step": 155,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "loss": 1.136,
       "mean_token_accuracy": 0.7119275834411383,
       "step": 125
+    },
+    {
+      "epoch": 4.064,
+      "grad_norm": 0.0652015432715416,
+      "learning_rate": 6.968339090999186e-08,
+      "loss": 1.1041,
+      "mean_token_accuracy": 0.7163285718244665,
+      "step": 130
+    },
+    {
+      "epoch": 4.224,
+      "grad_norm": 0.06359368562698364,
+      "learning_rate": 4.498244441786675e-08,
+      "loss": 1.1359,
+      "mean_token_accuracy": 0.7156530544161797,
+      "step": 135
+    },
+    {
+      "epoch": 4.384,
+      "grad_norm": 0.06807275116443634,
+      "learning_rate": 2.547212649466568e-08,
+      "loss": 1.1229,
+      "mean_token_accuracy": 0.7130052808672189,
+      "step": 140
+    },
+    {
+      "epoch": 4.5440000000000005,
+      "grad_norm": 0.06558381021022797,
+      "learning_rate": 1.1375001769727999e-08,
+      "loss": 1.0983,
+      "mean_token_accuracy": 0.7218218572437763,
+      "step": 145
+    },
+    {
+      "epoch": 4.704,
+      "grad_norm": 0.06619936972856522,
+      "learning_rate": 2.851883682973233e-09,
+      "loss": 1.1149,
+      "mean_token_accuracy": 0.7202201712876558,
+      "step": 150
+    },
+    {
+      "epoch": 4.864,
+      "grad_norm": 0.0672445297241211,
+      "learning_rate": 0.0,
+      "loss": 1.0885,
+      "mean_token_accuracy": 0.7229270905256271,
+      "step": 155
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 9.40617510529582e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null