Commit
·
3328618
1
Parent(s):
df93d1e
Azam
Browse files- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scaler.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +3 -18
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2498522505
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2251f8fb6941a79db7b7c8f419235cd1d3a8ab37174d57517d90234d4e41a471
|
3 |
size 2498522505
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1266130541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93536e6e0a8eb02eb18680c9457964a62569508e9def6f8c9dc2c61aac2fa178
|
3 |
size 1266130541
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59d3437cc781f60db7f0c4ec11b98c7820ced9da2ab517212a97208c81e0a031
|
3 |
size 14503
|
scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8139a2d5a513b92ca7751a538153ea76a749e3742836b6044f8c530ae1149fb6
|
3 |
size 559
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:748aacfdb5198e403336e1866da3fd45d311cf7ebe83e327e4ae797f20a0f0f0
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1101,26 +1101,11 @@
|
|
1101 |
"eval_samples_per_second": 6.423,
|
1102 |
"eval_steps_per_second": 1.606,
|
1103 |
"step": 730
|
1104 |
-
},
|
1105 |
-
{
|
1106 |
-
"epoch": 2.57,
|
1107 |
-
"learning_rate": 1.4467592592592593e-05,
|
1108 |
-
"loss": 0.3625,
|
1109 |
-
"step": 740
|
1110 |
-
},
|
1111 |
-
{
|
1112 |
-
"epoch": 2.57,
|
1113 |
-
"eval_accuracy": 0.8003472089767456,
|
1114 |
-
"eval_loss": 0.5525452494621277,
|
1115 |
-
"eval_runtime": 90.5733,
|
1116 |
-
"eval_samples_per_second": 6.359,
|
1117 |
-
"eval_steps_per_second": 1.59,
|
1118 |
-
"step": 740
|
1119 |
}
|
1120 |
],
|
1121 |
"max_steps": 864,
|
1122 |
"num_train_epochs": 3,
|
1123 |
-
"total_flos": 2.
|
1124 |
"trial_name": null,
|
1125 |
"trial_params": null
|
1126 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.5347222222222223,
|
5 |
+
"global_step": 730,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1101 |
"eval_samples_per_second": 6.423,
|
1102 |
"eval_steps_per_second": 1.606,
|
1103 |
"step": 730
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1104 |
}
|
1105 |
],
|
1106 |
"max_steps": 864,
|
1107 |
"num_train_epochs": 3,
|
1108 |
+
"total_flos": 2.1573660231214095e+18,
|
1109 |
"trial_name": null,
|
1110 |
"trial_params": null
|
1111 |
}
|