|
{ |
|
"best_metric": 0.9714913368225098, |
|
"best_model_checkpoint": "saves/qwen_7b/lora/sft/checkpoint-1126", |
|
"epoch": 2.9955555555555557, |
|
"eval_steps": 500, |
|
"global_step": 1686, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17777777777777778, |
|
"grad_norm": 0.7376002073287964, |
|
"learning_rate": 2.958579881656805e-05, |
|
"loss": 1.36, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35555555555555557, |
|
"grad_norm": 0.946097731590271, |
|
"learning_rate": 4.9948499337185685e-05, |
|
"loss": 1.0892, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 0.8915076851844788, |
|
"learning_rate": 4.9085642733870516e-05, |
|
"loss": 1.0346, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 0.952915370464325, |
|
"learning_rate": 4.7193504727388485e-05, |
|
"loss": 1.0148, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 0.9156692028045654, |
|
"learning_rate": 4.435294437904082e-05, |
|
"loss": 1.0175, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.9917173981666565, |
|
"eval_runtime": 119.6686, |
|
"eval_samples_per_second": 4.178, |
|
"eval_steps_per_second": 4.178, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.0657777777777777, |
|
"grad_norm": 0.9918724894523621, |
|
"learning_rate": 4.068535084889406e-05, |
|
"loss": 0.9865, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2435555555555555, |
|
"grad_norm": 1.2041079998016357, |
|
"learning_rate": 3.634745592345962e-05, |
|
"loss": 0.955, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4213333333333333, |
|
"grad_norm": 1.1156851053237915, |
|
"learning_rate": 3.1524636203435906e-05, |
|
"loss": 0.9512, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.5991111111111111, |
|
"grad_norm": 1.1498212814331055, |
|
"learning_rate": 2.642299117736456e-05, |
|
"loss": 0.9495, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.7768888888888887, |
|
"grad_norm": 1.1297115087509155, |
|
"learning_rate": 2.1260535718625273e-05, |
|
"loss": 0.949, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.9546666666666668, |
|
"grad_norm": 1.1540333032608032, |
|
"learning_rate": 1.625788338763118e-05, |
|
"loss": 0.9563, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.9714913368225098, |
|
"eval_runtime": 119.3272, |
|
"eval_samples_per_second": 4.19, |
|
"eval_steps_per_second": 4.19, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 2.1315555555555554, |
|
"grad_norm": 1.1917593479156494, |
|
"learning_rate": 1.1628818681133966e-05, |
|
"loss": 0.9097, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.3093333333333335, |
|
"grad_norm": 1.2136421203613281, |
|
"learning_rate": 7.571161116308206e-06, |
|
"loss": 0.8999, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.487111111111111, |
|
"grad_norm": 1.2269536256790161, |
|
"learning_rate": 4.258311565949436e-06, |
|
"loss": 0.8997, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.664888888888889, |
|
"grad_norm": 1.2763224840164185, |
|
"learning_rate": 1.8318421056443574e-06, |
|
"loss": 0.9011, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.8426666666666667, |
|
"grad_norm": 1.2190372943878174, |
|
"learning_rate": 3.954460400907217e-07, |
|
"loss": 0.9017, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.9955555555555557, |
|
"eval_loss": 0.9726876020431519, |
|
"eval_runtime": 119.2495, |
|
"eval_samples_per_second": 4.193, |
|
"eval_steps_per_second": 4.193, |
|
"step": 1686 |
|
}, |
|
{ |
|
"epoch": 2.9955555555555557, |
|
"step": 1686, |
|
"total_flos": 5.872683475324109e+17, |
|
"train_loss": 0.9816320471350691, |
|
"train_runtime": 11250.2702, |
|
"train_samples_per_second": 1.2, |
|
"train_steps_per_second": 0.15 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 1686, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.872683475324109e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|