|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9893390191897654, |
|
"eval_steps": 100, |
|
"global_step": 58, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 594.6719055175781, |
|
"epoch": 0.017057569296375266, |
|
"grad_norm": 0.3873046040534973, |
|
"kl": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": -0.0015, |
|
"reward": 0.5937500223517418, |
|
"reward_std": 0.36258383840322495, |
|
"rewards/accuracy_reward": 0.5937500223517418, |
|
"rewards/format_reward": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 608.0756406784058, |
|
"epoch": 0.08528784648187633, |
|
"grad_norm": 0.40932849049568176, |
|
"kl": 0.0003732144832611084, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0205, |
|
"reward": 0.591238864697516, |
|
"reward_std": 0.3592316168360412, |
|
"rewards/accuracy_reward": 0.591238864697516, |
|
"rewards/format_reward": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 611.1538192749024, |
|
"epoch": 0.17057569296375266, |
|
"grad_norm": 1.189740777015686, |
|
"kl": 0.01576576232910156, |
|
"learning_rate": 2.956412726139078e-06, |
|
"loss": 0.0602, |
|
"reward": 0.6685268126428128, |
|
"reward_std": 0.3173162743449211, |
|
"rewards/accuracy_reward": 0.6685268126428128, |
|
"rewards/format_reward": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 604.9509216308594, |
|
"epoch": 0.255863539445629, |
|
"grad_norm": 0.22091668844223022, |
|
"kl": 0.006317520141601562, |
|
"learning_rate": 2.7836719084521715e-06, |
|
"loss": 0.0681, |
|
"reward": 0.7582589656114578, |
|
"reward_std": 0.2310322556644678, |
|
"rewards/accuracy_reward": 0.7582589656114578, |
|
"rewards/format_reward": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 593.3093933105469, |
|
"epoch": 0.3411513859275053, |
|
"grad_norm": 0.23042617738246918, |
|
"kl": 0.003882598876953125, |
|
"learning_rate": 2.4946839873611927e-06, |
|
"loss": 0.0576, |
|
"reward": 0.7727678909897804, |
|
"reward_std": 0.19997056629508733, |
|
"rewards/accuracy_reward": 0.7727678909897804, |
|
"rewards/format_reward": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 596.6232421875, |
|
"epoch": 0.42643923240938164, |
|
"grad_norm": 0.24932904541492462, |
|
"kl": 0.00549468994140625, |
|
"learning_rate": 2.1156192081791355e-06, |
|
"loss": 0.037, |
|
"reward": 0.7665178909897804, |
|
"reward_std": 0.17505639586597682, |
|
"rewards/accuracy_reward": 0.7665178909897804, |
|
"rewards/format_reward": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 601.7830589294433, |
|
"epoch": 0.511727078891258, |
|
"grad_norm": 0.12253822386264801, |
|
"kl": 0.003799247741699219, |
|
"learning_rate": 1.6808050203829845e-06, |
|
"loss": 0.0402, |
|
"reward": 0.7544643223285675, |
|
"reward_std": 0.1821566427126527, |
|
"rewards/accuracy_reward": 0.7544643223285675, |
|
"rewards/format_reward": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 591.4761390686035, |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 0.21455420553684235, |
|
"kl": 0.007158660888671875, |
|
"learning_rate": 1.2296174432791415e-06, |
|
"loss": 0.0349, |
|
"reward": 0.7428571835160256, |
|
"reward_std": 0.17305572014302015, |
|
"rewards/accuracy_reward": 0.7428571835160256, |
|
"rewards/format_reward": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 577.2694511413574, |
|
"epoch": 0.6823027718550106, |
|
"grad_norm": 0.1763172149658203, |
|
"kl": 0.0064422607421875, |
|
"learning_rate": 8.029152419343472e-07, |
|
"loss": 0.0448, |
|
"reward": 0.7700893267989158, |
|
"reward_std": 0.18249586317688227, |
|
"rewards/accuracy_reward": 0.7700893267989158, |
|
"rewards/format_reward": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 596.8049400329589, |
|
"epoch": 0.767590618336887, |
|
"grad_norm": 0.13369952142238617, |
|
"kl": 0.006565475463867187, |
|
"learning_rate": 4.3933982822017883e-07, |
|
"loss": 0.0355, |
|
"reward": 0.7477678924798965, |
|
"reward_std": 0.18774770591408013, |
|
"rewards/accuracy_reward": 0.7477678924798965, |
|
"rewards/format_reward": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 595.1364059448242, |
|
"epoch": 0.8528784648187633, |
|
"grad_norm": 0.2092757225036621, |
|
"kl": 0.012014389038085938, |
|
"learning_rate": 1.718159615201853e-07, |
|
"loss": 0.0392, |
|
"reward": 0.745312537252903, |
|
"reward_std": 0.18103930186480283, |
|
"rewards/accuracy_reward": 0.745312537252903, |
|
"rewards/format_reward": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 581.1323883056641, |
|
"epoch": 0.9381663113006397, |
|
"grad_norm": 0.11179689317941666, |
|
"kl": 0.011909866333007812, |
|
"learning_rate": 2.4570139579284723e-08, |
|
"loss": 0.0323, |
|
"reward": 0.7830357506871224, |
|
"reward_std": 0.1971761178225279, |
|
"rewards/accuracy_reward": 0.7830357506871224, |
|
"rewards/format_reward": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 583.4685948689779, |
|
"epoch": 0.9893390191897654, |
|
"kl": 0.010700225830078125, |
|
"reward": 0.772693487505118, |
|
"reward_std": 0.18821328474829593, |
|
"rewards/accuracy_reward": 0.772693487505118, |
|
"rewards/format_reward": 0.0, |
|
"step": 58, |
|
"total_flos": 0.0, |
|
"train_loss": 0.04097457230893959, |
|
"train_runtime": 12199.579, |
|
"train_samples_per_second": 0.615, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 58, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|