{ "epoch": 0.9957081545064378, "eval_logits/chosen": 0.6785366535186768, "eval_logits/rejected": 0.5101125240325928, "eval_logps/chosen": -353.8060607910156, "eval_logps/rejected": -244.0733184814453, "eval_loss": 0.6846491098403931, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.03933782875537872, "eval_rewards/margins": 0.03162214532494545, "eval_rewards/rejected": -0.07095997035503387, "eval_runtime": 9.0831, "eval_samples_per_second": 1.101, "eval_steps_per_second": 0.22 }