{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.71484375, "epoch": 0.0, "final_loss": 0.71484375, "grad_norm": 0.0, "learning_rate": 1.5873015873015872e-08, "loss": 0.7017, "projector_lr": 4.761904761904762e-08, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.01251220703125, "rewards_train/margins": 0.0, "rewards_train/rejected": -0.01251220703125, "sft_loss": 0.83203125, "step": 1 }, { "dpo_loss": 0.6953125, "epoch": 0.0, "final_loss": 0.6953125, "grad_norm": 0.0, "learning_rate": 3.1746031746031744e-08, "loss": 0.6995, "projector_lr": 9.523809523809524e-08, "rewards_train/accuracies": 0.25, "rewards_train/chosen": 0.003143310546875, "rewards_train/margins": 0.003143310546875, "rewards_train/rejected": 0.0, "sft_loss": 0.79296875, "step": 2 }, { "dpo_loss": 0.71484375, "epoch": 0.0, "final_loss": 0.71484375, "grad_norm": 0.0, "learning_rate": 4.7619047619047613e-08, "loss": 0.7061, "projector_lr": 1.4285714285714285e-07, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.01251220703125, "rewards_train/margins": -0.0281982421875, "rewards_train/rejected": 0.015625, "sft_loss": 0.578125, "step": 3 }, { "dpo_loss": 0.7109375, "epoch": 0.0, "final_loss": 0.7109375, "grad_norm": 0.0, "learning_rate": 6.349206349206349e-08, "loss": 0.708, "projector_lr": 1.9047619047619048e-07, "rewards_train/accuracies": 0.125, "rewards_train/chosen": 0.006256103515625, "rewards_train/margins": -0.03759765625, "rewards_train/rejected": 0.043701171875, "sft_loss": 0.7265625, "step": 4 }, { "dpo_loss": 0.6796875, "epoch": 0.0, "final_loss": 0.6796875, "grad_norm": 0.0, "learning_rate": 7.936507936507936e-08, "loss": 0.7097, "projector_lr": 2.380952380952381e-07, "rewards_train/accuracies": 0.125, "rewards_train/chosen": 0.01171875, "rewards_train/margins": 0.0242919921875, "rewards_train/rejected": -0.01251220703125, "sft_loss": 0.8046875, "step": 5 }, { "dpo_loss": 0.6953125, "epoch": 0.0, "final_loss": 0.6953125, "grad_norm": 0.0, "learning_rate": 9.523809523809523e-08, "loss": 0.7019, "projector_lr": 2.857142857142857e-07, "rewards_train/accuracies": 0.25, "rewards_train/chosen": -0.0281982421875, "rewards_train/margins": -0.015625, "rewards_train/rejected": -0.01251220703125, "sft_loss": 0.6640625, "step": 6 }, { "dpo_loss": 0.6875, "epoch": 0.0, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 1.111111111111111e-07, "loss": 0.6929, "projector_lr": 3.333333333333333e-07, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.0203857421875, "rewards_train/margins": -0.0031280517578125, "rewards_train/rejected": -0.0172119140625, "sft_loss": 0.9296875, "step": 7 }, { "dpo_loss": 0.6875, "epoch": 0.0, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 1.2698412698412698e-07, "loss": 0.6897, "projector_lr": 3.8095238095238096e-07, "rewards_train/accuracies": 0.5, "rewards_train/chosen": 0.01251220703125, "rewards_train/margins": 0.015625, "rewards_train/rejected": -0.003143310546875, "sft_loss": 0.73046875, "step": 8 }, { "dpo_loss": 0.703125, "epoch": 0.0, "final_loss": 0.703125, "grad_norm": 0.0, "learning_rate": 1.4285714285714285e-07, "loss": 0.7227, "projector_lr": 4.2857142857142857e-07, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.015625, "rewards_train/margins": -0.015625, "rewards_train/rejected": 0.0, "sft_loss": 0.9375, "step": 9 }, { "dpo_loss": 0.69921875, "epoch": 0.0, "final_loss": 0.69921875, "grad_norm": 0.0, "learning_rate": 1.5873015873015872e-07, "loss": 0.7063, "projector_lr": 4.761904761904762e-07, "rewards_train/accuracies": 0.25, "rewards_train/chosen": 0.03125, "rewards_train/margins": -0.0093994140625, "rewards_train/rejected": 0.040771484375, "sft_loss": 0.66796875, "step": 10 }, { "dpo_loss": 0.6796875, "epoch": 0.0, "final_loss": 0.6796875, "grad_norm": 0.0, "learning_rate": 1.7460317460317458e-07, "loss": 0.6895, "projector_lr": 5.238095238095238e-07, "rewards_train/accuracies": 0.375, "rewards_train/chosen": 0.034423828125, "rewards_train/margins": 0.0281982421875, "rewards_train/rejected": 0.006256103515625, "sft_loss": 0.59765625, "step": 11 }, { "dpo_loss": 0.7109375, "epoch": 0.0, "final_loss": 0.7109375, "grad_norm": 0.0, "learning_rate": 1.9047619047619045e-07, "loss": 0.6931, "projector_lr": 5.714285714285714e-07, "rewards_train/accuracies": 0.25, "rewards_train/chosen": 0.01251220703125, "rewards_train/margins": -0.03125, "rewards_train/rejected": 0.043701171875, "sft_loss": 0.69140625, "step": 12 }, { "dpo_loss": 0.6953125, "epoch": 0.0, "final_loss": 0.6953125, "grad_norm": 0.0, "learning_rate": 2.0634920634920632e-07, "loss": 0.7078, "projector_lr": 6.19047619047619e-07, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.043701171875, "rewards_train/margins": 0.006256103515625, "rewards_train/rejected": -0.050048828125, "sft_loss": 0.9453125, "step": 13 }, { "dpo_loss": 0.70703125, "epoch": 0.0, "final_loss": 0.70703125, "grad_norm": 0.0, "learning_rate": 2.222222222222222e-07, "loss": 0.7065, "projector_lr": 6.666666666666666e-07, "rewards_train/accuracies": 0.125, "rewards_train/chosen": -0.0218505859375, "rewards_train/margins": -0.0281982421875, "rewards_train/rejected": 0.006256103515625, "sft_loss": 0.75, "step": 14 }, { "dpo_loss": 0.69140625, "epoch": 0.0, "final_loss": 0.69140625, "grad_norm": 0.0, "learning_rate": 2.3809523809523806e-07, "loss": 0.6863, "projector_lr": 7.142857142857143e-07, "rewards_train/accuracies": 0.25, "rewards_train/chosen": 0.015625, "rewards_train/margins": 0.0, "rewards_train/rejected": 0.015625, "sft_loss": 0.64453125, "step": 15 }, { "dpo_loss": 0.65234375, "epoch": 0.0, "final_loss": 0.65234375, "grad_norm": 0.0, "learning_rate": 2.5396825396825396e-07, "loss": 0.6604, "projector_lr": 7.619047619047619e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.0093994140625, "rewards_train/margins": 0.06884765625, "rewards_train/rejected": -0.059326171875, "sft_loss": 0.5078125, "step": 16 }, { "dpo_loss": 0.68359375, "epoch": 0.0, "final_loss": 0.68359375, "grad_norm": 0.0, "learning_rate": 2.698412698412698e-07, "loss": 0.7007, "projector_lr": 8.095238095238095e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.0093994140625, "rewards_train/margins": 0.0281982421875, "rewards_train/rejected": -0.018798828125, "sft_loss": 0.734375, "step": 17 }, { "dpo_loss": 0.71484375, "epoch": 0.0, "final_loss": 0.71484375, "grad_norm": 0.0, "learning_rate": 2.857142857142857e-07, "loss": 0.6895, "projector_lr": 8.571428571428571e-07, "rewards_train/accuracies": 0.0, "rewards_train/chosen": -0.01251220703125, "rewards_train/margins": -0.03759765625, "rewards_train/rejected": 0.0250244140625, "sft_loss": 0.70703125, "step": 18 }, { "dpo_loss": 0.6796875, "epoch": 0.0, "final_loss": 0.6796875, "grad_norm": 0.0, "learning_rate": 3.0158730158730156e-07, "loss": 0.6958, "projector_lr": 9.047619047619047e-07, "rewards_train/accuracies": 0.375, "rewards_train/chosen": 0.0218505859375, "rewards_train/margins": 0.0281982421875, "rewards_train/rejected": -0.006256103515625, "sft_loss": 0.61328125, "step": 19 }, { "dpo_loss": 0.6875, "epoch": 0.0, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 3.1746031746031743e-07, "loss": 0.6946, "projector_lr": 9.523809523809523e-07, "rewards_train/accuracies": 0.25, "rewards_train/chosen": 0.01251220703125, "rewards_train/margins": 0.006256103515625, "rewards_train/rejected": 0.006256103515625, "sft_loss": 0.75390625, "step": 20 }, { "dpo_loss": 0.6953125, "epoch": 0.0, "final_loss": 0.6953125, "grad_norm": 0.0, "learning_rate": 3.333333333333333e-07, "loss": 0.6948, "projector_lr": 1e-06, "rewards_train/accuracies": 0.125, "rewards_train/chosen": -0.006256103515625, "rewards_train/margins": -0.006256103515625, "rewards_train/rejected": 0.0, "sft_loss": 0.67578125, "step": 21 }, { "dpo_loss": 0.66796875, "epoch": 0.0, "final_loss": 0.66796875, "grad_norm": 0.0, "learning_rate": 3.4920634920634917e-07, "loss": 0.6768, "projector_lr": 1.0476190476190476e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.0218505859375, "rewards_train/margins": 0.06005859375, "rewards_train/rejected": -0.08203125, "sft_loss": 0.984375, "step": 22 }, { "dpo_loss": 0.7109375, "epoch": 0.0, "final_loss": 0.7109375, "grad_norm": 0.0, "learning_rate": 3.6507936507936504e-07, "loss": 0.7051, "projector_lr": 1.0952380952380952e-06, "rewards_train/accuracies": 0.125, "rewards_train/chosen": -0.0250244140625, "rewards_train/margins": -0.034423828125, "rewards_train/rejected": 0.0093994140625, "sft_loss": 0.66796875, "step": 23 }, { "dpo_loss": 0.671875, "epoch": 0.0, "final_loss": 0.671875, "grad_norm": 0.0, "learning_rate": 3.809523809523809e-07, "loss": 0.6882, "projector_lr": 1.1428571428571428e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": 0.006256103515625, "rewards_train/margins": 0.05615234375, "rewards_train/rejected": -0.050048828125, "sft_loss": 0.7578125, "step": 24 }, { "dpo_loss": 0.69140625, "epoch": 0.0, "final_loss": 0.69140625, "grad_norm": 0.0, "learning_rate": 3.968253968253968e-07, "loss": 0.6902, "projector_lr": 1.1904761904761904e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": 0.01251220703125, "rewards_train/margins": 0.0031280517578125, "rewards_train/rejected": 0.0093994140625, "sft_loss": 0.8046875, "step": 25 }, { "dpo_loss": 0.6953125, "epoch": 0.0, "final_loss": 0.6953125, "grad_norm": 0.0, "learning_rate": 4.1269841269841265e-07, "loss": 0.696, "projector_lr": 1.238095238095238e-06, "rewards_train/accuracies": 0.25, "rewards_train/chosen": -0.0234375, "rewards_train/margins": -0.010986328125, "rewards_train/rejected": -0.01251220703125, "sft_loss": 0.58203125, "step": 26 }, { "dpo_loss": 0.6640625, "epoch": 0.0, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 4.285714285714285e-07, "loss": 0.6926, "projector_lr": 1.2857142857142856e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": 0.028076171875, "rewards_train/margins": 0.059326171875, "rewards_train/rejected": -0.03125, "sft_loss": 0.52734375, "step": 27 }, { "dpo_loss": 0.70703125, "epoch": 0.0, "final_loss": 0.70703125, "grad_norm": 0.0, "learning_rate": 4.444444444444444e-07, "loss": 0.697, "projector_lr": 1.3333333333333332e-06, "rewards_train/accuracies": 0.125, "rewards_train/chosen": -0.03759765625, "rewards_train/margins": -0.0250244140625, "rewards_train/rejected": -0.0125732421875, "sft_loss": 0.765625, "step": 28 }, { "dpo_loss": 0.6484375, "epoch": 0.0, "final_loss": 0.6484375, "grad_norm": 0.0, "learning_rate": 4.6031746031746025e-07, "loss": 0.6626, "projector_lr": 1.3809523809523808e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": 0.0, "rewards_train/margins": 0.06591796875, "rewards_train/rejected": -0.06591796875, "sft_loss": 0.91015625, "step": 29 }, { "dpo_loss": 0.65234375, "epoch": 0.0, "final_loss": 0.65234375, "grad_norm": 0.0, "learning_rate": 4.761904761904761e-07, "loss": 0.676, "projector_lr": 1.4285714285714286e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.03759765625, "rewards_train/margins": 0.08154296875, "rewards_train/rejected": -0.043701171875, "sft_loss": 0.69921875, "step": 30 }, { "dpo_loss": 0.6796875, "epoch": 0.0, "final_loss": 0.6796875, "grad_norm": 0.0, "learning_rate": 4.92063492063492e-07, "loss": 0.6924, "projector_lr": 1.4761904761904762e-06, "rewards_train/accuracies": 0.25, "rewards_train/chosen": 0.0, "rewards_train/margins": 0.0250244140625, "rewards_train/rejected": -0.0250244140625, "sft_loss": 0.875, "step": 31 }, { "dpo_loss": 0.73828125, "epoch": 0.01, "final_loss": 0.73828125, "grad_norm": 0.0, "learning_rate": 5.079365079365079e-07, "loss": 0.7246, "projector_lr": 1.5238095238095238e-06, "rewards_train/accuracies": 0.0, "rewards_train/chosen": -0.043701171875, "rewards_train/margins": -0.0810546875, "rewards_train/rejected": 0.03759765625, "sft_loss": 0.6640625, "step": 32 }, { "dpo_loss": 0.73046875, "epoch": 0.01, "final_loss": 0.73046875, "grad_norm": 0.0, "learning_rate": 5.238095238095238e-07, "loss": 0.7053, "projector_lr": 1.5714285714285714e-06, "rewards_train/accuracies": 0.125, "rewards_train/chosen": -0.043701171875, "rewards_train/margins": -0.06884765625, "rewards_train/rejected": 0.0250244140625, "sft_loss": 0.765625, "step": 33 }, { "dpo_loss": 0.66796875, "epoch": 0.01, "final_loss": 0.66796875, "grad_norm": 0.0, "learning_rate": 5.396825396825396e-07, "loss": 0.678, "projector_lr": 1.619047619047619e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": 0.01251220703125, "rewards_train/margins": 0.050048828125, "rewards_train/rejected": -0.03759765625, "sft_loss": 0.87109375, "step": 34 }, { "dpo_loss": 0.62890625, "epoch": 0.01, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 5.555555555555555e-07, "loss": 0.6582, "projector_lr": 1.6666666666666669e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.0218505859375, "rewards_train/margins": 0.103515625, "rewards_train/rejected": -0.125, "sft_loss": 0.69140625, "step": 35 }, { "dpo_loss": 0.6953125, "epoch": 0.01, "final_loss": 0.6953125, "grad_norm": 0.0, "learning_rate": 5.714285714285714e-07, "loss": 0.697, "projector_lr": 1.7142857142857143e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.018798828125, "rewards_train/margins": 6.103515625e-05, "rewards_train/rejected": -0.018798828125, "sft_loss": 0.8125, "step": 36 }, { "dpo_loss": 0.671875, "epoch": 0.01, "final_loss": 0.671875, "grad_norm": 0.0, "learning_rate": 5.873015873015873e-07, "loss": 0.6733, "projector_lr": 1.761904761904762e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.0625, "rewards_train/margins": 0.04052734375, "rewards_train/rejected": -0.10302734375, "sft_loss": 0.5625, "step": 37 }, { "dpo_loss": 0.6875, "epoch": 0.01, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 6.031746031746031e-07, "loss": 0.6626, "projector_lr": 1.8095238095238095e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.03125, "rewards_train/margins": 0.015625, "rewards_train/rejected": -0.046875, "sft_loss": 0.81640625, "step": 38 }, { "dpo_loss": 0.68359375, "epoch": 0.01, "final_loss": 0.68359375, "grad_norm": 0.0, "learning_rate": 6.19047619047619e-07, "loss": 0.6814, "projector_lr": 1.8571428571428573e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": 0.018798828125, "rewards_train/margins": 0.02587890625, "rewards_train/rejected": -0.007049560546875, "sft_loss": 0.77734375, "step": 39 }, { "dpo_loss": 0.68359375, "epoch": 0.01, "final_loss": 0.68359375, "grad_norm": 0.0, "learning_rate": 6.349206349206349e-07, "loss": 0.6831, "projector_lr": 1.9047619047619047e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": 0.0031280517578125, "rewards_train/margins": 0.015625, "rewards_train/rejected": -0.01251220703125, "sft_loss": 0.8203125, "step": 40 }, { "dpo_loss": 0.625, "epoch": 0.01, "final_loss": 0.625, "grad_norm": 0.0, "learning_rate": 6.507936507936507e-07, "loss": 0.6133, "projector_lr": 1.9523809523809523e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.034423828125, "rewards_train/margins": 0.1474609375, "rewards_train/rejected": -0.181640625, "sft_loss": 0.6484375, "step": 41 }, { "dpo_loss": 0.69921875, "epoch": 0.01, "final_loss": 0.69921875, "grad_norm": 0.0, "learning_rate": 6.666666666666666e-07, "loss": 0.6821, "projector_lr": 2e-06, "rewards_train/accuracies": 0.25, "rewards_train/chosen": 0.0093994140625, "rewards_train/margins": -0.00933837890625, "rewards_train/rejected": 0.018798828125, "sft_loss": 0.66015625, "step": 42 }, { "dpo_loss": 0.671875, "epoch": 0.01, "final_loss": 0.671875, "grad_norm": 0.0, "learning_rate": 6.825396825396826e-07, "loss": 0.6714, "projector_lr": 2.0476190476190475e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.0439453125, "rewards_train/margins": 0.03759765625, "rewards_train/rejected": -0.08154296875, "sft_loss": 0.66015625, "step": 43 }, { "dpo_loss": 0.6953125, "epoch": 0.01, "final_loss": 0.6953125, "grad_norm": 0.0, "learning_rate": 6.984126984126983e-07, "loss": 0.6938, "projector_lr": 2.095238095238095e-06, "rewards_train/accuracies": 0.25, "rewards_train/chosen": -0.03125, "rewards_train/margins": -3.0517578125e-05, "rewards_train/rejected": -0.03125, "sft_loss": 0.734375, "step": 44 }, { "dpo_loss": 0.66796875, "epoch": 0.01, "final_loss": 0.66796875, "grad_norm": 0.0, "learning_rate": 7.142857142857143e-07, "loss": 0.6624, "projector_lr": 2.142857142857143e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.018798828125, "rewards_train/margins": 0.050048828125, "rewards_train/rejected": -0.03125, "sft_loss": 0.5859375, "step": 45 }, { "dpo_loss": 0.69140625, "epoch": 0.01, "final_loss": 0.69140625, "grad_norm": 0.0, "learning_rate": 7.301587301587301e-07, "loss": 0.6982, "projector_lr": 2.1904761904761903e-06, "rewards_train/accuracies": 0.25, "rewards_train/chosen": -0.07177734375, "rewards_train/margins": 0.0031280517578125, "rewards_train/rejected": -0.0751953125, "sft_loss": 0.6875, "step": 46 }, { "dpo_loss": 0.69921875, "epoch": 0.01, "final_loss": 0.69921875, "grad_norm": 0.0, "learning_rate": 7.46031746031746e-07, "loss": 0.7029, "projector_lr": 2.2380952380952384e-06, "rewards_train/accuracies": 0.25, "rewards_train/chosen": -0.040771484375, "rewards_train/margins": -0.00628662109375, "rewards_train/rejected": -0.034423828125, "sft_loss": 0.61328125, "step": 47 }, { "dpo_loss": 0.65625, "epoch": 0.01, "final_loss": 0.65625, "grad_norm": 0.0, "learning_rate": 7.619047619047618e-07, "loss": 0.6714, "projector_lr": 2.2857142857142856e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.0751953125, "rewards_train/margins": 0.07177734375, "rewards_train/rejected": -0.1474609375, "sft_loss": 0.7890625, "step": 48 }, { "dpo_loss": 0.6953125, "epoch": 0.01, "final_loss": 0.6953125, "grad_norm": 0.0, "learning_rate": 7.777777777777778e-07, "loss": 0.6838, "projector_lr": 2.3333333333333336e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.062255859375, "rewards_train/margins": 0.0003604888916015625, "rewards_train/rejected": -0.0625, "sft_loss": 0.484375, "step": 49 }, { "dpo_loss": 0.71875, "epoch": 0.01, "final_loss": 0.71875, "grad_norm": 0.0, "learning_rate": 7.936507936507936e-07, "loss": 0.6951, "projector_lr": 2.3809523809523808e-06, "rewards_train/accuracies": 0.25, "rewards_train/chosen": -0.1376953125, "rewards_train/margins": -0.0439453125, "rewards_train/rejected": -0.09375, "sft_loss": 0.70703125, "step": 50 }, { "dpo_loss": 0.61328125, "epoch": 0.01, "final_loss": 0.61328125, "grad_norm": 0.0, "learning_rate": 8.095238095238095e-07, "loss": 0.6593, "projector_lr": 2.428571428571429e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.05322265625, "rewards_train/margins": 0.1689453125, "rewards_train/rejected": -0.22265625, "sft_loss": 0.578125, "step": 51 }, { "dpo_loss": 0.5625, "epoch": 0.01, "final_loss": 0.5625, "grad_norm": 0.0, "learning_rate": 8.253968253968253e-07, "loss": 0.6283, "projector_lr": 2.476190476190476e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.08154296875, "rewards_train/margins": 0.306640625, "rewards_train/rejected": -0.38671875, "sft_loss": 0.6640625, "step": 52 }, { "dpo_loss": 0.7109375, "epoch": 0.01, "final_loss": 0.7109375, "grad_norm": 0.0, "learning_rate": 8.412698412698413e-07, "loss": 0.7175, "projector_lr": 2.523809523809524e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.103515625, "rewards_train/margins": -0.0281982421875, "rewards_train/rejected": -0.0751953125, "sft_loss": 0.70703125, "step": 53 }, { "dpo_loss": 0.640625, "epoch": 0.01, "final_loss": 0.640625, "grad_norm": 0.0, "learning_rate": 8.57142857142857e-07, "loss": 0.6682, "projector_lr": 2.571428571428571e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.0250244140625, "rewards_train/margins": 0.119140625, "rewards_train/rejected": -0.09375, "sft_loss": 0.7265625, "step": 54 }, { "dpo_loss": 0.68359375, "epoch": 0.01, "final_loss": 0.68359375, "grad_norm": 0.0, "learning_rate": 8.73015873015873e-07, "loss": 0.6721, "projector_lr": 2.6190476190476192e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.032958984375, "rewards_train/margins": 0.03662109375, "rewards_train/rejected": -0.0693359375, "sft_loss": 0.984375, "step": 55 }, { "dpo_loss": 0.6484375, "epoch": 0.01, "final_loss": 0.6484375, "grad_norm": 0.0, "learning_rate": 8.888888888888888e-07, "loss": 0.6726, "projector_lr": 2.6666666666666664e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.0751953125, "rewards_train/margins": 0.078125, "rewards_train/rejected": -0.1533203125, "sft_loss": 0.76953125, "step": 56 }, { "dpo_loss": 0.69921875, "epoch": 0.01, "final_loss": 0.69921875, "grad_norm": 0.0, "learning_rate": 9.047619047619047e-07, "loss": 0.6161, "projector_lr": 2.7142857142857144e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.056396484375, "rewards_train/margins": -0.01251220703125, "rewards_train/rejected": -0.0439453125, "sft_loss": 0.8671875, "step": 57 }, { "dpo_loss": 0.62890625, "epoch": 0.01, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 9.206349206349205e-07, "loss": 0.6476, "projector_lr": 2.7619047619047616e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.043701171875, "rewards_train/margins": 0.2060546875, "rewards_train/rejected": -0.25, "sft_loss": 0.796875, "step": 58 }, { "dpo_loss": 0.69140625, "epoch": 0.01, "final_loss": 0.69140625, "grad_norm": 0.0, "learning_rate": 9.365079365079365e-07, "loss": 0.6538, "projector_lr": 2.8095238095238096e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.11865234375, "rewards_train/margins": 0.025146484375, "rewards_train/rejected": -0.1435546875, "sft_loss": 0.75, "step": 59 }, { "dpo_loss": 0.640625, "epoch": 0.01, "final_loss": 0.640625, "grad_norm": 0.0, "learning_rate": 9.523809523809522e-07, "loss": 0.6885, "projector_lr": 2.8571428571428573e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.0654296875, "rewards_train/margins": 0.109375, "rewards_train/rejected": -0.1748046875, "sft_loss": 0.56640625, "step": 60 }, { "dpo_loss": 0.62890625, "epoch": 0.01, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 9.682539682539682e-07, "loss": 0.6765, "projector_lr": 2.904761904761905e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1376953125, "rewards_train/margins": 0.11865234375, "rewards_train/rejected": -0.255859375, "sft_loss": 0.70703125, "step": 61 }, { "dpo_loss": 0.6640625, "epoch": 0.01, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 9.84126984126984e-07, "loss": 0.666, "projector_lr": 2.9523809523809525e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.0439453125, "rewards_train/margins": 0.0751953125, "rewards_train/rejected": -0.119140625, "sft_loss": 0.78125, "step": 62 }, { "dpo_loss": 0.6875, "epoch": 0.01, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.653, "projector_lr": 3e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.15625, "rewards_train/margins": 0.03125, "rewards_train/rejected": -0.1875, "sft_loss": 0.734375, "step": 63 }, { "dpo_loss": 0.625, "epoch": 0.01, "final_loss": 0.625, "grad_norm": 0.0, "learning_rate": 9.999999355416012e-07, "loss": 0.6334, "projector_lr": 2.9999998066248037e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.150390625, "rewards_train/margins": 0.162109375, "rewards_train/rejected": -0.3125, "sft_loss": 0.66015625, "step": 64 }, { "dpo_loss": 0.6015625, "epoch": 0.01, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 9.999997421664215e-07, "loss": 0.6331, "projector_lr": 2.999999226499265e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.10205078125, "rewards_train/margins": 0.435546875, "rewards_train/rejected": -0.53515625, "sft_loss": 0.66796875, "step": 65 }, { "dpo_loss": 0.69140625, "epoch": 0.01, "final_loss": 0.69140625, "grad_norm": 0.0, "learning_rate": 9.99999419874511e-07, "loss": 0.6407, "projector_lr": 2.999998259623533e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.0810546875, "rewards_train/margins": 0.0062255859375, "rewards_train/rejected": -0.08740234375, "sft_loss": 0.640625, "step": 66 }, { "dpo_loss": 0.734375, "epoch": 0.01, "final_loss": 0.734375, "grad_norm": 0.0, "learning_rate": 9.999989686659524e-07, "loss": 0.7007, "projector_lr": 2.9999969059978575e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.1943359375, "rewards_train/margins": -0.015625, "rewards_train/rejected": -0.1787109375, "sft_loss": 0.6328125, "step": 67 }, { "dpo_loss": 0.609375, "epoch": 0.01, "final_loss": 0.609375, "grad_norm": 0.0, "learning_rate": 9.999983885408623e-07, "loss": 0.6637, "projector_lr": 2.9999951656225872e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.050048828125, "rewards_train/margins": 0.17578125, "rewards_train/rejected": -0.2255859375, "sft_loss": 0.828125, "step": 68 }, { "dpo_loss": 0.578125, "epoch": 0.01, "final_loss": 0.578125, "grad_norm": 0.0, "learning_rate": 9.999976794993902e-07, "loss": 0.6188, "projector_lr": 2.999993038498171e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.3125, "rewards_train/margins": 0.31640625, "rewards_train/rejected": -0.62890625, "sft_loss": 0.609375, "step": 69 }, { "dpo_loss": 0.63671875, "epoch": 0.01, "final_loss": 0.63671875, "grad_norm": 0.0, "learning_rate": 9.999968415417191e-07, "loss": 0.6411, "projector_lr": 2.9999905246251573e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 0.193359375, "rewards_train/rejected": -0.5546875, "sft_loss": 0.640625, "step": 70 }, { "dpo_loss": 0.67578125, "epoch": 0.01, "final_loss": 0.67578125, "grad_norm": 0.0, "learning_rate": 9.999958746680646e-07, "loss": 0.6376, "projector_lr": 2.9999876240041937e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.3125, "rewards_train/margins": 0.0625, "rewards_train/rejected": -0.375, "sft_loss": 0.796875, "step": 71 }, { "dpo_loss": 0.69921875, "epoch": 0.01, "final_loss": 0.69921875, "grad_norm": 0.0, "learning_rate": 9.999947788786763e-07, "loss": 0.6578, "projector_lr": 2.9999843366360294e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.125, "rewards_train/margins": -0.006256103515625, "rewards_train/rejected": -0.11865234375, "sft_loss": 0.8125, "step": 72 }, { "dpo_loss": 0.6640625, "epoch": 0.01, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 9.999935541738369e-07, "loss": 0.6666, "projector_lr": 2.9999806625215106e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.234375, "rewards_train/margins": 0.080078125, "rewards_train/rejected": -0.314453125, "sft_loss": 0.734375, "step": 73 }, { "dpo_loss": 0.6484375, "epoch": 0.01, "final_loss": 0.6484375, "grad_norm": 0.0, "learning_rate": 9.999922005538618e-07, "loss": 0.6591, "projector_lr": 2.9999766016615855e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.197265625, "rewards_train/margins": 0.1689453125, "rewards_train/rejected": -0.3671875, "sft_loss": 0.73046875, "step": 74 }, { "dpo_loss": 0.62890625, "epoch": 0.01, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 9.999907180191001e-07, "loss": 0.5837, "projector_lr": 2.9999721540573006e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.318359375, "rewards_train/margins": 0.1435546875, "rewards_train/rejected": -0.462890625, "sft_loss": 0.80078125, "step": 75 }, { "dpo_loss": 0.5078125, "epoch": 0.01, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 9.999891065699342e-07, "loss": 0.5705, "projector_lr": 2.999967319709803e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 0.48828125, "rewards_train/rejected": -0.859375, "sft_loss": 0.7890625, "step": 76 }, { "dpo_loss": 0.58984375, "epoch": 0.01, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 9.999873662067795e-07, "loss": 0.6462, "projector_lr": 2.9999620986203386e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1298828125, "rewards_train/margins": 0.24609375, "rewards_train/rejected": -0.375, "sft_loss": 0.77734375, "step": 77 }, { "dpo_loss": 0.62109375, "epoch": 0.01, "final_loss": 0.62109375, "grad_norm": 0.0, "learning_rate": 9.999854969300848e-07, "loss": 0.6029, "projector_lr": 2.9999564907902546e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.197265625, "rewards_train/margins": 0.1748046875, "rewards_train/rejected": -0.37109375, "sft_loss": 0.5625, "step": 78 }, { "dpo_loss": 0.6796875, "epoch": 0.01, "final_loss": 0.6796875, "grad_norm": 0.0, "learning_rate": 9.99983498740332e-07, "loss": 0.6515, "projector_lr": 2.999950496220996e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.333984375, "rewards_train/margins": 0.078125, "rewards_train/rejected": -0.412109375, "sft_loss": 0.7578125, "step": 79 }, { "dpo_loss": 0.57421875, "epoch": 0.01, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 9.999813716380363e-07, "loss": 0.5988, "projector_lr": 2.9999441149141086e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 0.39453125, "rewards_train/rejected": -0.66796875, "sft_loss": 0.498046875, "step": 80 }, { "dpo_loss": 0.71875, "epoch": 0.01, "final_loss": 0.71875, "grad_norm": 0.0, "learning_rate": 9.99979115623746e-07, "loss": 0.6724, "projector_lr": 2.999937346871238e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 0.0234375, "rewards_train/rejected": -0.57421875, "sft_loss": 0.88671875, "step": 81 }, { "dpo_loss": 0.64453125, "epoch": 0.01, "final_loss": 0.64453125, "grad_norm": 0.0, "learning_rate": 9.999767306980429e-07, "loss": 0.668, "projector_lr": 2.9999301920941288e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 0.1318359375, "rewards_train/rejected": -0.40625, "sft_loss": 0.6953125, "step": 82 }, { "dpo_loss": 0.67578125, "epoch": 0.01, "final_loss": 0.67578125, "grad_norm": 0.0, "learning_rate": 9.99974216861542e-07, "loss": 0.6615, "projector_lr": 2.9999226505846264e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.3125, "rewards_train/margins": 0.08154296875, "rewards_train/rejected": -0.39453125, "sft_loss": 0.60546875, "step": 83 }, { "dpo_loss": 0.5625, "epoch": 0.01, "final_loss": 0.5625, "grad_norm": 0.0, "learning_rate": 9.999715741148915e-07, "loss": 0.5847, "projector_lr": 2.9999147223446743e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 0.3203125, "rewards_train/rejected": -0.58984375, "sft_loss": 0.75, "step": 84 }, { "dpo_loss": 0.5859375, "epoch": 0.01, "final_loss": 0.5859375, "grad_norm": 0.0, "learning_rate": 9.999688024587724e-07, "loss": 0.598, "projector_lr": 2.9999064073763176e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.03759765625, "rewards_train/margins": 0.279296875, "rewards_train/rejected": -0.31640625, "sft_loss": 0.515625, "step": 85 }, { "dpo_loss": 0.5703125, "epoch": 0.01, "final_loss": 0.5703125, "grad_norm": 0.0, "learning_rate": 9.999659018938997e-07, "loss": 0.614, "projector_lr": 2.9998977056816992e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.458984375, "rewards_train/margins": 0.46875, "rewards_train/rejected": -0.9296875, "sft_loss": 0.5625, "step": 86 }, { "dpo_loss": 0.6953125, "epoch": 0.01, "final_loss": 0.6953125, "grad_norm": 0.0, "learning_rate": 9.999628724210213e-07, "loss": 0.6943, "projector_lr": 2.9998886172630643e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.421875, "rewards_train/margins": 0.053466796875, "rewards_train/rejected": -0.474609375, "sft_loss": 0.6015625, "step": 87 }, { "dpo_loss": 0.51953125, "epoch": 0.01, "final_loss": 0.51953125, "grad_norm": 0.0, "learning_rate": 9.99959714040918e-07, "loss": 0.5628, "projector_lr": 2.999879142122754e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.349609375, "rewards_train/margins": 1.015625, "rewards_train/rejected": -1.3671875, "sft_loss": 0.640625, "step": 88 }, { "dpo_loss": 0.5234375, "epoch": 0.01, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 9.999564267544043e-07, "loss": 0.5823, "projector_lr": 2.999869280263213e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.37109375, "rewards_train/margins": 0.451171875, "rewards_train/rejected": -0.82421875, "sft_loss": 0.69921875, "step": 89 }, { "dpo_loss": 0.59375, "epoch": 0.01, "final_loss": 0.59375, "grad_norm": 0.0, "learning_rate": 9.999530105623278e-07, "loss": 0.5317, "projector_lr": 2.9998590316869835e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 0.228515625, "rewards_train/rejected": -0.68359375, "sft_loss": 0.703125, "step": 90 }, { "dpo_loss": 0.72265625, "epoch": 0.01, "final_loss": 0.72265625, "grad_norm": 0.0, "learning_rate": 9.999494654655695e-07, "loss": 0.6716, "projector_lr": 2.999848396396708e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 0.05029296875, "rewards_train/rejected": -0.69921875, "sft_loss": 0.640625, "step": 91 }, { "dpo_loss": 0.55859375, "epoch": 0.01, "final_loss": 0.55859375, "grad_norm": 0.0, "learning_rate": 9.999457914650428e-07, "loss": 0.5782, "projector_lr": 2.9998373743951285e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.1474609375, "rewards_train/margins": 0.375, "rewards_train/rejected": -0.5234375, "sft_loss": 0.7578125, "step": 92 }, { "dpo_loss": 0.498046875, "epoch": 0.01, "final_loss": 0.498046875, "grad_norm": 0.0, "learning_rate": 9.999419885616955e-07, "loss": 0.4943, "projector_lr": 2.9998259656850867e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.32421875, "rewards_train/margins": 0.494140625, "rewards_train/rejected": -0.8203125, "sft_loss": 0.8671875, "step": 93 }, { "dpo_loss": 0.64453125, "epoch": 0.02, "final_loss": 0.64453125, "grad_norm": 0.0, "learning_rate": 9.999380567565081e-07, "loss": 0.5591, "projector_lr": 2.9998141702695246e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.26171875, "rewards_train/margins": 0.140625, "rewards_train/rejected": -0.40234375, "sft_loss": 0.66015625, "step": 94 }, { "dpo_loss": 0.412109375, "epoch": 0.02, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 9.999339960504942e-07, "loss": 0.5572, "projector_lr": 2.999801988151483e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 1.109375, "rewards_train/rejected": -1.390625, "sft_loss": 0.59765625, "step": 95 }, { "dpo_loss": 0.55859375, "epoch": 0.02, "final_loss": 0.55859375, "grad_norm": 0.0, "learning_rate": 9.999298064447008e-07, "loss": 0.5818, "projector_lr": 2.999789419334103e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.890625, "rewards_train/margins": 0.439453125, "rewards_train/rejected": -1.328125, "sft_loss": 0.68359375, "step": 96 }, { "dpo_loss": 0.69921875, "epoch": 0.02, "final_loss": 0.69921875, "grad_norm": 0.0, "learning_rate": 9.999254879402084e-07, "loss": 0.5814, "projector_lr": 2.9997764638206253e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 0.031982421875, "rewards_train/rejected": -0.80078125, "sft_loss": 0.83203125, "step": 97 }, { "dpo_loss": 0.51953125, "epoch": 0.02, "final_loss": 0.51953125, "grad_norm": 0.0, "learning_rate": 9.9992104053813e-07, "loss": 0.5617, "projector_lr": 2.9997631216143903e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.53125, "rewards_train/margins": 0.578125, "rewards_train/rejected": -1.109375, "sft_loss": 0.54296875, "step": 98 }, { "dpo_loss": 0.65625, "epoch": 0.02, "final_loss": 0.65625, "grad_norm": 0.0, "learning_rate": 9.999164642396127e-07, "loss": 0.5916, "projector_lr": 2.9997493927188386e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.466796875, "rewards_train/margins": 0.0966796875, "rewards_train/rejected": -0.5625, "sft_loss": 0.71484375, "step": 99 }, { "dpo_loss": 0.4921875, "epoch": 0.02, "final_loss": 0.4921875, "grad_norm": 0.0, "learning_rate": 9.99911759045836e-07, "loss": 0.6037, "projector_lr": 2.999735277137509e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 0.98828125, "rewards_train/rejected": -1.7421875, "sft_loss": 0.70703125, "step": 100 }, { "dpo_loss": 0.60546875, "epoch": 0.02, "final_loss": 0.60546875, "grad_norm": 0.0, "learning_rate": 9.999069249580136e-07, "loss": 0.6402, "projector_lr": 2.999720774874041e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.203125, "rewards_train/margins": 0.373046875, "rewards_train/rejected": -1.578125, "sft_loss": 0.72265625, "step": 101 }, { "dpo_loss": 0.546875, "epoch": 0.02, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 9.999019619773915e-07, "loss": 0.5096, "projector_lr": 2.9997058859321745e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.255859375, "rewards_train/margins": 0.3984375, "rewards_train/rejected": -0.65625, "sft_loss": 1.109375, "step": 102 }, { "dpo_loss": 0.625, "epoch": 0.02, "final_loss": 0.625, "grad_norm": 0.0, "learning_rate": 9.998968701052492e-07, "loss": 0.6146, "projector_lr": 2.999690610315748e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.458984375, "rewards_train/margins": 0.255859375, "rewards_train/rejected": -0.71484375, "sft_loss": 0.6640625, "step": 103 }, { "dpo_loss": 0.55859375, "epoch": 0.02, "final_loss": 0.55859375, "grad_norm": 0.0, "learning_rate": 9.998916493429e-07, "loss": 0.5806, "projector_lr": 2.9996749480287e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 0.80078125, "rewards_train/rejected": -1.7109375, "sft_loss": 0.82421875, "step": 104 }, { "dpo_loss": 0.44921875, "epoch": 0.02, "final_loss": 0.44921875, "grad_norm": 0.0, "learning_rate": 9.998862996916898e-07, "loss": 0.5552, "projector_lr": 2.9996588990750695e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.494140625, "rewards_train/margins": 0.765625, "rewards_train/rejected": -1.2578125, "sft_loss": 0.466796875, "step": 105 }, { "dpo_loss": 0.458984375, "epoch": 0.02, "final_loss": 0.458984375, "grad_norm": 0.0, "learning_rate": 9.998808211529976e-07, "loss": 0.5325, "projector_lr": 2.999642463458993e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 0.84375, "rewards_train/rejected": -1.3828125, "sft_loss": 0.46875, "step": 106 }, { "dpo_loss": 0.439453125, "epoch": 0.02, "final_loss": 0.439453125, "grad_norm": 0.0, "learning_rate": 9.998752137282363e-07, "loss": 0.5009, "projector_lr": 2.9996256411847093e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 1.453125, "rewards_train/rejected": -2.78125, "sft_loss": 0.6640625, "step": 107 }, { "dpo_loss": 0.66015625, "epoch": 0.02, "final_loss": 0.66015625, "grad_norm": 0.0, "learning_rate": 9.998694774188517e-07, "loss": 0.6082, "projector_lr": 2.9996084322565554e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 0.11669921875, "rewards_train/rejected": -0.70703125, "sft_loss": 0.6015625, "step": 108 }, { "dpo_loss": 0.6015625, "epoch": 0.02, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 9.998636122263226e-07, "loss": 0.5624, "projector_lr": 2.9995908366789683e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.37890625, "rewards_train/margins": 0.388671875, "rewards_train/rejected": -0.765625, "sft_loss": 0.88671875, "step": 109 }, { "dpo_loss": 0.75, "epoch": 0.02, "final_loss": 0.75, "grad_norm": 0.0, "learning_rate": 9.998576181521616e-07, "loss": 0.6786, "projector_lr": 2.999572854456485e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.7265625, "rewards_train/margins": -0.125, "rewards_train/rejected": -0.6015625, "sft_loss": 0.60546875, "step": 110 }, { "dpo_loss": 0.56640625, "epoch": 0.02, "final_loss": 0.56640625, "grad_norm": 0.0, "learning_rate": 9.998514951979138e-07, "loss": 0.5691, "projector_lr": 2.9995544855937417e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1689453125, "rewards_train/margins": 0.2890625, "rewards_train/rejected": -0.45703125, "sft_loss": 0.703125, "step": 111 }, { "dpo_loss": 0.65625, "epoch": 0.02, "final_loss": 0.65625, "grad_norm": 0.0, "learning_rate": 9.99845243365158e-07, "loss": 0.5885, "projector_lr": 2.9995357300954746e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 0.1337890625, "rewards_train/rejected": -0.68359375, "sft_loss": 0.65625, "step": 112 }, { "dpo_loss": 0.578125, "epoch": 0.02, "final_loss": 0.578125, "grad_norm": 0.0, "learning_rate": 9.998388626555062e-07, "loss": 0.608, "projector_lr": 2.9995165879665188e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 1.1171875, "rewards_train/rejected": -1.671875, "sft_loss": 0.625, "step": 113 }, { "dpo_loss": 0.61328125, "epoch": 0.02, "final_loss": 0.61328125, "grad_norm": 0.0, "learning_rate": 9.998323530706037e-07, "loss": 0.5558, "projector_lr": 2.999497059211811e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 0.33203125, "rewards_train/rejected": -1.171875, "sft_loss": 0.765625, "step": 114 }, { "dpo_loss": 0.482421875, "epoch": 0.02, "final_loss": 0.482421875, "grad_norm": 0.0, "learning_rate": 9.998257146121285e-07, "loss": 0.5645, "projector_lr": 2.9994771438363858e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1845703125, "rewards_train/margins": 0.7421875, "rewards_train/rejected": -0.92578125, "sft_loss": 1.0546875, "step": 115 }, { "dpo_loss": 0.6015625, "epoch": 0.02, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 9.998189472817924e-07, "loss": 0.6191, "projector_lr": 2.9994568418453773e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.78125, "rewards_train/margins": 0.41796875, "rewards_train/rejected": -1.203125, "sft_loss": 0.79296875, "step": 116 }, { "dpo_loss": 0.546875, "epoch": 0.02, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 9.998120510813405e-07, "loss": 0.5347, "projector_lr": 2.9994361532440215e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 0.4921875, "rewards_train/rejected": -1.1484375, "sft_loss": 0.66015625, "step": 117 }, { "dpo_loss": 0.578125, "epoch": 0.02, "final_loss": 0.578125, "grad_norm": 0.0, "learning_rate": 9.998050260125507e-07, "loss": 0.5777, "projector_lr": 2.9994150780376518e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.53125, "rewards_train/margins": 0.392578125, "rewards_train/rejected": -0.92578125, "sft_loss": 0.83984375, "step": 118 }, { "dpo_loss": 0.33203125, "epoch": 0.02, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 9.99797872077234e-07, "loss": 0.5021, "projector_lr": 2.9993936162317022e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.90625, "rewards_train/margins": 1.8046875, "rewards_train/rejected": -2.703125, "sft_loss": 0.67578125, "step": 119 }, { "dpo_loss": 0.53515625, "epoch": 0.02, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 9.997905892772352e-07, "loss": 0.59, "projector_lr": 2.999371767831706e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.59765625, "rewards_train/margins": 0.515625, "rewards_train/rejected": -1.1171875, "sft_loss": 0.7265625, "step": 120 }, { "dpo_loss": 0.34375, "epoch": 0.02, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 9.997831776144322e-07, "loss": 0.3903, "projector_lr": 2.999349532843297e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5625, "rewards_train/margins": 1.65625, "rewards_train/rejected": -2.21875, "sft_loss": 0.6640625, "step": 121 }, { "dpo_loss": 0.59375, "epoch": 0.02, "final_loss": 0.59375, "grad_norm": 0.0, "learning_rate": 9.997756370907358e-07, "loss": 0.5168, "projector_lr": 2.9993269112722073e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 0.302734375, "rewards_train/rejected": -1.078125, "sft_loss": 0.95703125, "step": 122 }, { "dpo_loss": 0.59765625, "epoch": 0.02, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 9.997679677080902e-07, "loss": 0.6312, "projector_lr": 2.9993039031242704e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.53125, "rewards_train/margins": 0.412109375, "rewards_train/rejected": -0.9453125, "sft_loss": 0.640625, "step": 123 }, { "dpo_loss": 0.6640625, "epoch": 0.02, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 9.997601694684727e-07, "loss": 0.7051, "projector_lr": 2.9992805084054186e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.6875, "rewards_train/margins": 0.23046875, "rewards_train/rejected": -0.91796875, "sft_loss": 0.83203125, "step": 124 }, { "dpo_loss": 0.59375, "epoch": 0.02, "final_loss": 0.59375, "grad_norm": 0.0, "learning_rate": 9.997522423738942e-07, "loss": 0.5902, "projector_lr": 2.9992567271216827e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 0.365234375, "rewards_train/rejected": -0.890625, "sft_loss": 0.77734375, "step": 125 }, { "dpo_loss": 0.64453125, "epoch": 0.02, "final_loss": 0.64453125, "grad_norm": 0.0, "learning_rate": 9.997441864263984e-07, "loss": 0.6169, "projector_lr": 2.9992325592791957e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 0.11767578125, "rewards_train/rejected": -0.57421875, "sft_loss": 0.6953125, "step": 126 }, { "dpo_loss": 0.71484375, "epoch": 0.02, "final_loss": 0.71484375, "grad_norm": 0.0, "learning_rate": 9.997360016280624e-07, "loss": 0.6341, "projector_lr": 2.999208004884188e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 0.044189453125, "rewards_train/rejected": -0.443359375, "sft_loss": 0.953125, "step": 127 }, { "dpo_loss": 0.7265625, "epoch": 0.02, "final_loss": 0.7265625, "grad_norm": 0.0, "learning_rate": 9.997276879809968e-07, "loss": 0.7213, "projector_lr": 2.999183063942991e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.193359375, "rewards_train/margins": 0.2119140625, "rewards_train/rejected": -0.40625, "sft_loss": 0.6875, "step": 128 }, { "dpo_loss": 0.47265625, "epoch": 0.02, "final_loss": 0.47265625, "grad_norm": 0.0, "learning_rate": 9.997192454873446e-07, "loss": 0.509, "projector_lr": 2.999157736462034e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2470703125, "rewards_train/margins": 0.609375, "rewards_train/rejected": -0.85546875, "sft_loss": 0.70703125, "step": 129 }, { "dpo_loss": 0.5, "epoch": 0.02, "final_loss": 0.5, "grad_norm": 0.0, "learning_rate": 9.99710674149283e-07, "loss": 0.515, "projector_lr": 2.9991320224478497e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2734375, "rewards_train/margins": 0.55078125, "rewards_train/rejected": -0.82421875, "sft_loss": 0.8984375, "step": 130 }, { "dpo_loss": 0.390625, "epoch": 0.02, "final_loss": 0.390625, "grad_norm": 0.0, "learning_rate": 9.99701973969022e-07, "loss": 0.478, "projector_lr": 2.9991059219070664e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.326171875, "rewards_train/margins": 1.2890625, "rewards_train/rejected": -1.6171875, "sft_loss": 0.80859375, "step": 131 }, { "dpo_loss": 0.46484375, "epoch": 0.02, "final_loss": 0.46484375, "grad_norm": 0.0, "learning_rate": 9.996931449488044e-07, "loss": 0.4097, "projector_lr": 2.9990794348464133e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 1.1796875, "rewards_train/rejected": -1.5546875, "sft_loss": 0.71875, "step": 132 }, { "dpo_loss": 0.46484375, "epoch": 0.02, "final_loss": 0.46484375, "grad_norm": 0.0, "learning_rate": 9.99684187090907e-07, "loss": 0.4825, "projector_lr": 2.9990525612727213e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1748046875, "rewards_train/margins": 0.609375, "rewards_train/rejected": -0.78515625, "sft_loss": 0.87109375, "step": 133 }, { "dpo_loss": 0.5859375, "epoch": 0.02, "final_loss": 0.5859375, "grad_norm": 0.0, "learning_rate": 9.996751003976392e-07, "loss": 0.5457, "projector_lr": 2.9990253011929175e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 0.396484375, "rewards_train/rejected": -0.9453125, "sft_loss": 0.77734375, "step": 134 }, { "dpo_loss": 0.47265625, "epoch": 0.02, "final_loss": 0.47265625, "grad_norm": 0.0, "learning_rate": 9.99665884871344e-07, "loss": 0.5149, "projector_lr": 2.998997654614032e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.25, "rewards_train/margins": 0.7265625, "rewards_train/rejected": -0.9765625, "sft_loss": 0.86328125, "step": 135 }, { "dpo_loss": 0.46484375, "epoch": 0.02, "final_loss": 0.46484375, "grad_norm": 0.0, "learning_rate": 9.996565405143974e-07, "loss": 0.4242, "projector_lr": 2.9989696215431923e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1376953125, "rewards_train/margins": 0.6796875, "rewards_train/rejected": -0.8203125, "sft_loss": 0.6484375, "step": 136 }, { "dpo_loss": 0.54296875, "epoch": 0.02, "final_loss": 0.54296875, "grad_norm": 0.0, "learning_rate": 9.996470673292088e-07, "loss": 0.5008, "projector_lr": 2.998941201987626e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.478515625, "rewards_train/margins": 0.49609375, "rewards_train/rejected": -0.9765625, "sft_loss": 0.74609375, "step": 137 }, { "dpo_loss": 0.5, "epoch": 0.02, "final_loss": 0.5, "grad_norm": 0.0, "learning_rate": 9.996374653182205e-07, "loss": 0.4505, "projector_lr": 2.9989123959546616e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.03466796875, "rewards_train/margins": 0.51171875, "rewards_train/rejected": -0.54296875, "sft_loss": 0.91796875, "step": 138 }, { "dpo_loss": 0.51171875, "epoch": 0.02, "final_loss": 0.51171875, "grad_norm": 0.0, "learning_rate": 9.996277344839084e-07, "loss": 0.4504, "projector_lr": 2.998883203451725e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1748046875, "rewards_train/margins": 0.55078125, "rewards_train/rejected": -0.7265625, "sft_loss": 0.67578125, "step": 139 }, { "dpo_loss": 0.5625, "epoch": 0.02, "final_loss": 0.5625, "grad_norm": 0.0, "learning_rate": 9.99617874828781e-07, "loss": 0.5528, "projector_lr": 2.9988536244863436e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.00018310546875, "rewards_train/margins": 0.32421875, "rewards_train/rejected": -0.32421875, "sft_loss": 0.625, "step": 140 }, { "dpo_loss": 0.66015625, "epoch": 0.02, "final_loss": 0.66015625, "grad_norm": 0.0, "learning_rate": 9.996078863553813e-07, "loss": 0.5149, "projector_lr": 2.9988236590661443e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1064453125, "rewards_train/margins": 0.0625, "rewards_train/rejected": -0.1689453125, "sft_loss": 0.79296875, "step": 141 }, { "dpo_loss": 0.412109375, "epoch": 0.02, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 9.99597769066284e-07, "loss": 0.5249, "projector_lr": 2.9987933071988524e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.34375, "rewards_train/margins": 1.296875, "rewards_train/rejected": -1.6484375, "sft_loss": 0.91796875, "step": 142 }, { "dpo_loss": 0.57421875, "epoch": 0.02, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 9.99587522964098e-07, "loss": 0.5905, "projector_lr": 2.9987625688922943e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2216796875, "rewards_train/margins": 0.29296875, "rewards_train/rejected": -0.515625, "sft_loss": 0.58984375, "step": 143 }, { "dpo_loss": 0.5, "epoch": 0.02, "final_loss": 0.5, "grad_norm": 0.0, "learning_rate": 9.995771480514647e-07, "loss": 0.619, "projector_lr": 2.9987314441543945e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.181640625, "rewards_train/margins": 0.5, "rewards_train/rejected": -0.68359375, "sft_loss": 0.7265625, "step": 144 }, { "dpo_loss": 0.380859375, "epoch": 0.02, "final_loss": 0.380859375, "grad_norm": 0.0, "learning_rate": 9.995666443310597e-07, "loss": 0.504, "projector_lr": 2.998699932993179e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0159912109375, "rewards_train/margins": 0.90234375, "rewards_train/rejected": -0.921875, "sft_loss": 0.55078125, "step": 145 }, { "dpo_loss": 0.65234375, "epoch": 0.02, "final_loss": 0.65234375, "grad_norm": 0.0, "learning_rate": 9.995560118055905e-07, "loss": 0.6686, "projector_lr": 2.998668035416772e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1533203125, "rewards_train/margins": 0.1708984375, "rewards_train/rejected": -0.32421875, "sft_loss": 0.8125, "step": 146 }, { "dpo_loss": 0.58203125, "epoch": 0.02, "final_loss": 0.58203125, "grad_norm": 0.0, "learning_rate": 9.995452504777992e-07, "loss": 0.618, "projector_lr": 2.9986357514333977e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.609375, "rewards_train/margins": 0.5390625, "rewards_train/rejected": -1.1484375, "sft_loss": 0.7734375, "step": 147 }, { "dpo_loss": 0.361328125, "epoch": 0.02, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.9953436035046e-07, "loss": 0.4936, "projector_lr": 2.99860308105138e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1376953125, "rewards_train/margins": 1.046875, "rewards_train/rejected": -1.1875, "sft_loss": 0.75390625, "step": 148 }, { "dpo_loss": 0.474609375, "epoch": 0.02, "final_loss": 0.474609375, "grad_norm": 0.0, "learning_rate": 9.99523341426381e-07, "loss": 0.514, "projector_lr": 2.9985700242791427e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.28125, "rewards_train/margins": 1.6953125, "rewards_train/rejected": -2.984375, "sft_loss": 0.62109375, "step": 149 }, { "dpo_loss": 0.44921875, "epoch": 0.02, "final_loss": 0.44921875, "grad_norm": 0.0, "learning_rate": 9.995121937084029e-07, "loss": 0.4724, "projector_lr": 2.9985365811252086e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2255859375, "rewards_train/margins": 0.91796875, "rewards_train/rejected": -1.140625, "sft_loss": 0.73046875, "step": 150 }, { "dpo_loss": 0.478515625, "epoch": 0.02, "final_loss": 0.478515625, "grad_norm": 0.0, "learning_rate": 9.995009171994e-07, "loss": 0.5075, "projector_lr": 2.9985027515982e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1904296875, "rewards_train/margins": 0.765625, "rewards_train/rejected": -0.953125, "sft_loss": 1.0703125, "step": 151 }, { "dpo_loss": 0.6171875, "epoch": 0.02, "final_loss": 0.6171875, "grad_norm": 0.0, "learning_rate": 9.994895119022801e-07, "loss": 0.4692, "projector_lr": 2.9984685357068406e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.0625, "rewards_train/margins": 0.318359375, "rewards_train/rejected": -0.255859375, "sft_loss": 0.90234375, "step": 152 }, { "dpo_loss": 0.796875, "epoch": 0.02, "final_loss": 0.796875, "grad_norm": 0.0, "learning_rate": 9.994779778199838e-07, "loss": 0.6374, "projector_lr": 2.9984339334599517e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 0.00048828125, "rewards_train/rejected": -0.275390625, "sft_loss": 0.76171875, "step": 153 }, { "dpo_loss": 0.369140625, "epoch": 0.02, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 9.994663149554846e-07, "loss": 0.5455, "projector_lr": 2.9983989448664545e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.09326171875, "rewards_train/margins": 1.8984375, "rewards_train/rejected": -1.9921875, "sft_loss": 0.6796875, "step": 154 }, { "dpo_loss": 0.50390625, "epoch": 0.02, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 9.994545233117903e-07, "loss": 0.5231, "projector_lr": 2.998363569935371e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.10009765625, "rewards_train/margins": 0.7734375, "rewards_train/rejected": -0.875, "sft_loss": 0.64453125, "step": 155 }, { "dpo_loss": 0.5625, "epoch": 0.02, "final_loss": NaN, "grad_norm": 0.0, "learning_rate": 9.994426028919406e-07, "loss": 0.4741, "projector_lr": 2.9983278086758216e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.06884765625, "rewards_train/margins": 0.396484375, "rewards_train/rejected": -0.46484375, "sft_loss": NaN, "step": 156 }, { "dpo_loss": 0.388671875, "epoch": 0.03, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 9.99430553699009e-07, "loss": 0.4229, "projector_lr": 2.9982916610970267e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1611328125, "rewards_train/margins": 0.93359375, "rewards_train/rejected": -1.09375, "sft_loss": 0.609375, "step": 157 }, { "dpo_loss": 0.451171875, "epoch": 0.03, "final_loss": 0.451171875, "grad_norm": 0.0, "learning_rate": 9.99418375736102e-07, "loss": 0.7064, "projector_lr": 2.9982551272083067e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2412109375, "rewards_train/margins": 0.7734375, "rewards_train/rejected": -1.015625, "sft_loss": 0.97265625, "step": 158 }, { "dpo_loss": 0.484375, "epoch": 0.03, "final_loss": 0.484375, "grad_norm": 0.0, "learning_rate": 9.994060690063602e-07, "loss": 0.5013, "projector_lr": 2.9982182070190807e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.06884765625, "rewards_train/margins": 0.65234375, "rewards_train/rejected": -0.71875, "sft_loss": 0.71875, "step": 159 }, { "dpo_loss": 0.3046875, "epoch": 0.03, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 9.993936335129562e-07, "loss": 0.4001, "projector_lr": 2.9981809005388685e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.265625, "rewards_train/margins": 1.28125, "rewards_train/rejected": -2.546875, "sft_loss": 0.65625, "step": 160 }, { "dpo_loss": 0.53515625, "epoch": 0.03, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 9.993810692590963e-07, "loss": 0.4745, "projector_lr": 2.998143207777289e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.291015625, "rewards_train/margins": 0.46875, "rewards_train/rejected": -0.76171875, "sft_loss": 0.703125, "step": 161 }, { "dpo_loss": 0.42578125, "epoch": 0.03, "final_loss": 0.42578125, "grad_norm": 0.0, "learning_rate": 9.993683762480201e-07, "loss": 0.4925, "projector_lr": 2.9981051287440605e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.17578125, "rewards_train/margins": 0.78125, "rewards_train/rejected": -0.95703125, "sft_loss": 0.7421875, "step": 162 }, { "dpo_loss": 0.54296875, "epoch": 0.03, "final_loss": 0.54296875, "grad_norm": 0.0, "learning_rate": 9.99355554483e-07, "loss": 0.5377, "projector_lr": 2.9980666634490008e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.018798828125, "rewards_train/margins": 0.59375, "rewards_train/rejected": -0.61328125, "sft_loss": 0.74609375, "step": 163 }, { "dpo_loss": 0.41796875, "epoch": 0.03, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 9.993426039673425e-07, "loss": 0.4843, "projector_lr": 2.9980278119020275e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.037841796875, "rewards_train/margins": 0.8671875, "rewards_train/rejected": -0.828125, "sft_loss": 0.9453125, "step": 164 }, { "dpo_loss": 0.3828125, "epoch": 0.03, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 9.99329524704386e-07, "loss": 0.4534, "projector_lr": 2.9979885741131587e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.265625, "rewards_train/margins": 1.1171875, "rewards_train/rejected": -1.3828125, "sft_loss": 0.5625, "step": 165 }, { "dpo_loss": 0.61328125, "epoch": 0.03, "final_loss": 0.61328125, "grad_norm": 0.0, "learning_rate": 9.993163166975035e-07, "loss": 0.5201, "projector_lr": 2.9979489500925104e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 0.35546875, "rewards_train/rejected": -0.625, "sft_loss": 0.6953125, "step": 166 }, { "dpo_loss": 0.5234375, "epoch": 0.03, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 9.993029799500995e-07, "loss": 0.5678, "projector_lr": 2.997908939850299e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.375, "rewards_train/margins": 0.85546875, "rewards_train/rejected": -1.234375, "sft_loss": 0.8046875, "step": 167 }, { "dpo_loss": 0.63671875, "epoch": 0.03, "final_loss": 0.63671875, "grad_norm": 0.0, "learning_rate": 9.992895144656136e-07, "loss": 0.557, "projector_lr": 2.9978685433968405e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.291015625, "rewards_train/margins": 0.353515625, "rewards_train/rejected": -0.64453125, "sft_loss": 0.6875, "step": 168 }, { "dpo_loss": 0.5, "epoch": 0.03, "final_loss": 0.5, "grad_norm": 0.0, "learning_rate": 9.99275920247517e-07, "loss": 0.4867, "projector_lr": 2.9978277607425512e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 0.609375, "rewards_train/rejected": -1.1484375, "sft_loss": 1.078125, "step": 169 }, { "dpo_loss": 0.609375, "epoch": 0.03, "final_loss": 0.609375, "grad_norm": 0.0, "learning_rate": 9.99262197299315e-07, "loss": 0.6028, "projector_lr": 2.9977865918979456e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.44921875, "rewards_train/margins": 0.412109375, "rewards_train/rejected": -0.86328125, "sft_loss": 0.66015625, "step": 170 }, { "dpo_loss": 0.5703125, "epoch": 0.03, "final_loss": 0.5703125, "grad_norm": 0.0, "learning_rate": 9.99248345624546e-07, "loss": 0.4722, "projector_lr": 2.9977450368736383e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3046875, "rewards_train/margins": 0.62109375, "rewards_train/rejected": -0.92578125, "sft_loss": 0.9765625, "step": 171 }, { "dpo_loss": 0.431640625, "epoch": 0.03, "final_loss": 0.431640625, "grad_norm": 0.0, "learning_rate": 9.992343652267814e-07, "loss": 0.4077, "projector_lr": 2.997703095680344e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.33203125, "rewards_train/margins": 1.2109375, "rewards_train/rejected": -1.5390625, "sft_loss": 0.81640625, "step": 172 }, { "dpo_loss": 0.5859375, "epoch": 0.03, "final_loss": 0.5859375, "grad_norm": 0.0, "learning_rate": 9.992202561096254e-07, "loss": 0.6576, "projector_lr": 2.9976607683288763e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 0.46875, "rewards_train/rejected": -1.2109375, "sft_loss": 0.65625, "step": 173 }, { "dpo_loss": 0.3203125, "epoch": 0.03, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 9.99206018276716e-07, "loss": 0.312, "projector_lr": 2.9976180548301486e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.439453125, "rewards_train/margins": 1.140625, "rewards_train/rejected": -1.578125, "sft_loss": 0.6171875, "step": 174 }, { "dpo_loss": 0.59765625, "epoch": 0.03, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 9.991916517317247e-07, "loss": 0.6363, "projector_lr": 2.997574955195174e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.416015625, "rewards_train/margins": 0.23828125, "rewards_train/rejected": -0.65234375, "sft_loss": 0.484375, "step": 175 }, { "dpo_loss": 0.5859375, "epoch": 0.03, "final_loss": 0.5859375, "grad_norm": 0.0, "learning_rate": 9.99177156478355e-07, "loss": 0.5938, "projector_lr": 2.9975314694350647e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.267578125, "rewards_train/margins": 0.369140625, "rewards_train/rejected": -0.63671875, "sft_loss": 0.76953125, "step": 176 }, { "dpo_loss": 0.439453125, "epoch": 0.03, "final_loss": 0.439453125, "grad_norm": 0.0, "learning_rate": 9.991625325203443e-07, "loss": 0.2941, "projector_lr": 2.997487597561033e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2373046875, "rewards_train/margins": 1.0859375, "rewards_train/rejected": -1.328125, "sft_loss": 0.68359375, "step": 177 }, { "dpo_loss": 0.259765625, "epoch": 0.03, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 9.991477798614637e-07, "loss": 0.3029, "projector_lr": 2.9974433395843914e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5, "rewards_train/margins": 1.4140625, "rewards_train/rejected": -1.9140625, "sft_loss": 0.66015625, "step": 178 }, { "dpo_loss": 0.296875, "epoch": 0.03, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 9.991328985055165e-07, "loss": 0.4116, "projector_lr": 2.99739869551655e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.640625, "rewards_train/margins": 1.40625, "rewards_train/rejected": -3.046875, "sft_loss": 0.640625, "step": 179 }, { "dpo_loss": 0.57421875, "epoch": 0.03, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 9.991178884563396e-07, "loss": 0.5356, "projector_lr": 2.997353665369019e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.625, "rewards_train/margins": 0.5390625, "rewards_train/rejected": -1.1640625, "sft_loss": 0.66796875, "step": 180 }, { "dpo_loss": 0.478515625, "epoch": 0.03, "final_loss": 0.478515625, "grad_norm": 0.0, "learning_rate": 9.991027497178033e-07, "loss": 0.5772, "projector_lr": 2.99730824915341e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 0.64453125, "rewards_train/rejected": -1.078125, "sft_loss": 1.4140625, "step": 181 }, { "dpo_loss": 0.279296875, "epoch": 0.03, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 9.99087482293811e-07, "loss": 0.2987, "projector_lr": 2.997262446881433e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.421875, "rewards_train/margins": 1.4453125, "rewards_train/rejected": -1.8671875, "sft_loss": 0.71484375, "step": 182 }, { "dpo_loss": 0.423828125, "epoch": 0.03, "final_loss": 0.423828125, "grad_norm": 0.0, "learning_rate": 9.990720861882985e-07, "loss": 0.4592, "projector_lr": 2.9972162585648958e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.75, "rewards_train/margins": 1.40625, "rewards_train/rejected": -2.15625, "sft_loss": 0.66796875, "step": 183 }, { "dpo_loss": 0.65234375, "epoch": 0.03, "final_loss": 0.65234375, "grad_norm": 0.0, "learning_rate": 9.990565614052362e-07, "loss": 0.6765, "projector_lr": 2.997169684215709e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 0.330078125, "rewards_train/rejected": -0.98046875, "sft_loss": 0.8671875, "step": 184 }, { "dpo_loss": 0.4609375, "epoch": 0.03, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 9.990409079486264e-07, "loss": 0.3884, "projector_lr": 2.9971227238458793e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.32421875, "rewards_train/margins": 0.7578125, "rewards_train/rejected": -1.0859375, "sft_loss": 0.70703125, "step": 185 }, { "dpo_loss": 0.37890625, "epoch": 0.03, "final_loss": 0.37890625, "grad_norm": 0.0, "learning_rate": 9.990251258225053e-07, "loss": 0.4787, "projector_lr": 2.997075377467516e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.478515625, "rewards_train/margins": 1.015625, "rewards_train/rejected": -1.4921875, "sft_loss": 0.8515625, "step": 186 }, { "dpo_loss": 0.462890625, "epoch": 0.03, "final_loss": 0.462890625, "grad_norm": 0.0, "learning_rate": 9.99009215030942e-07, "loss": 0.5587, "projector_lr": 2.9970276450928264e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 0.81640625, "rewards_train/rejected": -1.2734375, "sft_loss": 0.640625, "step": 187 }, { "dpo_loss": 0.46875, "epoch": 0.03, "final_loss": 0.46875, "grad_norm": 0.0, "learning_rate": 9.98993175578039e-07, "loss": 0.54, "projector_lr": 2.996979526734117e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 1.9921875, "rewards_train/rejected": -2.96875, "sft_loss": 0.640625, "step": 188 }, { "dpo_loss": 0.46875, "epoch": 0.03, "final_loss": 0.46875, "grad_norm": 0.0, "learning_rate": 9.989770074679315e-07, "loss": 0.5078, "projector_lr": 2.9969310224037947e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2255859375, "rewards_train/margins": 0.734375, "rewards_train/rejected": -0.96484375, "sft_loss": 0.71875, "step": 189 }, { "dpo_loss": 0.490234375, "epoch": 0.03, "final_loss": 0.490234375, "grad_norm": 0.0, "learning_rate": 9.989607107047887e-07, "loss": 0.5256, "projector_lr": 2.996882132114366e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.333984375, "rewards_train/margins": 0.546875, "rewards_train/rejected": -0.8828125, "sft_loss": 0.73046875, "step": 190 }, { "dpo_loss": 0.42578125, "epoch": 0.03, "final_loss": 0.42578125, "grad_norm": 0.0, "learning_rate": 9.989442852928118e-07, "loss": 0.3351, "projector_lr": 2.9968328558784355e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.255859375, "rewards_train/margins": 0.84765625, "rewards_train/rejected": -1.1015625, "sft_loss": 1.0078125, "step": 191 }, { "dpo_loss": 0.46484375, "epoch": 0.03, "final_loss": 0.46484375, "grad_norm": 0.0, "learning_rate": 9.98927731236236e-07, "loss": 0.4482, "projector_lr": 2.9967831937087084e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.55859375, "rewards_train/margins": 0.7109375, "rewards_train/rejected": -1.2734375, "sft_loss": 0.78515625, "step": 192 }, { "dpo_loss": 0.275390625, "epoch": 0.03, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 9.9891104853933e-07, "loss": 0.3952, "projector_lr": 2.99673314561799e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.00634765625, "rewards_train/margins": 2.3125, "rewards_train/rejected": -2.3125, "sft_loss": 0.8515625, "step": 193 }, { "dpo_loss": 0.251953125, "epoch": 0.03, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 9.988942372063944e-07, "loss": 0.3522, "projector_lr": 2.9966827116191837e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.474609375, "rewards_train/margins": 1.921875, "rewards_train/rejected": -2.390625, "sft_loss": 0.62890625, "step": 194 }, { "dpo_loss": 0.6328125, "epoch": 0.03, "final_loss": 0.6328125, "grad_norm": 0.0, "learning_rate": 9.988772972417646e-07, "loss": 0.4946, "projector_lr": 2.9966318917252936e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.80859375, "rewards_train/margins": 0.287109375, "rewards_train/rejected": -1.09375, "sft_loss": 0.859375, "step": 195 }, { "dpo_loss": 0.5546875, "epoch": 0.03, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 9.988602286498073e-07, "loss": 0.5948, "projector_lr": 2.9965806859494222e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 0.56640625, "rewards_train/rejected": -1.1953125, "sft_loss": 0.73828125, "step": 196 }, { "dpo_loss": 0.4296875, "epoch": 0.03, "final_loss": 0.4296875, "grad_norm": 0.0, "learning_rate": 9.98843031434924e-07, "loss": 0.4171, "projector_lr": 2.9965290943047725e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.44921875, "rewards_train/margins": 0.78125, "rewards_train/rejected": -1.234375, "sft_loss": 0.79296875, "step": 197 }, { "dpo_loss": 0.443359375, "epoch": 0.03, "final_loss": 0.443359375, "grad_norm": 0.0, "learning_rate": 9.988257056015489e-07, "loss": 0.3522, "projector_lr": 2.9964771168046466e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 1.3046875, "rewards_train/rejected": -2.296875, "sft_loss": 0.80859375, "step": 198 }, { "dpo_loss": 0.47265625, "epoch": 0.03, "final_loss": 0.47265625, "grad_norm": 0.0, "learning_rate": 9.988082511541484e-07, "loss": 0.4609, "projector_lr": 2.9964247534624454e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.34765625, "rewards_train/margins": 1.0234375, "rewards_train/rejected": -1.3671875, "sft_loss": 0.7109375, "step": 199 }, { "dpo_loss": 0.5078125, "epoch": 0.03, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 9.987906680972236e-07, "loss": 0.4411, "projector_lr": 2.996372004291671e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.625, "rewards_train/margins": 0.640625, "rewards_train/rejected": -1.265625, "sft_loss": 0.7421875, "step": 200 }, { "dpo_loss": 0.2734375, "epoch": 0.03, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 9.987729564353076e-07, "loss": 0.3507, "projector_lr": 2.996318869305923e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 2.0, "rewards_train/rejected": -2.671875, "sft_loss": 0.7734375, "step": 201 }, { "dpo_loss": 0.33984375, "epoch": 0.03, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 9.987551161729672e-07, "loss": 0.3395, "projector_lr": 2.996265348518902e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.55859375, "rewards_train/margins": 1.359375, "rewards_train/rejected": -1.9140625, "sft_loss": 0.83203125, "step": 202 }, { "dpo_loss": 0.4453125, "epoch": 0.03, "final_loss": 0.4453125, "grad_norm": 0.0, "learning_rate": 9.987371473148024e-07, "loss": 0.3693, "projector_lr": 2.996211441944407e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.78125, "rewards_train/margins": 1.3359375, "rewards_train/rejected": -2.125, "sft_loss": 0.478515625, "step": 203 }, { "dpo_loss": 0.50390625, "epoch": 0.03, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 9.987190498654456e-07, "loss": 0.4523, "projector_lr": 2.996157149596337e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.271484375, "rewards_train/margins": 0.75, "rewards_train/rejected": -1.0234375, "sft_loss": 0.69921875, "step": 204 }, { "dpo_loss": 0.26953125, "epoch": 0.03, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 9.987008238295635e-07, "loss": 0.2969, "projector_lr": 2.9961024714886907e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48828125, "rewards_train/margins": 1.3984375, "rewards_train/rejected": -1.890625, "sft_loss": 0.796875, "step": 205 }, { "dpo_loss": 0.43359375, "epoch": 0.03, "final_loss": 0.43359375, "grad_norm": 0.0, "learning_rate": 9.986824692118551e-07, "loss": 0.3671, "projector_lr": 2.9960474076355656e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.28125, "rewards_train/margins": 0.7265625, "rewards_train/rejected": -1.0078125, "sft_loss": 0.93359375, "step": 206 }, { "dpo_loss": 0.6875, "epoch": 0.03, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 9.98663986017053e-07, "loss": 0.6083, "projector_lr": 2.995991958051159e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.28125, "rewards_train/margins": 0.2421875, "rewards_train/rejected": -1.5234375, "sft_loss": 0.59375, "step": 207 }, { "dpo_loss": 1.5859375, "epoch": 0.03, "final_loss": 1.5859375, "grad_norm": 0.0, "learning_rate": 9.986453742499225e-07, "loss": 1.1454, "projector_lr": 2.995936122749768e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.59375, "rewards_train/margins": -0.40234375, "rewards_train/rejected": -2.1875, "sft_loss": 0.73046875, "step": 208 }, { "dpo_loss": 0.3125, "epoch": 0.03, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 9.986266339152627e-07, "loss": 0.3663, "projector_lr": 2.995879901745788e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.314453125, "rewards_train/margins": 1.109375, "rewards_train/rejected": -1.4296875, "sft_loss": 0.640625, "step": 209 }, { "dpo_loss": 0.490234375, "epoch": 0.03, "final_loss": 0.490234375, "grad_norm": 0.0, "learning_rate": 9.986077650179052e-07, "loss": 0.4171, "projector_lr": 2.995823295053716e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.31640625, "rewards_train/margins": 1.078125, "rewards_train/rejected": -1.3984375, "sft_loss": 0.68359375, "step": 210 }, { "dpo_loss": 0.5390625, "epoch": 0.03, "final_loss": 0.5390625, "grad_norm": 0.0, "learning_rate": 9.985887675627154e-07, "loss": 0.5235, "projector_lr": 2.9957663026881462e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.39453125, "rewards_train/margins": 1.2421875, "rewards_train/rejected": -1.640625, "sft_loss": 0.7421875, "step": 211 }, { "dpo_loss": 0.5625, "epoch": 0.03, "final_loss": 0.5625, "grad_norm": 0.0, "learning_rate": 9.985696415545912e-07, "loss": 0.4344, "projector_lr": 2.9957089246637737e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 0.7734375, "rewards_train/rejected": -1.3828125, "sft_loss": 0.7421875, "step": 212 }, { "dpo_loss": 0.453125, "epoch": 0.03, "final_loss": 0.453125, "grad_norm": 0.0, "learning_rate": 9.985503869984637e-07, "loss": 0.4969, "projector_lr": 2.9956511609953914e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.0029296875, "rewards_train/margins": 1.015625, "rewards_train/rejected": -1.015625, "sft_loss": 0.70703125, "step": 213 }, { "dpo_loss": 0.287109375, "epoch": 0.03, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 9.98531003899298e-07, "loss": 0.3408, "projector_lr": 2.995593011697894e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 1.6484375, "rewards_train/rejected": -1.9296875, "sft_loss": 0.63671875, "step": 214 }, { "dpo_loss": 0.435546875, "epoch": 0.03, "final_loss": 0.435546875, "grad_norm": 0.0, "learning_rate": 9.985114922620913e-07, "loss": 0.4701, "projector_lr": 2.9955344767862737e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.083984375, "rewards_train/margins": 1.3203125, "rewards_train/rejected": -1.2421875, "sft_loss": 0.73828125, "step": 215 }, { "dpo_loss": 0.373046875, "epoch": 0.03, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 9.984918520918742e-07, "loss": 0.4525, "projector_lr": 2.9954755562756226e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1318359375, "rewards_train/margins": 1.0390625, "rewards_train/rejected": -1.171875, "sft_loss": 0.54296875, "step": 216 }, { "dpo_loss": 0.59765625, "epoch": 0.03, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 9.984720833937108e-07, "loss": 0.435, "projector_lr": 2.9954162501811325e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 0.53125, "rewards_train/rejected": -1.1953125, "sft_loss": 0.7265625, "step": 217 }, { "dpo_loss": 0.625, "epoch": 0.03, "final_loss": 0.625, "grad_norm": 0.0, "learning_rate": 9.984521861726984e-07, "loss": 0.6503, "projector_lr": 2.9953565585180952e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.39453125, "rewards_train/margins": 0.3125, "rewards_train/rejected": -0.70703125, "sft_loss": 0.7109375, "step": 218 }, { "dpo_loss": 0.50390625, "epoch": 0.04, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 9.984321604339668e-07, "loss": 0.4571, "projector_lr": 2.9952964813019005e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.216796875, "rewards_train/margins": 0.73046875, "rewards_train/rejected": -0.94921875, "sft_loss": 0.62890625, "step": 219 }, { "dpo_loss": 0.32421875, "epoch": 0.04, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 9.984120061826794e-07, "loss": 0.2417, "projector_lr": 2.995236018548038e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.033935546875, "rewards_train/margins": 1.4140625, "rewards_train/rejected": -1.3828125, "sft_loss": 0.6640625, "step": 220 }, { "dpo_loss": 0.37890625, "epoch": 0.04, "final_loss": 0.37890625, "grad_norm": 0.0, "learning_rate": 9.983917234240327e-07, "loss": 0.3447, "projector_lr": 2.9951751702720984e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1064453125, "rewards_train/margins": 1.4921875, "rewards_train/rejected": -1.6015625, "sft_loss": 0.4765625, "step": 221 }, { "dpo_loss": 0.478515625, "epoch": 0.04, "final_loss": 0.478515625, "grad_norm": 0.0, "learning_rate": 9.983713121632562e-07, "loss": 0.4401, "projector_lr": 2.995113936489769e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": 0.142578125, "rewards_train/margins": 1.171875, "rewards_train/rejected": -1.03125, "sft_loss": 0.79296875, "step": 222 }, { "dpo_loss": 0.294921875, "epoch": 0.04, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 9.983507724056125e-07, "loss": 0.304, "projector_lr": 2.995052317216838e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.05615234375, "rewards_train/margins": 1.5390625, "rewards_train/rejected": -1.59375, "sft_loss": 0.73046875, "step": 223 }, { "dpo_loss": 0.267578125, "epoch": 0.04, "final_loss": 0.267578125, "grad_norm": 0.0, "learning_rate": 9.98330104156398e-07, "loss": 0.2483, "projector_lr": 2.994990312469194e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05615234375, "rewards_train/margins": 1.671875, "rewards_train/rejected": -1.734375, "sft_loss": 0.48828125, "step": 224 }, { "dpo_loss": 0.388671875, "epoch": 0.04, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 9.98309307420941e-07, "loss": 0.472, "projector_lr": 2.994927922262823e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.259765625, "rewards_train/margins": 1.5546875, "rewards_train/rejected": -1.8125, "sft_loss": 0.62890625, "step": 225 }, { "dpo_loss": 0.17578125, "epoch": 0.04, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 9.98288382204604e-07, "loss": 0.3058, "projector_lr": 2.994865146613812e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23046875, "rewards_train/margins": 2.171875, "rewards_train/rejected": -2.40625, "sft_loss": 0.49609375, "step": 226 }, { "dpo_loss": 0.2578125, "epoch": 0.04, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 9.98267328512782e-07, "loss": 0.551, "projector_lr": 2.994801985538346e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.24609375, "rewards_train/margins": 1.546875, "rewards_train/rejected": -1.3046875, "sft_loss": 0.875, "step": 227 }, { "dpo_loss": 0.74609375, "epoch": 0.04, "final_loss": 0.74609375, "grad_norm": 0.0, "learning_rate": 9.982461463509032e-07, "loss": 0.5953, "projector_lr": 2.99473843905271e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.490234375, "rewards_train/margins": 0.458984375, "rewards_train/rejected": -0.953125, "sft_loss": 0.48046875, "step": 228 }, { "dpo_loss": 0.38671875, "epoch": 0.04, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 9.982248357244296e-07, "loss": 0.4835, "projector_lr": 2.9946745071732892e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2412109375, "rewards_train/margins": 1.421875, "rewards_train/rejected": -1.6640625, "sft_loss": 0.80078125, "step": 229 }, { "dpo_loss": 0.2734375, "epoch": 0.04, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 9.982033966388554e-07, "loss": 0.3546, "projector_lr": 2.9946101899165664e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 1.5078125, "rewards_train/rejected": -2.078125, "sft_loss": 0.640625, "step": 230 }, { "dpo_loss": 0.5, "epoch": 0.04, "final_loss": 0.5, "grad_norm": 0.0, "learning_rate": 9.981818290997086e-07, "loss": 0.4392, "projector_lr": 2.994545487299126e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.78515625, "rewards_train/margins": 1.3984375, "rewards_train/rejected": -2.171875, "sft_loss": 0.71484375, "step": 231 }, { "dpo_loss": 0.2294921875, "epoch": 0.04, "final_loss": 0.2294921875, "grad_norm": 0.0, "learning_rate": 9.981601331125497e-07, "loss": 0.3296, "projector_lr": 2.994480399337649e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0771484375, "rewards_train/margins": 2.140625, "rewards_train/rejected": -2.21875, "sft_loss": 0.5, "step": 232 }, { "dpo_loss": 0.439453125, "epoch": 0.04, "final_loss": 0.439453125, "grad_norm": 0.0, "learning_rate": 9.981383086829727e-07, "loss": 0.4149, "projector_lr": 2.9944149260489185e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.515625, "rewards_train/margins": 1.265625, "rewards_train/rejected": -1.78125, "sft_loss": 0.83984375, "step": 233 }, { "dpo_loss": 0.34765625, "epoch": 0.04, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 9.98116355816605e-07, "loss": 0.3684, "projector_lr": 2.9943490674498152e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.423828125, "rewards_train/margins": 1.5703125, "rewards_train/rejected": -2.0, "sft_loss": 0.8203125, "step": 234 }, { "dpo_loss": 0.345703125, "epoch": 0.04, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 9.980942745191065e-07, "loss": 0.3873, "projector_lr": 2.9942828235573196e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.46875, "rewards_train/margins": 1.5234375, "rewards_train/rejected": -1.9921875, "sft_loss": 0.47265625, "step": 235 }, { "dpo_loss": 0.275390625, "epoch": 0.04, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 9.980720647961704e-07, "loss": 0.3636, "projector_lr": 2.9942161943885117e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3125, "rewards_train/margins": 1.890625, "rewards_train/rejected": -2.203125, "sft_loss": 1.0546875, "step": 236 }, { "dpo_loss": 0.3046875, "epoch": 0.04, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 9.980497266535236e-07, "loss": 0.3567, "projector_lr": 2.9941491799605706e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.4375, "rewards_train/margins": 1.9921875, "rewards_train/rejected": -2.4375, "sft_loss": 0.63671875, "step": 237 }, { "dpo_loss": 0.146484375, "epoch": 0.04, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 9.980272600969251e-07, "loss": 0.2757, "projector_lr": 2.9940817802907752e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1162109375, "rewards_train/margins": 2.484375, "rewards_train/rejected": -2.609375, "sft_loss": 0.6875, "step": 238 }, { "dpo_loss": 0.53515625, "epoch": 0.04, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 9.980046651321677e-07, "loss": 0.3876, "projector_lr": 2.994013995396503e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.25, "rewards_train/margins": 0.97265625, "rewards_train/rejected": -1.2265625, "sft_loss": 0.57421875, "step": 239 }, { "dpo_loss": 0.2890625, "epoch": 0.04, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 9.979819417650773e-07, "loss": 0.325, "projector_lr": 2.993945825295232e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11279296875, "rewards_train/margins": 1.46875, "rewards_train/rejected": -1.3515625, "sft_loss": 0.6015625, "step": 240 }, { "dpo_loss": 0.83203125, "epoch": 0.04, "final_loss": 0.83203125, "grad_norm": 0.0, "learning_rate": 9.979590900015124e-07, "loss": 0.6521, "projector_lr": 2.9938772700045377e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 0.76953125, "rewards_train/rejected": -1.359375, "sft_loss": 0.8515625, "step": 241 }, { "dpo_loss": 0.59375, "epoch": 0.04, "final_loss": 0.59375, "grad_norm": 0.0, "learning_rate": 9.979361098473652e-07, "loss": 0.44, "projector_lr": 2.993808329542096e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.05029296875, "rewards_train/margins": 0.72265625, "rewards_train/rejected": -0.67578125, "sft_loss": 0.765625, "step": 242 }, { "dpo_loss": 0.416015625, "epoch": 0.04, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 9.97913001308561e-07, "loss": 0.3003, "projector_lr": 2.9937390039256833e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0556640625, "rewards_train/margins": 0.90625, "rewards_train/rejected": -0.9609375, "sft_loss": 0.63671875, "step": 243 }, { "dpo_loss": 0.255859375, "epoch": 0.04, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 9.978897643910575e-07, "loss": 0.3338, "projector_lr": 2.993669293173173e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0751953125, "rewards_train/margins": 1.8984375, "rewards_train/rejected": -1.9765625, "sft_loss": 0.6640625, "step": 244 }, { "dpo_loss": 0.38671875, "epoch": 0.04, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 9.978663991008462e-07, "loss": 0.3012, "projector_lr": 2.993599197302539e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.03369140625, "rewards_train/margins": 1.8984375, "rewards_train/rejected": -1.8671875, "sft_loss": 0.85546875, "step": 245 }, { "dpo_loss": 0.228515625, "epoch": 0.04, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 9.978429054439515e-07, "loss": 0.282, "projector_lr": 2.9935287163318546e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.043701171875, "rewards_train/margins": 2.578125, "rewards_train/rejected": -2.53125, "sft_loss": 0.7109375, "step": 246 }, { "dpo_loss": 0.8671875, "epoch": 0.04, "final_loss": 0.8671875, "grad_norm": 0.0, "learning_rate": 9.978192834264304e-07, "loss": 0.5807, "projector_lr": 2.9934578502792918e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.296875, "rewards_train/margins": 0.84765625, "rewards_train/rejected": -1.1484375, "sft_loss": 0.5390625, "step": 247 }, { "dpo_loss": 0.59375, "epoch": 0.04, "final_loss": 0.59375, "grad_norm": 0.0, "learning_rate": 9.977955330543742e-07, "loss": 0.505, "projector_lr": 2.993386599163123e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.01904296875, "rewards_train/margins": 0.275390625, "rewards_train/rejected": -0.255859375, "sft_loss": 0.87109375, "step": 248 }, { "dpo_loss": 0.466796875, "epoch": 0.04, "final_loss": 0.466796875, "grad_norm": 0.0, "learning_rate": 9.97771654333906e-07, "loss": 0.4599, "projector_lr": 2.9933149630017186e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.14453125, "rewards_train/margins": 1.0078125, "rewards_train/rejected": -1.1484375, "sft_loss": 0.69921875, "step": 249 }, { "dpo_loss": 0.2373046875, "epoch": 0.04, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 9.977476472711828e-07, "loss": 0.3615, "projector_lr": 2.9932429418135487e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38671875, "rewards_train/margins": 1.890625, "rewards_train/rejected": -2.28125, "sft_loss": 0.79296875, "step": 250 }, { "dpo_loss": 0.453125, "epoch": 0.04, "final_loss": 0.453125, "grad_norm": 0.0, "learning_rate": 9.977235118723942e-07, "loss": 0.3836, "projector_lr": 2.993170535617183e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.212890625, "rewards_train/margins": 1.40625, "rewards_train/rejected": -1.1875, "sft_loss": 0.51171875, "step": 251 }, { "dpo_loss": 0.30078125, "epoch": 0.04, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 9.976992481437633e-07, "loss": 0.5192, "projector_lr": 2.99309774443129e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.06591796875, "rewards_train/margins": 1.78125, "rewards_train/rejected": -1.8515625, "sft_loss": 0.546875, "step": 252 }, { "dpo_loss": 0.3359375, "epoch": 0.04, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 9.97674856091546e-07, "loss": 0.2653, "projector_lr": 2.9930245682746383e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.062255859375, "rewards_train/margins": 1.9296875, "rewards_train/rejected": -1.9921875, "sft_loss": 0.515625, "step": 253 }, { "dpo_loss": 0.28125, "epoch": 0.04, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 9.976503357220314e-07, "loss": 0.3241, "projector_lr": 2.9929510071660945e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.197265625, "rewards_train/margins": 1.8046875, "rewards_train/rejected": -1.609375, "sft_loss": 0.65625, "step": 254 }, { "dpo_loss": 0.375, "epoch": 0.04, "final_loss": 0.375, "grad_norm": 0.0, "learning_rate": 9.976256870415418e-07, "loss": 0.5423, "projector_lr": 2.9928770611246255e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1337890625, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -1.921875, "sft_loss": 0.78515625, "step": 255 }, { "dpo_loss": 0.205078125, "epoch": 0.04, "final_loss": 0.205078125, "grad_norm": 0.0, "learning_rate": 9.976009100564322e-07, "loss": 0.3665, "projector_lr": 2.992802730169297e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.33984375, "rewards_train/margins": 1.8671875, "rewards_train/rejected": -1.5234375, "sft_loss": 0.62109375, "step": 256 }, { "dpo_loss": 0.427734375, "epoch": 0.04, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 9.975760047730912e-07, "loss": 0.2975, "projector_lr": 2.9927280143192738e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.125, "rewards_train/margins": 1.234375, "rewards_train/rejected": -1.359375, "sft_loss": 0.5078125, "step": 257 }, { "dpo_loss": 0.60546875, "epoch": 0.04, "final_loss": 0.60546875, "grad_norm": 0.0, "learning_rate": 9.9755097119794e-07, "loss": 0.4855, "projector_lr": 2.9926529135938204e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.333984375, "rewards_train/margins": 0.87890625, "rewards_train/rejected": -1.2109375, "sft_loss": 0.59375, "step": 258 }, { "dpo_loss": 0.248046875, "epoch": 0.04, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 9.975258093374335e-07, "loss": 0.2634, "projector_lr": 2.9925774280123004e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2373046875, "rewards_train/margins": 1.6484375, "rewards_train/rejected": -1.4140625, "sft_loss": 0.75390625, "step": 259 }, { "dpo_loss": 0.41796875, "epoch": 0.04, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 9.975005191980588e-07, "loss": 0.41, "projector_lr": 2.9925015575941764e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.390625, "rewards_train/margins": 1.25, "rewards_train/rejected": -1.640625, "sft_loss": 0.515625, "step": 260 }, { "dpo_loss": 0.6328125, "epoch": 0.04, "final_loss": 0.6328125, "grad_norm": 0.0, "learning_rate": 9.974751007863368e-07, "loss": 0.4712, "projector_lr": 2.9924253023590103e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.060302734375, "rewards_train/margins": 1.1875, "rewards_train/rejected": -1.1328125, "sft_loss": 0.80078125, "step": 261 }, { "dpo_loss": 0.4921875, "epoch": 0.04, "final_loss": 0.4921875, "grad_norm": 0.0, "learning_rate": 9.97449554108821e-07, "loss": 0.3781, "projector_lr": 2.992348662326463e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.203125, "rewards_train/margins": 1.03125, "rewards_train/rejected": -0.828125, "sft_loss": 0.51953125, "step": 262 }, { "dpo_loss": 0.427734375, "epoch": 0.04, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 9.974238791720983e-07, "loss": 0.4518, "projector_lr": 2.9922716375162954e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.119140625, "rewards_train/margins": 1.6484375, "rewards_train/rejected": -1.7734375, "sft_loss": 0.8125, "step": 263 }, { "dpo_loss": 0.259765625, "epoch": 0.04, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 9.97398075982789e-07, "loss": 0.254, "projector_lr": 2.992194227948367e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.55078125, "rewards_train/margins": 2.03125, "rewards_train/rejected": -1.484375, "sft_loss": 0.80078125, "step": 264 }, { "dpo_loss": 0.234375, "epoch": 0.04, "final_loss": 0.234375, "grad_norm": 0.0, "learning_rate": 9.973721445475451e-07, "loss": 0.3417, "projector_lr": 2.9921164336426358e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6875, "rewards_train/margins": 1.6640625, "rewards_train/rejected": -0.9765625, "sft_loss": 0.8203125, "step": 265 }, { "dpo_loss": 0.16796875, "epoch": 0.04, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 9.973460848730534e-07, "loss": 0.4017, "projector_lr": 2.9920382546191607e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3671875, "rewards_train/margins": 2.609375, "rewards_train/rejected": -2.25, "sft_loss": 0.56640625, "step": 266 }, { "dpo_loss": 0.4375, "epoch": 0.04, "final_loss": 0.4375, "grad_norm": 0.0, "learning_rate": 9.973198969660327e-07, "loss": 0.2658, "projector_lr": 2.9919596908980984e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.0006103515625, "rewards_train/margins": 1.0234375, "rewards_train/rejected": -1.0234375, "sft_loss": 0.98828125, "step": 267 }, { "dpo_loss": 0.494140625, "epoch": 0.04, "final_loss": 0.494140625, "grad_norm": 0.0, "learning_rate": 9.972935808332352e-07, "loss": 0.4472, "projector_lr": 2.9918807424997057e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": 0.166015625, "rewards_train/margins": 1.46875, "rewards_train/rejected": -1.3046875, "sft_loss": 0.6328125, "step": 268 }, { "dpo_loss": 0.283203125, "epoch": 0.04, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 9.97267136481446e-07, "loss": 0.2182, "projector_lr": 2.991801409444338e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2041015625, "rewards_train/margins": 1.953125, "rewards_train/rejected": -1.75, "sft_loss": 0.6015625, "step": 269 }, { "dpo_loss": 0.232421875, "epoch": 0.04, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 9.972405639174833e-07, "loss": 0.3153, "projector_lr": 2.99172169175245e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1376953125, "rewards_train/margins": 2.34375, "rewards_train/rejected": -2.203125, "sft_loss": 0.703125, "step": 270 }, { "dpo_loss": 0.62890625, "epoch": 0.04, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 9.972138631481983e-07, "loss": 0.601, "projector_lr": 2.991641589444595e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 0.55859375, "rewards_train/rejected": -0.828125, "sft_loss": 0.578125, "step": 271 }, { "dpo_loss": 0.240234375, "epoch": 0.04, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 9.971870341804758e-07, "loss": 0.2162, "projector_lr": 2.9915611025414277e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2060546875, "rewards_train/margins": 1.703125, "rewards_train/rejected": -1.5, "sft_loss": 0.62890625, "step": 272 }, { "dpo_loss": 0.2099609375, "epoch": 0.04, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 9.971600770212327e-07, "loss": 0.2364, "projector_lr": 2.991480231063698e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3984375, "rewards_train/margins": 1.921875, "rewards_train/rejected": -2.3125, "sft_loss": 0.85546875, "step": 273 }, { "dpo_loss": 0.2216796875, "epoch": 0.04, "final_loss": 0.2216796875, "grad_norm": 0.0, "learning_rate": 9.971329916774196e-07, "loss": 0.3135, "projector_lr": 2.991398975032259e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.140625, "rewards_train/margins": 2.734375, "rewards_train/rejected": -2.59375, "sft_loss": 0.6484375, "step": 274 }, { "dpo_loss": 0.2314453125, "epoch": 0.04, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 9.971057781560202e-07, "loss": 0.1705, "projector_lr": 2.9913173344680607e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1904296875, "rewards_train/margins": 1.859375, "rewards_train/rejected": -2.046875, "sft_loss": 0.61328125, "step": 275 }, { "dpo_loss": 0.326171875, "epoch": 0.04, "final_loss": 0.326171875, "grad_norm": 0.0, "learning_rate": 9.970784364640509e-07, "loss": 0.329, "projector_lr": 2.991235309392153e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.28125, "rewards_train/margins": 1.203125, "rewards_train/rejected": -0.91796875, "sft_loss": 0.77734375, "step": 276 }, { "dpo_loss": 0.388671875, "epoch": 0.04, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 9.970509666085614e-07, "loss": 0.576, "projector_lr": 2.991152899825684e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.134765625, "rewards_train/margins": 1.2265625, "rewards_train/rejected": -1.09375, "sft_loss": 0.91015625, "step": 277 }, { "dpo_loss": 0.61328125, "epoch": 0.04, "final_loss": 0.61328125, "grad_norm": 0.0, "learning_rate": 9.970233685966341e-07, "loss": 0.4666, "projector_lr": 2.991070105789902e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 1.109375, "rewards_train/rejected": -1.6171875, "sft_loss": 0.57421875, "step": 278 }, { "dpo_loss": 0.326171875, "epoch": 0.04, "final_loss": 0.326171875, "grad_norm": 0.0, "learning_rate": 9.969956424353851e-07, "loss": 0.459, "projector_lr": 2.9909869273061553e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.041015625, "rewards_train/margins": 2.171875, "rewards_train/rejected": -2.203125, "sft_loss": 0.5859375, "step": 279 }, { "dpo_loss": 0.390625, "epoch": 0.04, "final_loss": 0.390625, "grad_norm": 0.0, "learning_rate": 9.969677881319628e-07, "loss": 0.3287, "projector_lr": 2.9909033643958887e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.05615234375, "rewards_train/margins": 0.8984375, "rewards_train/rejected": -0.95703125, "sft_loss": 0.83203125, "step": 280 }, { "dpo_loss": 0.388671875, "epoch": 0.04, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 9.969398056935491e-07, "loss": 0.3171, "projector_lr": 2.990819417080648e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.34375, "rewards_train/margins": 1.0078125, "rewards_train/rejected": -1.3515625, "sft_loss": 0.7265625, "step": 281 }, { "dpo_loss": 0.51953125, "epoch": 0.05, "final_loss": 0.51953125, "grad_norm": 0.0, "learning_rate": 9.96911695127359e-07, "loss": 0.4365, "projector_lr": 2.9907350853820768e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 1.515625, "rewards_train/rejected": -2.03125, "sft_loss": 0.5234375, "step": 282 }, { "dpo_loss": 0.302734375, "epoch": 0.05, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 9.9688345644064e-07, "loss": 0.3622, "projector_lr": 2.99065036932192e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1806640625, "rewards_train/margins": 2.046875, "rewards_train/rejected": -2.21875, "sft_loss": 0.90234375, "step": 283 }, { "dpo_loss": 0.42578125, "epoch": 0.05, "final_loss": 0.42578125, "grad_norm": 0.0, "learning_rate": 9.96855089640673e-07, "loss": 0.3493, "projector_lr": 2.9905652689220195e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1064453125, "rewards_train/margins": 1.1875, "rewards_train/rejected": -1.296875, "sft_loss": 0.85546875, "step": 284 }, { "dpo_loss": 0.232421875, "epoch": 0.05, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 9.968265947347723e-07, "loss": 0.3557, "projector_lr": 2.990479784204317e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.0908203125, "rewards_train/margins": 2.109375, "rewards_train/rejected": -2.015625, "sft_loss": 0.796875, "step": 285 }, { "dpo_loss": 0.2255859375, "epoch": 0.05, "final_loss": 0.2255859375, "grad_norm": 0.0, "learning_rate": 9.967979717302846e-07, "loss": 0.2897, "projector_lr": 2.9903939151908537e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1787109375, "rewards_train/margins": 2.34375, "rewards_train/rejected": -2.15625, "sft_loss": 0.67578125, "step": 286 }, { "dpo_loss": 0.3203125, "epoch": 0.05, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 9.967692206345898e-07, "loss": 0.2955, "projector_lr": 2.9903076619037695e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.166015625, "rewards_train/margins": 1.6875, "rewards_train/rejected": -1.515625, "sft_loss": 0.494140625, "step": 287 }, { "dpo_loss": 0.349609375, "epoch": 0.05, "final_loss": 0.349609375, "grad_norm": 0.0, "learning_rate": 9.96740341455101e-07, "loss": 0.2931, "projector_lr": 2.990221024365303e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.203125, "rewards_train/margins": 1.65625, "rewards_train/rejected": -1.453125, "sft_loss": 0.6484375, "step": 288 }, { "dpo_loss": 0.3515625, "epoch": 0.05, "final_loss": 0.3515625, "grad_norm": 0.0, "learning_rate": 9.96711334199264e-07, "loss": 0.3242, "projector_lr": 2.9901340025977925e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.2119140625, "rewards_train/margins": 1.5078125, "rewards_train/rejected": -1.296875, "sft_loss": 0.82421875, "step": 289 }, { "dpo_loss": 0.5078125, "epoch": 0.05, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 9.966821988745585e-07, "loss": 0.3626, "projector_lr": 2.9900465966236752e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.09619140625, "rewards_train/margins": 1.359375, "rewards_train/rejected": -1.453125, "sft_loss": 0.640625, "step": 290 }, { "dpo_loss": 0.55859375, "epoch": 0.05, "final_loss": 0.55859375, "grad_norm": 0.0, "learning_rate": 9.966529354884955e-07, "loss": 0.646, "projector_lr": 2.989958806465487e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": 0.0244140625, "rewards_train/margins": 1.171875, "rewards_train/rejected": -1.1484375, "sft_loss": 0.6875, "step": 291 }, { "dpo_loss": 0.3671875, "epoch": 0.05, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 9.966235440486212e-07, "loss": 0.3494, "projector_lr": 2.989870632145864e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.30078125, "rewards_train/margins": 1.46875, "rewards_train/rejected": -1.171875, "sft_loss": 0.61328125, "step": 292 }, { "dpo_loss": 0.26953125, "epoch": 0.05, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 9.965940245625131e-07, "loss": 0.3082, "projector_lr": 2.9897820736875394e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15625, "rewards_train/margins": 1.6484375, "rewards_train/rejected": -1.4921875, "sft_loss": 0.71484375, "step": 293 }, { "dpo_loss": 0.2734375, "epoch": 0.05, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 9.96564377037782e-07, "loss": 0.281, "projector_lr": 2.9896931311133467e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.279296875, "rewards_train/margins": 2.359375, "rewards_train/rejected": -2.078125, "sft_loss": 0.6796875, "step": 294 }, { "dpo_loss": 0.359375, "epoch": 0.05, "final_loss": 0.359375, "grad_norm": 0.0, "learning_rate": 9.96534601482073e-07, "loss": 0.3073, "projector_lr": 2.9896038044462195e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.10009765625, "rewards_train/margins": 1.6328125, "rewards_train/rejected": -1.53125, "sft_loss": 0.69921875, "step": 295 }, { "dpo_loss": 0.2275390625, "epoch": 0.05, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 9.965046979030624e-07, "loss": 0.3106, "projector_lr": 2.9895140937091873e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.412109375, "rewards_train/margins": 1.7265625, "rewards_train/rejected": -1.3125, "sft_loss": 0.515625, "step": 296 }, { "dpo_loss": 0.39453125, "epoch": 0.05, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 9.964746663084607e-07, "loss": 0.2983, "projector_lr": 2.9894239989253824e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.041015625, "rewards_train/margins": 2.1875, "rewards_train/rejected": -2.140625, "sft_loss": 0.578125, "step": 297 }, { "dpo_loss": 0.4296875, "epoch": 0.05, "final_loss": 0.4296875, "grad_norm": 0.0, "learning_rate": 9.964445067060108e-07, "loss": 0.5654, "projector_lr": 2.9893335201180327e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.03173828125, "rewards_train/margins": 2.03125, "rewards_train/rejected": -2.0625, "sft_loss": 0.7265625, "step": 298 }, { "dpo_loss": 0.47265625, "epoch": 0.05, "final_loss": 0.47265625, "grad_norm": 0.0, "learning_rate": 9.964142191034891e-07, "loss": 0.6334, "projector_lr": 2.989242657310468e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.3046875, "rewards_train/margins": 1.6171875, "rewards_train/rejected": -1.3046875, "sft_loss": 0.76953125, "step": 299 }, { "dpo_loss": 0.4140625, "epoch": 0.05, "final_loss": 0.4140625, "grad_norm": 0.0, "learning_rate": 9.963838035087048e-07, "loss": 0.2769, "projector_lr": 2.989151410526115e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 1.5703125, "rewards_train/rejected": -1.8515625, "sft_loss": 0.84375, "step": 300 }, { "dpo_loss": 0.37890625, "epoch": 0.05, "final_loss": 0.37890625, "grad_norm": 0.0, "learning_rate": 9.963532599295e-07, "loss": 0.2362, "projector_lr": 2.9890597797885e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.00030517578125, "rewards_train/margins": 1.8984375, "rewards_train/rejected": -1.8984375, "sft_loss": 0.7109375, "step": 301 }, { "dpo_loss": 0.1572265625, "epoch": 0.05, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 9.963225883737496e-07, "loss": 0.1773, "projector_lr": 2.988967765121249e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06884765625, "rewards_train/margins": 2.078125, "rewards_train/rejected": -2.0, "sft_loss": 0.61328125, "step": 302 }, { "dpo_loss": 0.232421875, "epoch": 0.05, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 9.96291788849362e-07, "loss": 0.5635, "projector_lr": 2.9888753665480863e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.57421875, "rewards_train/margins": 2.625, "rewards_train/rejected": -2.046875, "sft_loss": 0.8671875, "step": 303 }, { "dpo_loss": 0.35546875, "epoch": 0.05, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 9.962608613642785e-07, "loss": 0.2555, "projector_lr": 2.9887825840928357e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.044189453125, "rewards_train/margins": 2.34375, "rewards_train/rejected": -2.296875, "sft_loss": 0.71484375, "step": 304 }, { "dpo_loss": 0.173828125, "epoch": 0.05, "final_loss": 0.173828125, "grad_norm": 0.0, "learning_rate": 9.962298059264729e-07, "loss": 0.3164, "projector_lr": 2.9886894177794193e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.037841796875, "rewards_train/margins": 2.71875, "rewards_train/rejected": -2.6875, "sft_loss": 0.490234375, "step": 305 }, { "dpo_loss": 0.55078125, "epoch": 0.05, "final_loss": 0.55078125, "grad_norm": 0.0, "learning_rate": 9.961986225439527e-07, "loss": 0.3635, "projector_lr": 2.988595867631858e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 0.8984375, "rewards_train/rejected": -1.6875, "sft_loss": 0.6796875, "step": 306 }, { "dpo_loss": 0.515625, "epoch": 0.05, "final_loss": 0.515625, "grad_norm": 0.0, "learning_rate": 9.961673112247576e-07, "loss": 0.3638, "projector_lr": 2.9885019336742734e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.330078125, "rewards_train/margins": 1.03125, "rewards_train/rejected": -1.359375, "sft_loss": 0.83984375, "step": 307 }, { "dpo_loss": 0.5234375, "epoch": 0.05, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 9.961358719769613e-07, "loss": 0.4213, "projector_lr": 2.988407615930884e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.06494140625, "rewards_train/margins": 0.5078125, "rewards_train/rejected": -0.5703125, "sft_loss": 0.74609375, "step": 308 }, { "dpo_loss": 0.3046875, "epoch": 0.05, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 9.961043048086692e-07, "loss": 0.3876, "projector_lr": 2.9883129144260075e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.03759765625, "rewards_train/margins": 2.375, "rewards_train/rejected": -2.328125, "sft_loss": 0.76171875, "step": 309 }, { "dpo_loss": 0.166015625, "epoch": 0.05, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 9.960726097280207e-07, "loss": 0.2659, "projector_lr": 2.9882178291840626e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1845703125, "rewards_train/margins": 2.125, "rewards_train/rejected": -2.3125, "sft_loss": 0.64453125, "step": 310 }, { "dpo_loss": 0.55078125, "epoch": 0.05, "final_loss": 0.55078125, "grad_norm": 0.0, "learning_rate": 9.96040786743188e-07, "loss": 0.451, "projector_lr": 2.988122360229564e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.29296875, "rewards_train/margins": 1.5546875, "rewards_train/rejected": -1.84375, "sft_loss": 0.625, "step": 311 }, { "dpo_loss": 0.1474609375, "epoch": 0.05, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 9.96008835862376e-07, "loss": 0.2392, "projector_lr": 2.9880265075871282e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.462890625, "rewards_train/margins": 2.84375, "rewards_train/rejected": -2.375, "sft_loss": 0.65234375, "step": 312 }, { "dpo_loss": 0.341796875, "epoch": 0.05, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 9.959767570938226e-07, "loss": 0.4529, "projector_lr": 2.9879302712814682e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3203125, "rewards_train/margins": 1.25, "rewards_train/rejected": -1.5703125, "sft_loss": 0.6796875, "step": 313 }, { "dpo_loss": 0.376953125, "epoch": 0.05, "final_loss": 0.376953125, "grad_norm": 0.0, "learning_rate": 9.95944550445799e-07, "loss": 0.4465, "projector_lr": 2.9878336513373976e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.34375, "rewards_train/margins": 2.25, "rewards_train/rejected": -2.59375, "sft_loss": 0.703125, "step": 314 }, { "dpo_loss": 0.462890625, "epoch": 0.05, "final_loss": 0.462890625, "grad_norm": 0.0, "learning_rate": 9.959122159266092e-07, "loss": 0.3325, "projector_lr": 2.9877366477798275e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2138671875, "rewards_train/margins": 2.609375, "rewards_train/rejected": -2.828125, "sft_loss": 0.765625, "step": 315 }, { "dpo_loss": 0.279296875, "epoch": 0.05, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 9.958797535445897e-07, "loss": 0.4679, "projector_lr": 2.9876392606337696e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1494140625, "rewards_train/margins": 1.7578125, "rewards_train/rejected": -1.609375, "sft_loss": 0.6171875, "step": 316 }, { "dpo_loss": 0.10888671875, "epoch": 0.05, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 9.95847163308111e-07, "loss": 0.2911, "projector_lr": 2.987541489924333e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1875, "rewards_train/margins": 2.796875, "rewards_train/rejected": -2.625, "sft_loss": 0.5859375, "step": 317 }, { "dpo_loss": 0.255859375, "epoch": 0.05, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 9.958144452255753e-07, "loss": 0.2043, "projector_lr": 2.9874433356767257e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.392578125, "rewards_train/margins": 1.8359375, "rewards_train/rejected": -1.4453125, "sft_loss": 0.62109375, "step": 318 }, { "dpo_loss": 0.39453125, "epoch": 0.05, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 9.957815993054188e-07, "loss": 0.4237, "projector_lr": 2.9873447979162567e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1474609375, "rewards_train/margins": 1.2734375, "rewards_train/rejected": -1.421875, "sft_loss": 0.81640625, "step": 319 }, { "dpo_loss": 0.275390625, "epoch": 0.05, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 9.957486255561103e-07, "loss": 0.3203, "projector_lr": 2.987245876668331e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 2.5, "rewards_train/rejected": -3.3125, "sft_loss": 0.796875, "step": 320 }, { "dpo_loss": 0.330078125, "epoch": 0.05, "final_loss": 0.330078125, "grad_norm": 0.0, "learning_rate": 9.957155239861515e-07, "loss": 0.2903, "projector_lr": 2.9871465719584544e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 1.796875, "rewards_train/rejected": -2.453125, "sft_loss": 0.70703125, "step": 321 }, { "dpo_loss": 0.51171875, "epoch": 0.05, "final_loss": 0.51171875, "grad_norm": 0.0, "learning_rate": 9.95682294604077e-07, "loss": 0.4159, "projector_lr": 2.987046883812231e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.25, "rewards_train/margins": 1.0546875, "rewards_train/rejected": -1.3046875, "sft_loss": 0.84375, "step": 322 }, { "dpo_loss": 0.443359375, "epoch": 0.05, "final_loss": 0.443359375, "grad_norm": 0.0, "learning_rate": 9.956489374184546e-07, "loss": 0.3796, "projector_lr": 2.986946812255364e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.3125, "rewards_train/margins": 1.4375, "rewards_train/rejected": -1.75, "sft_loss": 0.69921875, "step": 323 }, { "dpo_loss": 0.1728515625, "epoch": 0.05, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 9.956154524378849e-07, "loss": 0.2529, "projector_lr": 2.9868463573136545e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.20703125, "rewards_train/margins": 2.25, "rewards_train/rejected": -2.453125, "sft_loss": 0.49609375, "step": 324 }, { "dpo_loss": 0.412109375, "epoch": 0.05, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 9.95581839671001e-07, "loss": 0.3332, "projector_lr": 2.9867455190130034e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1123046875, "rewards_train/margins": 2.375, "rewards_train/rejected": -2.5, "sft_loss": 0.423828125, "step": 325 }, { "dpo_loss": 0.365234375, "epoch": 0.05, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 9.955480991264702e-07, "loss": 0.2812, "projector_lr": 2.9866442973794106e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.349609375, "rewards_train/margins": 1.796875, "rewards_train/rejected": -2.140625, "sft_loss": 0.7890625, "step": 326 }, { "dpo_loss": 0.380859375, "epoch": 0.05, "final_loss": 0.380859375, "grad_norm": 0.0, "learning_rate": 9.955142308129913e-07, "loss": 0.3566, "projector_lr": 2.9865426924389744e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.20703125, "rewards_train/margins": 1.3828125, "rewards_train/rejected": -1.5859375, "sft_loss": 0.87109375, "step": 327 }, { "dpo_loss": 0.32421875, "epoch": 0.05, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 9.95480234739297e-07, "loss": 0.4307, "projector_lr": 2.9864407042178913e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.427734375, "rewards_train/margins": 1.453125, "rewards_train/rejected": -1.875, "sft_loss": 0.83984375, "step": 328 }, { "dpo_loss": 0.349609375, "epoch": 0.05, "final_loss": 0.349609375, "grad_norm": 0.0, "learning_rate": 9.954461109141525e-07, "loss": 0.4056, "projector_lr": 2.9863383327424577e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.181640625, "rewards_train/margins": 1.8515625, "rewards_train/rejected": -2.03125, "sft_loss": 0.796875, "step": 329 }, { "dpo_loss": 0.1640625, "epoch": 0.05, "final_loss": 0.1640625, "grad_norm": 0.0, "learning_rate": 9.954118593463562e-07, "loss": 0.1784, "projector_lr": 2.9862355780390685e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2412109375, "rewards_train/margins": 2.40625, "rewards_train/rejected": -2.171875, "sft_loss": 0.6640625, "step": 330 }, { "dpo_loss": 0.2236328125, "epoch": 0.05, "final_loss": 0.2236328125, "grad_norm": 0.0, "learning_rate": 9.953774800447389e-07, "loss": 0.3703, "projector_lr": 2.986132440134217e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04052734375, "rewards_train/margins": 2.171875, "rewards_train/rejected": -2.125, "sft_loss": 0.72265625, "step": 331 }, { "dpo_loss": 0.31640625, "epoch": 0.05, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 9.953429730181652e-07, "loss": 0.4934, "projector_lr": 2.9860289190544963e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.07568359375, "rewards_train/margins": 2.0, "rewards_train/rejected": -1.9296875, "sft_loss": 0.80859375, "step": 332 }, { "dpo_loss": 0.6328125, "epoch": 0.05, "final_loss": 0.6328125, "grad_norm": 0.0, "learning_rate": 9.953083382755322e-07, "loss": 0.5435, "projector_lr": 2.985925014826597e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2314453125, "rewards_train/margins": 0.60546875, "rewards_train/rejected": -0.8359375, "sft_loss": 0.62109375, "step": 333 }, { "dpo_loss": 0.48046875, "epoch": 0.05, "final_loss": 0.48046875, "grad_norm": 0.0, "learning_rate": 9.952735758257697e-07, "loss": 0.3428, "projector_lr": 2.985820727477309e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.375, "rewards_train/margins": 0.91015625, "rewards_train/rejected": -1.2890625, "sft_loss": 0.7109375, "step": 334 }, { "dpo_loss": 0.455078125, "epoch": 0.05, "final_loss": 0.455078125, "grad_norm": 0.0, "learning_rate": 9.952386856778405e-07, "loss": 0.4165, "projector_lr": 2.985716057033522e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.291015625, "rewards_train/margins": 1.125, "rewards_train/rejected": -1.4140625, "sft_loss": 0.73828125, "step": 335 }, { "dpo_loss": 0.3515625, "epoch": 0.05, "final_loss": 0.3515625, "grad_norm": 0.0, "learning_rate": 9.952036678407406e-07, "loss": 0.3032, "projector_lr": 2.9856110035222225e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.5, "rewards_train/margins": 1.328125, "rewards_train/rejected": -0.83203125, "sft_loss": 0.498046875, "step": 336 }, { "dpo_loss": 0.39453125, "epoch": 0.05, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 9.95168522323499e-07, "loss": 0.373, "projector_lr": 2.985505566970497e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.283203125, "rewards_train/margins": 1.1171875, "rewards_train/rejected": -1.40625, "sft_loss": 0.58203125, "step": 337 }, { "dpo_loss": 0.287109375, "epoch": 0.05, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 9.95133249135177e-07, "loss": 0.2604, "projector_lr": 2.985399747405531e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.0341796875, "rewards_train/margins": 2.0625, "rewards_train/rejected": -2.03125, "sft_loss": 1.0, "step": 338 }, { "dpo_loss": 0.7265625, "epoch": 0.05, "final_loss": 0.7265625, "grad_norm": 0.0, "learning_rate": 9.950978482848693e-07, "loss": 0.6167, "projector_lr": 2.9852935448546083e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.3671875, "rewards_train/margins": 0.84375, "rewards_train/rejected": -1.2109375, "sft_loss": 0.83984375, "step": 339 }, { "dpo_loss": 0.4921875, "epoch": 0.05, "final_loss": 0.4921875, "grad_norm": 0.0, "learning_rate": 9.950623197817036e-07, "loss": 0.4391, "projector_lr": 2.9851869593451113e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.47265625, "rewards_train/margins": 0.73828125, "rewards_train/rejected": -1.2109375, "sft_loss": 0.74609375, "step": 340 }, { "dpo_loss": 0.5078125, "epoch": 0.05, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 9.950266636348402e-07, "loss": 0.3197, "projector_lr": 2.985079990904521e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.287109375, "rewards_train/margins": 1.359375, "rewards_train/rejected": -1.6484375, "sft_loss": 0.65625, "step": 341 }, { "dpo_loss": 0.37890625, "epoch": 0.05, "final_loss": 0.37890625, "grad_norm": 0.0, "learning_rate": 9.949908798534728e-07, "loss": 0.4589, "projector_lr": 2.9849726395604184e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1572265625, "rewards_train/margins": 1.328125, "rewards_train/rejected": -1.484375, "sft_loss": 0.75390625, "step": 342 }, { "dpo_loss": 0.138671875, "epoch": 0.05, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 9.949549684468272e-07, "loss": 0.2681, "projector_lr": 2.9848649053404815e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10595703125, "rewards_train/margins": 2.515625, "rewards_train/rejected": -2.625, "sft_loss": 0.75390625, "step": 343 }, { "dpo_loss": 0.1513671875, "epoch": 0.06, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 9.949189294241627e-07, "loss": 0.2283, "projector_lr": 2.984756788272488e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 2.578125, "rewards_train/rejected": -3.234375, "sft_loss": 0.7265625, "step": 344 }, { "dpo_loss": 0.64453125, "epoch": 0.06, "final_loss": 0.64453125, "grad_norm": 0.0, "learning_rate": 9.948827627947714e-07, "loss": 0.3782, "projector_lr": 2.984648288384314e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 1.65625, "rewards_train/rejected": -2.0625, "sft_loss": 0.578125, "step": 345 }, { "dpo_loss": 0.62890625, "epoch": 0.06, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 9.948464685679784e-07, "loss": 0.5733, "projector_lr": 2.984539405703935e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.193359375, "rewards_train/margins": 1.796875, "rewards_train/rejected": -1.984375, "sft_loss": 0.8125, "step": 346 }, { "dpo_loss": 0.439453125, "epoch": 0.06, "final_loss": 0.439453125, "grad_norm": 0.0, "learning_rate": 9.948100467531413e-07, "loss": 0.3831, "projector_lr": 2.9844301402594236e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1884765625, "rewards_train/margins": 1.28125, "rewards_train/rejected": -1.46875, "sft_loss": 0.75390625, "step": 347 }, { "dpo_loss": 0.640625, "epoch": 0.06, "final_loss": 0.640625, "grad_norm": 0.0, "learning_rate": 9.94773497359651e-07, "loss": 0.5274, "projector_lr": 2.984320492078953e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2470703125, "rewards_train/margins": 0.859375, "rewards_train/rejected": -1.109375, "sft_loss": 0.66015625, "step": 348 }, { "dpo_loss": 0.5078125, "epoch": 0.06, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 9.947368203969312e-07, "loss": 0.431, "projector_lr": 2.9842104611907936e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.48828125, "rewards_train/margins": 0.6953125, "rewards_train/rejected": -1.1796875, "sft_loss": 0.95703125, "step": 349 }, { "dpo_loss": 0.30078125, "epoch": 0.06, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 9.947000158744386e-07, "loss": 0.2802, "projector_lr": 2.9841000476233157e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.0751953125, "rewards_train/margins": 2.234375, "rewards_train/rejected": -2.15625, "sft_loss": 0.87109375, "step": 350 }, { "dpo_loss": 0.33984375, "epoch": 0.06, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 9.946630838016623e-07, "loss": 0.2953, "projector_lr": 2.983989251404987e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.244140625, "rewards_train/margins": 1.203125, "rewards_train/rejected": -1.453125, "sft_loss": 0.63671875, "step": 351 }, { "dpo_loss": 0.73828125, "epoch": 0.06, "final_loss": 0.73828125, "grad_norm": 0.0, "learning_rate": 9.946260241881249e-07, "loss": 0.5976, "projector_lr": 2.9838780725643747e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 0.796875, "rewards_train/rejected": -1.5703125, "sft_loss": 0.6953125, "step": 352 }, { "dpo_loss": 0.058837890625, "epoch": 0.06, "final_loss": 0.058837890625, "grad_norm": 0.0, "learning_rate": 9.945888370433812e-07, "loss": 0.1581, "projector_lr": 2.983766511130144e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.043212890625, "rewards_train/margins": 3.6875, "rewards_train/rejected": -3.734375, "sft_loss": 0.87109375, "step": 353 }, { "dpo_loss": 0.703125, "epoch": 0.06, "final_loss": 0.703125, "grad_norm": 0.0, "learning_rate": 9.9455152237702e-07, "loss": 0.8709, "projector_lr": 2.98365456713106e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 1.0703125, "rewards_train/rejected": -1.7421875, "sft_loss": 0.9140625, "step": 354 }, { "dpo_loss": 0.33203125, "epoch": 0.06, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 9.945140801986618e-07, "loss": 0.3803, "projector_lr": 2.9835422405959857e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 2.3125, "rewards_train/rejected": -2.59375, "sft_loss": 0.78515625, "step": 355 }, { "dpo_loss": 0.201171875, "epoch": 0.06, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 9.944765105179605e-07, "loss": 0.2848, "projector_lr": 2.9834295315538814e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1630859375, "rewards_train/margins": 2.0625, "rewards_train/rejected": -2.21875, "sft_loss": 0.546875, "step": 356 }, { "dpo_loss": 0.294921875, "epoch": 0.06, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 9.94438813344603e-07, "loss": 0.2772, "projector_lr": 2.9833164400338087e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 1.8828125, "rewards_train/rejected": -2.25, "sft_loss": 0.7109375, "step": 357 }, { "dpo_loss": 0.1298828125, "epoch": 0.06, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 9.944009886883084e-07, "loss": 0.1317, "projector_lr": 2.9832029660649255e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.25, "rewards_train/margins": 2.796875, "rewards_train/rejected": -2.546875, "sft_loss": 0.5078125, "step": 358 }, { "dpo_loss": 0.40234375, "epoch": 0.06, "final_loss": 0.40234375, "grad_norm": 0.0, "learning_rate": 9.9436303655883e-07, "loss": 0.4328, "projector_lr": 2.9830891096764897e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.23046875, "rewards_train/margins": 2.3125, "rewards_train/rejected": -2.53125, "sft_loss": 0.8046875, "step": 359 }, { "dpo_loss": 0.376953125, "epoch": 0.06, "final_loss": 0.376953125, "grad_norm": 0.0, "learning_rate": 9.943249569659522e-07, "loss": 0.3742, "projector_lr": 2.982974870897857e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.326171875, "rewards_train/margins": 1.921875, "rewards_train/rejected": -2.25, "sft_loss": 0.9921875, "step": 360 }, { "dpo_loss": 0.3359375, "epoch": 0.06, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 9.94286749919494e-07, "loss": 0.4342, "projector_lr": 2.982860249758482e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -2.328125, "sft_loss": 0.73828125, "step": 361 }, { "dpo_loss": 0.255859375, "epoch": 0.06, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 9.942484154293059e-07, "loss": 0.2424, "projector_lr": 2.982745246287918e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1220703125, "rewards_train/margins": 1.4296875, "rewards_train/rejected": -1.3046875, "sft_loss": 0.7578125, "step": 362 }, { "dpo_loss": 0.3984375, "epoch": 0.06, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 9.942099535052722e-07, "loss": 0.3961, "projector_lr": 2.9826298605158167e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.388671875, "rewards_train/margins": 1.6875, "rewards_train/rejected": -2.078125, "sft_loss": 0.6953125, "step": 363 }, { "dpo_loss": 0.234375, "epoch": 0.06, "final_loss": 0.234375, "grad_norm": 0.0, "learning_rate": 9.941713641573095e-07, "loss": 0.3497, "projector_lr": 2.9825140924719288e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.24609375, "rewards_train/margins": 1.6875, "rewards_train/rejected": -1.4453125, "sft_loss": 0.6171875, "step": 364 }, { "dpo_loss": 0.328125, "epoch": 0.06, "final_loss": 0.328125, "grad_norm": 0.0, "learning_rate": 9.941326473953673e-07, "loss": 0.3291, "projector_lr": 2.9823979421861024e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 1.515625, "rewards_train/rejected": -1.9765625, "sft_loss": 0.8671875, "step": 365 }, { "dpo_loss": 0.3203125, "epoch": 0.06, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 9.940938032294284e-07, "loss": 0.391, "projector_lr": 2.9822814096882854e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1845703125, "rewards_train/margins": 2.734375, "rewards_train/rejected": -2.90625, "sft_loss": 0.640625, "step": 366 }, { "dpo_loss": 0.3984375, "epoch": 0.06, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 9.940548316695079e-07, "loss": 0.3208, "projector_lr": 2.9821644950085238e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.486328125, "rewards_train/margins": 1.3828125, "rewards_train/rejected": -1.8671875, "sft_loss": 0.74609375, "step": 367 }, { "dpo_loss": 0.376953125, "epoch": 0.06, "final_loss": 0.376953125, "grad_norm": 0.0, "learning_rate": 9.94015732725654e-07, "loss": 0.2754, "projector_lr": 2.9820471981769624e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.265625, "rewards_train/margins": 1.515625, "rewards_train/rejected": -1.7734375, "sft_loss": 0.6484375, "step": 368 }, { "dpo_loss": 0.546875, "epoch": 0.06, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 9.93976506407948e-07, "loss": 0.3989, "projector_lr": 2.981929519223844e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.05615234375, "rewards_train/margins": 0.78515625, "rewards_train/rejected": -0.83984375, "sft_loss": 1.015625, "step": 369 }, { "dpo_loss": 0.1669921875, "epoch": 0.06, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 9.939371527265033e-07, "loss": 0.3407, "projector_lr": 2.98181145817951e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.037353515625, "rewards_train/margins": 2.578125, "rewards_train/rejected": -2.53125, "sft_loss": 0.6484375, "step": 370 }, { "dpo_loss": 0.3359375, "epoch": 0.06, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 9.938976716914667e-07, "loss": 0.2891, "projector_lr": 2.9816930150744003e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 1.2109375, "rewards_train/rejected": -1.7578125, "sft_loss": 0.98828125, "step": 371 }, { "dpo_loss": 0.25, "epoch": 0.06, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 9.93858063313018e-07, "loss": 0.3682, "projector_lr": 2.9815741899390544e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1376953125, "rewards_train/margins": 1.9609375, "rewards_train/rejected": -1.8203125, "sft_loss": 0.6875, "step": 372 }, { "dpo_loss": 0.361328125, "epoch": 0.06, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.938183276013695e-07, "loss": 0.2594, "projector_lr": 2.981454982804109e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 1.6328125, "rewards_train/rejected": -2.15625, "sft_loss": 0.921875, "step": 373 }, { "dpo_loss": 0.318359375, "epoch": 0.06, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 9.937784645667662e-07, "loss": 0.1997, "projector_lr": 2.981335393700299e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 2.109375, "rewards_train/rejected": -2.71875, "sft_loss": 0.84765625, "step": 374 }, { "dpo_loss": 0.263671875, "epoch": 0.06, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 9.937384742194865e-07, "loss": 0.2708, "projector_lr": 2.9812154226584595e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09375, "rewards_train/margins": 1.7109375, "rewards_train/rejected": -1.8046875, "sft_loss": 0.59375, "step": 375 }, { "dpo_loss": 0.2890625, "epoch": 0.06, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 9.936983565698407e-07, "loss": 0.2833, "projector_lr": 2.981095069709522e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.138671875, "rewards_train/margins": 1.421875, "rewards_train/rejected": -1.5546875, "sft_loss": 0.59375, "step": 376 }, { "dpo_loss": 0.275390625, "epoch": 0.06, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 9.93658111628173e-07, "loss": 0.413, "projector_lr": 2.9809743348845194e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.390625, "rewards_train/margins": 2.296875, "rewards_train/rejected": -2.6875, "sft_loss": 0.8515625, "step": 377 }, { "dpo_loss": 0.58984375, "epoch": 0.06, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 9.936177394048597e-07, "loss": 0.4062, "projector_lr": 2.9808532182145793e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 0.7890625, "rewards_train/rejected": -1.4609375, "sft_loss": 0.84375, "step": 378 }, { "dpo_loss": 0.34375, "epoch": 0.06, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 9.9357723991031e-07, "loss": 0.2864, "projector_lr": 2.9807317197309302e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.13671875, "rewards_train/margins": 1.6796875, "rewards_train/rejected": -1.8125, "sft_loss": 0.73828125, "step": 379 }, { "dpo_loss": 0.2177734375, "epoch": 0.06, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 9.935366131549662e-07, "loss": 0.2044, "projector_lr": 2.980609839464899e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.625, "rewards_train/margins": 2.34375, "rewards_train/rejected": -2.96875, "sft_loss": 0.73046875, "step": 380 }, { "dpo_loss": 0.3359375, "epoch": 0.06, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 9.934958591493031e-07, "loss": 0.5207, "projector_lr": 2.9804875774479096e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.0830078125, "rewards_train/margins": 1.9375, "rewards_train/rejected": -1.859375, "sft_loss": 0.79296875, "step": 381 }, { "dpo_loss": 0.28125, "epoch": 0.06, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 9.934549779038286e-07, "loss": 0.4641, "projector_lr": 2.980364933711486e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34375, "rewards_train/margins": 1.546875, "rewards_train/rejected": -1.890625, "sft_loss": 0.5390625, "step": 382 }, { "dpo_loss": 0.21484375, "epoch": 0.06, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 9.934139694290832e-07, "loss": 0.3715, "projector_lr": 2.98024190828725e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11279296875, "rewards_train/margins": 2.203125, "rewards_train/rejected": -2.3125, "sft_loss": 0.640625, "step": 383 }, { "dpo_loss": 0.1728515625, "epoch": 0.06, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 9.933728337356402e-07, "loss": 0.2023, "projector_lr": 2.980118501206921e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.68359375, "rewards_train/margins": 2.59375, "rewards_train/rejected": -1.9140625, "sft_loss": 0.9609375, "step": 384 }, { "dpo_loss": 0.30078125, "epoch": 0.06, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 9.93331570834106e-07, "loss": 0.4106, "projector_lr": 2.9799947125023178e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0625, "rewards_train/margins": 1.515625, "rewards_train/rejected": -1.578125, "sft_loss": 0.73046875, "step": 385 }, { "dpo_loss": 0.3515625, "epoch": 0.06, "final_loss": 0.3515625, "grad_norm": 0.0, "learning_rate": 9.932901807351192e-07, "loss": 0.4124, "projector_lr": 2.9798705422053578e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.03466796875, "rewards_train/margins": 1.6640625, "rewards_train/rejected": -1.625, "sft_loss": 0.7109375, "step": 386 }, { "dpo_loss": 0.1953125, "epoch": 0.06, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 9.932486634493517e-07, "loss": 0.2754, "projector_lr": 2.9797459903480554e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0059814453125, "rewards_train/margins": 2.40625, "rewards_train/rejected": -2.390625, "sft_loss": 0.70703125, "step": 387 }, { "dpo_loss": 0.1474609375, "epoch": 0.06, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 9.932070189875082e-07, "loss": 0.2772, "projector_lr": 2.9796210569625246e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.33203125, "rewards_train/margins": 2.75, "rewards_train/rejected": -2.421875, "sft_loss": 0.79296875, "step": 388 }, { "dpo_loss": 0.11083984375, "epoch": 0.06, "final_loss": 0.11083984375, "grad_norm": 0.0, "learning_rate": 9.93165247360326e-07, "loss": 0.1854, "projector_lr": 2.979495742080978e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.275390625, "rewards_train/margins": 3.3125, "rewards_train/rejected": -3.03125, "sft_loss": 0.609375, "step": 389 }, { "dpo_loss": 0.3828125, "epoch": 0.06, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 9.93123348578575e-07, "loss": 0.3527, "projector_lr": 2.979370045735725e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1904296875, "rewards_train/margins": 1.75, "rewards_train/rejected": -1.9453125, "sft_loss": 0.734375, "step": 390 }, { "dpo_loss": 0.265625, "epoch": 0.06, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 9.930813226530582e-07, "loss": 0.3715, "projector_lr": 2.979243967959175e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2373046875, "rewards_train/margins": 1.5703125, "rewards_train/rejected": -1.328125, "sft_loss": 0.77734375, "step": 391 }, { "dpo_loss": 0.458984375, "epoch": 0.06, "final_loss": 0.458984375, "grad_norm": 0.0, "learning_rate": 9.930391695946116e-07, "loss": 0.3343, "projector_lr": 2.979117508783835e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.306640625, "rewards_train/margins": 1.3828125, "rewards_train/rejected": -1.6875, "sft_loss": 0.734375, "step": 392 }, { "dpo_loss": 0.37109375, "epoch": 0.06, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 9.929968894141033e-07, "loss": 0.3184, "projector_lr": 2.97899066824231e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.1689453125, "rewards_train/margins": 1.6953125, "rewards_train/rejected": -1.5234375, "sft_loss": 0.8359375, "step": 393 }, { "dpo_loss": 0.1845703125, "epoch": 0.06, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 9.929544821224347e-07, "loss": 0.263, "projector_lr": 2.9788634463673045e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.20703125, "rewards_train/margins": 2.4375, "rewards_train/rejected": -2.640625, "sft_loss": 0.6640625, "step": 394 }, { "dpo_loss": 0.259765625, "epoch": 0.06, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 9.9291194773054e-07, "loss": 0.3186, "projector_lr": 2.9787358431916197e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.055908203125, "rewards_train/margins": 1.9296875, "rewards_train/rejected": -1.8671875, "sft_loss": 0.55859375, "step": 395 }, { "dpo_loss": 0.390625, "epoch": 0.06, "final_loss": 0.390625, "grad_norm": 0.0, "learning_rate": 9.928692862493856e-07, "loss": 0.2877, "projector_lr": 2.9786078587481568e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.162109375, "rewards_train/margins": 2.015625, "rewards_train/rejected": -1.859375, "sft_loss": 0.84765625, "step": 396 }, { "dpo_loss": 0.6796875, "epoch": 0.06, "final_loss": 0.6796875, "grad_norm": 0.0, "learning_rate": 9.928264976899714e-07, "loss": 0.462, "projector_lr": 2.978479493069914e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 0.9296875, "rewards_train/rejected": -1.5625, "sft_loss": 0.625, "step": 397 }, { "dpo_loss": 0.1435546875, "epoch": 0.06, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 9.927835820633295e-07, "loss": 0.2268, "projector_lr": 2.9783507461899886e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5, "rewards_train/margins": 2.265625, "rewards_train/rejected": -2.765625, "sft_loss": 0.83203125, "step": 398 }, { "dpo_loss": 0.578125, "epoch": 0.06, "final_loss": 0.578125, "grad_norm": 0.0, "learning_rate": 9.927405393805251e-07, "loss": 0.5015, "projector_lr": 2.978221618141575e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.240234375, "rewards_train/margins": 0.61328125, "rewards_train/rejected": -0.8515625, "sft_loss": 0.8828125, "step": 399 }, { "dpo_loss": 0.2216796875, "epoch": 0.06, "final_loss": 0.2216796875, "grad_norm": 0.0, "learning_rate": 9.92697369652656e-07, "loss": 0.2996, "projector_lr": 2.9780921089579685e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.087890625, "rewards_train/margins": 2.0625, "rewards_train/rejected": -2.15625, "sft_loss": 0.61328125, "step": 400 }, { "dpo_loss": 0.51953125, "epoch": 0.06, "final_loss": 0.51953125, "grad_norm": 0.0, "learning_rate": 9.92654072890853e-07, "loss": 0.4166, "projector_lr": 2.977962218672559e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.28125, "rewards_train/margins": 1.53125, "rewards_train/rejected": -1.8125, "sft_loss": 0.81640625, "step": 401 }, { "dpo_loss": 0.283203125, "epoch": 0.06, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 9.926106491062791e-07, "loss": 0.224, "projector_lr": 2.977831947318838e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.72265625, "rewards_train/margins": 2.375, "rewards_train/rejected": -3.09375, "sft_loss": 0.8515625, "step": 402 }, { "dpo_loss": 0.5234375, "epoch": 0.06, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 9.925670983101309e-07, "loss": 0.3962, "projector_lr": 2.977701294930393e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.3125, "rewards_train/margins": 1.625, "rewards_train/rejected": -2.9375, "sft_loss": 0.8671875, "step": 403 }, { "dpo_loss": 0.263671875, "epoch": 0.06, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 9.925234205136368e-07, "loss": 0.2385, "projector_lr": 2.9775702615409107e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.287109375, "rewards_train/margins": 1.796875, "rewards_train/rejected": -2.09375, "sft_loss": 0.7421875, "step": 404 }, { "dpo_loss": 0.1845703125, "epoch": 0.06, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 9.924796157280586e-07, "loss": 0.4349, "projector_lr": 2.977438847184176e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 2.6875, "rewards_train/rejected": -3.1875, "sft_loss": 0.5859375, "step": 405 }, { "dpo_loss": 0.30078125, "epoch": 0.06, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 9.924356839646907e-07, "loss": 0.3313, "projector_lr": 2.977307051894072e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 1.890625, "rewards_train/rejected": -2.15625, "sft_loss": 1.2890625, "step": 406 }, { "dpo_loss": 0.36328125, "epoch": 0.07, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 9.9239162523486e-07, "loss": 0.4815, "projector_lr": 2.9771748757045803e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.921875, "rewards_train/margins": 2.6875, "rewards_train/rejected": -3.609375, "sft_loss": 0.5703125, "step": 407 }, { "dpo_loss": 0.1796875, "epoch": 0.07, "final_loss": 0.1796875, "grad_norm": 0.0, "learning_rate": 9.923474395499264e-07, "loss": 0.1993, "projector_lr": 2.9770423186497798e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.087890625, "rewards_train/margins": 2.328125, "rewards_train/rejected": -2.40625, "sft_loss": 0.66796875, "step": 408 }, { "dpo_loss": 0.21484375, "epoch": 0.07, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 9.923031269212827e-07, "loss": 0.3583, "projector_lr": 2.9769093807638484e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 2.265625, "rewards_train/rejected": -3.078125, "sft_loss": 0.83203125, "step": 409 }, { "dpo_loss": 0.28125, "epoch": 0.07, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 9.922586873603538e-07, "loss": 0.2359, "projector_lr": 2.9767760620810617e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 2.09375, "rewards_train/rejected": -2.625, "sft_loss": 0.8984375, "step": 410 }, { "dpo_loss": 0.28515625, "epoch": 0.07, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 9.922141208785978e-07, "loss": 0.2757, "projector_lr": 2.976642362635794e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 3.09375, "rewards_train/rejected": -3.734375, "sft_loss": 0.890625, "step": 411 }, { "dpo_loss": 0.1884765625, "epoch": 0.07, "final_loss": 0.1884765625, "grad_norm": 0.0, "learning_rate": 9.92169427487506e-07, "loss": 0.2001, "projector_lr": 2.9765082824625177e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 2.8125, "rewards_train/rejected": -3.421875, "sft_loss": 0.74609375, "step": 412 }, { "dpo_loss": 0.427734375, "epoch": 0.07, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 9.921246071986009e-07, "loss": 0.3964, "projector_lr": 2.9763738215958028e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.48828125, "rewards_train/margins": 1.4765625, "rewards_train/rejected": -1.9609375, "sft_loss": 0.9765625, "step": 413 }, { "dpo_loss": 0.62890625, "epoch": 0.07, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 9.920796600234392e-07, "loss": 0.4112, "projector_lr": 2.9762389800703176e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0, "rewards_train/margins": 1.3828125, "rewards_train/rejected": -2.390625, "sft_loss": 0.66015625, "step": 414 }, { "dpo_loss": 0.57421875, "epoch": 0.07, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 9.9203458597361e-07, "loss": 0.4596, "projector_lr": 2.97610375792083e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 1.890625, "rewards_train/rejected": -2.59375, "sft_loss": 0.87890625, "step": 415 }, { "dpo_loss": 0.232421875, "epoch": 0.07, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 9.919893850607343e-07, "loss": 0.2614, "projector_lr": 2.9759681551822033e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.35546875, "rewards_train/margins": 2.078125, "rewards_train/rejected": -2.4375, "sft_loss": 0.73046875, "step": 416 }, { "dpo_loss": 0.5703125, "epoch": 0.07, "final_loss": 0.5703125, "grad_norm": 0.0, "learning_rate": 9.919440572964669e-07, "loss": 0.5486, "projector_lr": 2.975832171889401e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.875, "rewards_train/margins": 1.2109375, "rewards_train/rejected": -2.09375, "sft_loss": 0.6875, "step": 417 }, { "dpo_loss": 0.3984375, "epoch": 0.07, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 9.918986026924947e-07, "loss": 0.3305, "projector_lr": 2.9756958080774842e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.392578125, "rewards_train/margins": 1.0078125, "rewards_train/rejected": -1.3984375, "sft_loss": 0.6875, "step": 418 }, { "dpo_loss": 0.79296875, "epoch": 0.07, "final_loss": 0.79296875, "grad_norm": 0.0, "learning_rate": 9.918530212605373e-07, "loss": 0.664, "projector_lr": 2.9755590637816126e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.4609375, "rewards_train/margins": 0.484375, "rewards_train/rejected": -0.9453125, "sft_loss": 0.80078125, "step": 419 }, { "dpo_loss": 0.63671875, "epoch": 0.07, "final_loss": 0.63671875, "grad_norm": 0.0, "learning_rate": 9.918073130123473e-07, "loss": 0.4515, "projector_lr": 2.9754219390370424e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.451171875, "rewards_train/margins": 1.578125, "rewards_train/rejected": -2.03125, "sft_loss": 0.74609375, "step": 420 }, { "dpo_loss": 0.2412109375, "epoch": 0.07, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 9.9176147795971e-07, "loss": 0.3095, "projector_lr": 2.9752844338791307e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1787109375, "rewards_train/margins": 2.015625, "rewards_train/rejected": -2.1875, "sft_loss": 0.734375, "step": 421 }, { "dpo_loss": 0.1630859375, "epoch": 0.07, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 9.917155161144428e-07, "loss": 0.3349, "projector_lr": 2.9751465483433287e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 2.296875, "rewards_train/rejected": -2.921875, "sft_loss": 0.75, "step": 422 }, { "dpo_loss": 0.228515625, "epoch": 0.07, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 9.916694274883964e-07, "loss": 0.2479, "projector_lr": 2.9750082824651893e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1376953125, "rewards_train/margins": 2.5625, "rewards_train/rejected": -2.703125, "sft_loss": 0.734375, "step": 423 }, { "dpo_loss": 0.609375, "epoch": 0.07, "final_loss": 0.609375, "grad_norm": 0.0, "learning_rate": 9.916232120934539e-07, "loss": 0.3726, "projector_lr": 2.974869636280362e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.21875, "rewards_train/margins": 1.1640625, "rewards_train/rejected": -1.3828125, "sft_loss": 0.71484375, "step": 424 }, { "dpo_loss": 0.431640625, "epoch": 0.07, "final_loss": 0.431640625, "grad_norm": 0.0, "learning_rate": 9.915768699415311e-07, "loss": 0.3214, "projector_lr": 2.974730609824594e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.388671875, "rewards_train/margins": 1.5390625, "rewards_train/rejected": -1.921875, "sft_loss": 0.625, "step": 425 }, { "dpo_loss": 0.2333984375, "epoch": 0.07, "final_loss": 0.2333984375, "grad_norm": 0.0, "learning_rate": 9.91530401044577e-07, "loss": 0.5174, "projector_lr": 2.9745912031337315e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0751953125, "rewards_train/margins": 2.21875, "rewards_train/rejected": -2.296875, "sft_loss": 0.83203125, "step": 426 }, { "dpo_loss": 0.55859375, "epoch": 0.07, "final_loss": 0.55859375, "grad_norm": 0.0, "learning_rate": 9.914838054145726e-07, "loss": 0.4523, "projector_lr": 2.9744514162437176e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.68359375, "rewards_train/margins": 1.3359375, "rewards_train/rejected": -2.015625, "sft_loss": 0.8046875, "step": 427 }, { "dpo_loss": 0.6875, "epoch": 0.07, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 9.914370830635316e-07, "loss": 0.5899, "projector_lr": 2.974311249190595e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.03125, "rewards_train/margins": 1.171875, "rewards_train/rejected": -2.203125, "sft_loss": 0.65234375, "step": 428 }, { "dpo_loss": 0.2119140625, "epoch": 0.07, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 9.913902340035007e-07, "loss": 0.2224, "projector_lr": 2.9741707020105024e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1005859375, "rewards_train/margins": 2.46875, "rewards_train/rejected": -2.5625, "sft_loss": 0.640625, "step": 429 }, { "dpo_loss": 0.57421875, "epoch": 0.07, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 9.913432582465594e-07, "loss": 0.3657, "projector_lr": 2.9740297747396783e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.423828125, "rewards_train/margins": 2.375, "rewards_train/rejected": -2.796875, "sft_loss": 0.73828125, "step": 430 }, { "dpo_loss": 0.1865234375, "epoch": 0.07, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 9.912961558048194e-07, "loss": 0.381, "projector_lr": 2.9738884674144585e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.259765625, "rewards_train/margins": 2.515625, "rewards_train/rejected": -2.78125, "sft_loss": 0.64453125, "step": 431 }, { "dpo_loss": 0.2734375, "epoch": 0.07, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 9.912489266904254e-07, "loss": 0.2494, "projector_lr": 2.9737467800712764e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.322265625, "rewards_train/margins": 2.5625, "rewards_train/rejected": -2.875, "sft_loss": 0.67578125, "step": 432 }, { "dpo_loss": 0.90234375, "epoch": 0.07, "final_loss": 0.90234375, "grad_norm": 0.0, "learning_rate": 9.912015709155546e-07, "loss": 0.58, "projector_lr": 2.9736047127466637e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.4609375, "rewards_train/margins": 0.259765625, "rewards_train/rejected": -0.72265625, "sft_loss": 0.5703125, "step": 433 }, { "dpo_loss": 0.361328125, "epoch": 0.07, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.91154088492417e-07, "loss": 0.4187, "projector_lr": 2.9734622654772508e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5625, "rewards_train/margins": 1.84375, "rewards_train/rejected": -2.40625, "sft_loss": 0.83984375, "step": 434 }, { "dpo_loss": 0.318359375, "epoch": 0.07, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 9.911064794332548e-07, "loss": 0.6049, "projector_lr": 2.973319438299765e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.109375, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -1.8984375, "sft_loss": 0.96875, "step": 435 }, { "dpo_loss": 0.14453125, "epoch": 0.07, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 9.910587437503438e-07, "loss": 0.2042, "projector_lr": 2.9731762312510314e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.78125, "rewards_train/margins": 2.671875, "rewards_train/rejected": -3.4375, "sft_loss": 0.67578125, "step": 436 }, { "dpo_loss": 0.28515625, "epoch": 0.07, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 9.910108814559914e-07, "loss": 0.2587, "projector_lr": 2.973032644367974e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.083984375, "rewards_train/margins": 2.578125, "rewards_train/rejected": -2.65625, "sft_loss": 0.66015625, "step": 437 }, { "dpo_loss": 0.52734375, "epoch": 0.07, "final_loss": 0.52734375, "grad_norm": 0.0, "learning_rate": 9.909628925625383e-07, "loss": 0.3447, "projector_lr": 2.972888677687615e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5625, "rewards_train/margins": 0.7734375, "rewards_train/rejected": -1.3359375, "sft_loss": 0.91015625, "step": 438 }, { "dpo_loss": 0.384765625, "epoch": 0.07, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 9.909147770823575e-07, "loss": 0.3322, "projector_lr": 2.9727443312470726e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2314453125, "rewards_train/margins": 1.1328125, "rewards_train/rejected": -1.359375, "sft_loss": 0.79296875, "step": 439 }, { "dpo_loss": 0.130859375, "epoch": 0.07, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 9.908665350278548e-07, "loss": 0.153, "projector_lr": 2.9725996050835647e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.142578125, "rewards_train/margins": 2.75, "rewards_train/rejected": -2.90625, "sft_loss": 0.58984375, "step": 440 }, { "dpo_loss": 0.365234375, "epoch": 0.07, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 9.90818166411469e-07, "loss": 0.4085, "projector_lr": 2.9724544992344076e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.255859375, "rewards_train/margins": 2.390625, "rewards_train/rejected": -2.65625, "sft_loss": 0.88671875, "step": 441 }, { "dpo_loss": 0.09912109375, "epoch": 0.07, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 9.90769671245671e-07, "loss": 0.3692, "projector_lr": 2.972309013737013e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33984375, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.0625, "sft_loss": 0.80859375, "step": 442 }, { "dpo_loss": 0.1689453125, "epoch": 0.07, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 9.907210495429641e-07, "loss": 0.1667, "projector_lr": 2.9721631486288926e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.29296875, "rewards_train/margins": 2.484375, "rewards_train/rejected": -2.1875, "sft_loss": 0.80859375, "step": 443 }, { "dpo_loss": 0.248046875, "epoch": 0.07, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 9.90672301315885e-07, "loss": 0.3212, "projector_lr": 2.9720169039476546e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1123046875, "rewards_train/margins": 2.875, "rewards_train/rejected": -2.984375, "sft_loss": 0.73828125, "step": 444 }, { "dpo_loss": 0.255859375, "epoch": 0.07, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 9.906234265770024e-07, "loss": 0.2329, "projector_lr": 2.9718702797310074e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.11572265625, "rewards_train/margins": 2.0625, "rewards_train/rejected": -1.953125, "sft_loss": 0.6484375, "step": 445 }, { "dpo_loss": 0.09521484375, "epoch": 0.07, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 9.90574425338918e-07, "loss": 0.124, "projector_lr": 2.971723276016754e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.162109375, "rewards_train/margins": 2.59375, "rewards_train/rejected": -2.75, "sft_loss": 0.95703125, "step": 446 }, { "dpo_loss": 0.55078125, "epoch": 0.07, "final_loss": 0.55078125, "grad_norm": 0.0, "learning_rate": 9.905252976142658e-07, "loss": 0.4878, "projector_lr": 2.971575892842797e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 0.9921875, "rewards_train/rejected": -1.5703125, "sft_loss": 0.55859375, "step": 447 }, { "dpo_loss": 0.384765625, "epoch": 0.07, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 9.904760434157127e-07, "loss": 0.4074, "projector_lr": 2.971428130247138e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 1.7578125, "rewards_train/rejected": -2.46875, "sft_loss": 0.7265625, "step": 448 }, { "dpo_loss": 0.3359375, "epoch": 0.07, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 9.904266627559583e-07, "loss": 0.3983, "projector_lr": 2.9712799882678747e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2314453125, "rewards_train/margins": 2.0625, "rewards_train/rejected": -2.296875, "sft_loss": 0.63671875, "step": 449 }, { "dpo_loss": 0.2080078125, "epoch": 0.07, "final_loss": 0.2080078125, "grad_norm": 0.0, "learning_rate": 9.90377155647734e-07, "loss": 0.4149, "projector_lr": 2.9711314669432023e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1796875, "rewards_train/margins": 2.328125, "rewards_train/rejected": -2.140625, "sft_loss": 0.87109375, "step": 450 }, { "dpo_loss": 0.193359375, "epoch": 0.07, "final_loss": 0.193359375, "grad_norm": 0.0, "learning_rate": 9.903275221038051e-07, "loss": 0.1735, "projector_lr": 2.9709825663114156e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.224609375, "rewards_train/margins": 2.21875, "rewards_train/rejected": -1.9921875, "sft_loss": 0.7578125, "step": 451 }, { "dpo_loss": 0.384765625, "epoch": 0.07, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 9.902777621369685e-07, "loss": 0.2623, "projector_lr": 2.9708332864109057e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.40625, "rewards_train/margins": 1.4296875, "rewards_train/rejected": -1.8359375, "sft_loss": 0.91796875, "step": 452 }, { "dpo_loss": 0.169921875, "epoch": 0.07, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 9.902278757600537e-07, "loss": 0.3545, "projector_lr": 2.9706836272801614e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04052734375, "rewards_train/margins": 3.421875, "rewards_train/rejected": -3.390625, "sft_loss": 0.59765625, "step": 453 }, { "dpo_loss": 0.181640625, "epoch": 0.07, "final_loss": 0.181640625, "grad_norm": 0.0, "learning_rate": 9.901778629859235e-07, "loss": 0.2632, "projector_lr": 2.9705335889577707e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.47265625, "rewards_train/margins": 2.34375, "rewards_train/rejected": -2.8125, "sft_loss": 0.65234375, "step": 454 }, { "dpo_loss": 0.291015625, "epoch": 0.07, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 9.901277238274726e-07, "loss": 0.376, "projector_lr": 2.970383171482418e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.55859375, "rewards_train/margins": 2.03125, "rewards_train/rejected": -2.578125, "sft_loss": 0.62109375, "step": 455 }, { "dpo_loss": 0.3203125, "epoch": 0.07, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 9.900774582976288e-07, "loss": 0.4611, "projector_lr": 2.9702323748928865e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.734375, "rewards_train/margins": 1.8125, "rewards_train/rejected": -2.546875, "sft_loss": 0.6796875, "step": 456 }, { "dpo_loss": 0.470703125, "epoch": 0.07, "final_loss": 0.470703125, "grad_norm": 0.0, "learning_rate": 9.90027066409352e-07, "loss": 0.3254, "projector_lr": 2.9700811992280562e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 1.7109375, "rewards_train/rejected": -2.5, "sft_loss": 0.8359375, "step": 457 }, { "dpo_loss": 0.30859375, "epoch": 0.07, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 9.89976548175635e-07, "loss": 0.2722, "projector_lr": 2.9699296445269054e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1796875, "rewards_train/margins": 1.734375, "rewards_train/rejected": -1.921875, "sft_loss": 0.953125, "step": 458 }, { "dpo_loss": 0.388671875, "epoch": 0.07, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 9.899259036095034e-07, "loss": 0.2222, "projector_lr": 2.96977771082851e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 1.5390625, "rewards_train/rejected": -2.0, "sft_loss": 0.7734375, "step": 459 }, { "dpo_loss": 0.275390625, "epoch": 0.07, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 9.898751327240144e-07, "loss": 0.2847, "projector_lr": 2.9696253981720438e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.34375, "rewards_train/margins": 1.4609375, "rewards_train/rejected": -1.8046875, "sft_loss": 0.7109375, "step": 460 }, { "dpo_loss": 0.373046875, "epoch": 0.07, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 9.898242355322592e-07, "loss": 0.4422, "projector_lr": 2.969472706596778e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.4375, "rewards_train/margins": 2.171875, "rewards_train/rejected": -2.609375, "sft_loss": 0.71484375, "step": 461 }, { "dpo_loss": 0.384765625, "epoch": 0.07, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 9.897732120473602e-07, "loss": 0.4391, "projector_lr": 2.969319636142081e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.74609375, "rewards_train/margins": 1.4765625, "rewards_train/rejected": -2.21875, "sft_loss": 0.796875, "step": 462 }, { "dpo_loss": 0.1611328125, "epoch": 0.07, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 9.897220622824734e-07, "loss": 0.1363, "projector_lr": 2.9691661868474207e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10595703125, "rewards_train/margins": 2.5625, "rewards_train/rejected": -2.671875, "sft_loss": 0.5546875, "step": 463 }, { "dpo_loss": 0.25390625, "epoch": 0.07, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 9.896707862507867e-07, "loss": 0.3281, "projector_lr": 2.9690123587523605e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.10009765625, "rewards_train/margins": 2.078125, "rewards_train/rejected": -2.171875, "sft_loss": 0.65234375, "step": 464 }, { "dpo_loss": 0.3125, "epoch": 0.07, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 9.896193839655207e-07, "loss": 0.3874, "projector_lr": 2.9688581518965628e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.447265625, "rewards_train/margins": 1.515625, "rewards_train/rejected": -1.9609375, "sft_loss": 0.8203125, "step": 465 }, { "dpo_loss": 0.44921875, "epoch": 0.07, "final_loss": 0.44921875, "grad_norm": 0.0, "learning_rate": 9.89567855439929e-07, "loss": 0.4136, "projector_lr": 2.968703566319787e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 2.265625, "rewards_train/rejected": -2.84375, "sft_loss": 0.62109375, "step": 466 }, { "dpo_loss": 0.08056640625, "epoch": 0.07, "final_loss": 0.08056640625, "grad_norm": 0.0, "learning_rate": 9.89516200687297e-07, "loss": 0.0958, "projector_lr": 2.9685486020618914e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.271484375, "rewards_train/margins": 2.828125, "rewards_train/rejected": -2.5625, "sft_loss": 0.5625, "step": 467 }, { "dpo_loss": 0.1572265625, "epoch": 0.07, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 9.894644197209432e-07, "loss": 0.4333, "projector_lr": 2.96839325916283e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.29296875, "rewards_train/margins": 2.65625, "rewards_train/rejected": -2.953125, "sft_loss": 0.64453125, "step": 468 }, { "dpo_loss": 0.265625, "epoch": 0.08, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 9.894125125542186e-07, "loss": 0.2909, "projector_lr": 2.968237537662656e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.037109375, "rewards_train/margins": 1.7578125, "rewards_train/rejected": -1.7265625, "sft_loss": 0.64453125, "step": 469 }, { "dpo_loss": 0.30078125, "epoch": 0.08, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 9.893604792005061e-07, "loss": 0.3511, "projector_lr": 2.9680814376015187e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.068359375, "rewards_train/margins": 2.171875, "rewards_train/rejected": -2.25, "sft_loss": 0.671875, "step": 470 }, { "dpo_loss": 0.1767578125, "epoch": 0.08, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 9.893083196732224e-07, "loss": 0.3285, "projector_lr": 2.9679249590196673e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.4140625, "rewards_train/margins": 2.75, "rewards_train/rejected": -3.171875, "sft_loss": 0.60546875, "step": 471 }, { "dpo_loss": 0.1650390625, "epoch": 0.08, "final_loss": 0.1650390625, "grad_norm": 0.0, "learning_rate": 9.892560339858153e-07, "loss": 0.3781, "projector_lr": 2.9677681019574462e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 2.375, "rewards_train/rejected": -3.03125, "sft_loss": 0.71875, "step": 472 }, { "dpo_loss": 0.12890625, "epoch": 0.08, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 9.892036221517663e-07, "loss": 0.1821, "projector_lr": 2.9676108664552993e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.224609375, "rewards_train/margins": 2.984375, "rewards_train/rejected": -2.765625, "sft_loss": 0.64453125, "step": 473 }, { "dpo_loss": 0.478515625, "epoch": 0.08, "final_loss": 0.478515625, "grad_norm": 0.0, "learning_rate": 9.891510841845886e-07, "loss": 0.4169, "projector_lr": 2.967453252553766e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 1.7421875, "rewards_train/rejected": -2.25, "sft_loss": 0.66015625, "step": 474 }, { "dpo_loss": 0.27734375, "epoch": 0.08, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 9.890984200978285e-07, "loss": 0.1861, "projector_lr": 2.9672952602934856e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 2.0, "rewards_train/rejected": -2.578125, "sft_loss": 0.61328125, "step": 475 }, { "dpo_loss": 0.96484375, "epoch": 0.08, "final_loss": 0.96484375, "grad_norm": 0.0, "learning_rate": 9.890456299050642e-07, "loss": 0.6576, "projector_lr": 2.967136889715193e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.484375, "rewards_train/margins": 0.78125, "rewards_train/rejected": -2.265625, "sft_loss": 0.78515625, "step": 476 }, { "dpo_loss": 0.5234375, "epoch": 0.08, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 9.889927136199073e-07, "loss": 0.4105, "projector_lr": 2.9669781408597224e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.416015625, "rewards_train/margins": 1.3984375, "rewards_train/rejected": -1.8125, "sft_loss": 0.77734375, "step": 477 }, { "dpo_loss": 0.10693359375, "epoch": 0.08, "final_loss": 0.10693359375, "grad_norm": 0.0, "learning_rate": 9.88939671256001e-07, "loss": 0.2227, "projector_lr": 2.9668190137680035e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2275390625, "rewards_train/margins": 3.109375, "rewards_train/rejected": -2.890625, "sft_loss": 0.72265625, "step": 478 }, { "dpo_loss": 0.41796875, "epoch": 0.08, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 9.888865028270217e-07, "loss": 0.4271, "projector_lr": 2.966659508481065e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1435546875, "rewards_train/margins": 1.0625, "rewards_train/rejected": -1.203125, "sft_loss": 0.8046875, "step": 479 }, { "dpo_loss": 0.62109375, "epoch": 0.08, "final_loss": 0.62109375, "grad_norm": 0.0, "learning_rate": 9.888332083466778e-07, "loss": 0.6391, "projector_lr": 2.9664996250400334e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.435546875, "rewards_train/margins": 1.5, "rewards_train/rejected": -1.9375, "sft_loss": 0.8046875, "step": 480 }, { "dpo_loss": 0.2255859375, "epoch": 0.08, "final_loss": 0.2255859375, "grad_norm": 0.0, "learning_rate": 9.887797878287104e-07, "loss": 0.3408, "projector_lr": 2.9663393634861313e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.287109375, "rewards_train/margins": 2.15625, "rewards_train/rejected": -2.453125, "sft_loss": 0.828125, "step": 481 }, { "dpo_loss": 0.09130859375, "epoch": 0.08, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 9.88726241286893e-07, "loss": 0.2188, "projector_lr": 2.966178723860679e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.384765625, "rewards_train/margins": 3.5625, "rewards_train/rejected": -3.1875, "sft_loss": 0.68359375, "step": 482 }, { "dpo_loss": 0.279296875, "epoch": 0.08, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 9.886725687350319e-07, "loss": 0.202, "projector_lr": 2.966017706205096e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 1.8125, "rewards_train/rejected": -2.40625, "sft_loss": 0.609375, "step": 483 }, { "dpo_loss": 0.390625, "epoch": 0.08, "final_loss": 0.390625, "grad_norm": 0.0, "learning_rate": 9.886187701869659e-07, "loss": 0.2847, "projector_lr": 2.9658563105608976e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.37890625, "rewards_train/margins": 2.71875, "rewards_train/rejected": -2.328125, "sft_loss": 0.63671875, "step": 484 }, { "dpo_loss": 0.255859375, "epoch": 0.08, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 9.885648456565655e-07, "loss": 0.3117, "projector_lr": 2.965694536969697e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.23828125, "rewards_train/margins": 3.140625, "rewards_train/rejected": -3.375, "sft_loss": 0.82421875, "step": 485 }, { "dpo_loss": 0.0986328125, "epoch": 0.08, "final_loss": 0.0986328125, "grad_norm": 0.0, "learning_rate": 9.885107951577347e-07, "loss": 0.1861, "projector_lr": 2.9655323854732046e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.08056640625, "rewards_train/margins": 3.453125, "rewards_train/rejected": -3.53125, "sft_loss": 0.75390625, "step": 486 }, { "dpo_loss": 0.640625, "epoch": 0.08, "final_loss": 0.640625, "grad_norm": 0.0, "learning_rate": 9.884566187044094e-07, "loss": 0.5123, "projector_lr": 2.9653698561132283e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.56640625, "rewards_train/margins": 1.078125, "rewards_train/rejected": -1.6484375, "sft_loss": 0.74609375, "step": 487 }, { "dpo_loss": 0.2041015625, "epoch": 0.08, "final_loss": 0.2041015625, "grad_norm": 0.0, "learning_rate": 9.884023163105581e-07, "loss": 0.3075, "projector_lr": 2.9652069489316746e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 3.203125, "rewards_train/rejected": -3.71875, "sft_loss": 0.80859375, "step": 488 }, { "dpo_loss": 0.4609375, "epoch": 0.08, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 9.883478879901818e-07, "loss": 0.356, "projector_lr": 2.9650436639705457e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.671875, "rewards_train/margins": 2.3125, "rewards_train/rejected": -2.984375, "sft_loss": 0.7109375, "step": 489 }, { "dpo_loss": 0.1494140625, "epoch": 0.08, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 9.882933337573141e-07, "loss": 0.3597, "projector_lr": 2.9648800012719424e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.193359375, "rewards_train/margins": 2.671875, "rewards_train/rejected": -2.875, "sft_loss": 0.76953125, "step": 490 }, { "dpo_loss": 0.63671875, "epoch": 0.08, "final_loss": 0.63671875, "grad_norm": 0.0, "learning_rate": 9.882386536260205e-07, "loss": 0.3846, "projector_lr": 2.964715960878062e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.416015625, "rewards_train/margins": 1.34375, "rewards_train/rejected": -1.7578125, "sft_loss": 0.93359375, "step": 491 }, { "dpo_loss": 0.333984375, "epoch": 0.08, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 9.881838476103998e-07, "loss": 0.2824, "projector_lr": 2.9645515428312e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1845703125, "rewards_train/margins": 1.34375, "rewards_train/rejected": -1.5234375, "sft_loss": 0.59375, "step": 492 }, { "dpo_loss": 0.318359375, "epoch": 0.08, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 9.881289157245828e-07, "loss": 0.3584, "projector_lr": 2.9643867471737483e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2109375, "rewards_train/margins": 2.25, "rewards_train/rejected": -2.453125, "sft_loss": 0.7734375, "step": 493 }, { "dpo_loss": 0.1025390625, "epoch": 0.08, "final_loss": 0.1025390625, "grad_norm": 0.0, "learning_rate": 9.880738579827323e-07, "loss": 0.1661, "projector_lr": 2.964221573948197e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.126953125, "rewards_train/margins": 2.953125, "rewards_train/rejected": -3.078125, "sft_loss": 0.640625, "step": 494 }, { "dpo_loss": 0.66015625, "epoch": 0.08, "final_loss": 0.66015625, "grad_norm": 0.0, "learning_rate": 9.880186743990447e-07, "loss": 0.7324, "projector_lr": 2.964056023197134e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.337890625, "rewards_train/margins": 0.90625, "rewards_train/rejected": -1.2421875, "sft_loss": 0.7109375, "step": 495 }, { "dpo_loss": 0.2109375, "epoch": 0.08, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 9.879633649877476e-07, "loss": 0.2992, "projector_lr": 2.963890094963243e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.451171875, "rewards_train/margins": 2.484375, "rewards_train/rejected": -2.9375, "sft_loss": 0.77734375, "step": 496 }, { "dpo_loss": 0.453125, "epoch": 0.08, "final_loss": 0.453125, "grad_norm": 0.0, "learning_rate": 9.87907929763102e-07, "loss": 0.3697, "projector_lr": 2.9637237892893057e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1748046875, "rewards_train/margins": 1.734375, "rewards_train/rejected": -1.90625, "sft_loss": 0.7265625, "step": 497 }, { "dpo_loss": 0.1875, "epoch": 0.08, "final_loss": 0.1875, "grad_norm": 0.0, "learning_rate": 9.878523687394008e-07, "loss": 0.1746, "projector_lr": 2.963557106218203e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1064453125, "rewards_train/margins": 2.015625, "rewards_train/rejected": -2.125, "sft_loss": 0.6875, "step": 498 }, { "dpo_loss": 0.400390625, "epoch": 0.08, "final_loss": 0.400390625, "grad_norm": 0.0, "learning_rate": 9.877966819309697e-07, "loss": 0.4619, "projector_lr": 2.9633900457929093e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 2.03125, "rewards_train/rejected": -2.9375, "sft_loss": 0.9609375, "step": 499 }, { "dpo_loss": 0.130859375, "epoch": 0.08, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 9.877408693521662e-07, "loss": 0.3213, "projector_lr": 2.963222608056499e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 2.84375, "rewards_train/rejected": -3.6875, "sft_loss": 0.7421875, "step": 500 }, { "dpo_loss": 0.404296875, "epoch": 0.08, "final_loss": 0.404296875, "grad_norm": 0.0, "learning_rate": 9.876849310173813e-07, "loss": 0.3102, "projector_lr": 2.963054793052144e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.375, "rewards_train/margins": 1.09375, "rewards_train/rejected": -1.46875, "sft_loss": 0.6015625, "step": 501 }, { "dpo_loss": 0.2412109375, "epoch": 0.08, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 9.876288669410373e-07, "loss": 0.3667, "projector_lr": 2.962886600823112e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2314453125, "rewards_train/margins": 3.296875, "rewards_train/rejected": -3.53125, "sft_loss": 0.59375, "step": 502 }, { "dpo_loss": 0.33203125, "epoch": 0.08, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 9.875726771375895e-07, "loss": 0.2682, "projector_lr": 2.9627180314127684e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.4296875, "rewards_train/margins": 1.3359375, "rewards_train/rejected": -1.765625, "sft_loss": 0.62109375, "step": 503 }, { "dpo_loss": 0.1396484375, "epoch": 0.08, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 9.875163616215255e-07, "loss": 0.5339, "projector_lr": 2.9625490848645765e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.201171875, "rewards_train/margins": 2.953125, "rewards_train/rejected": -3.15625, "sft_loss": 0.57421875, "step": 504 }, { "dpo_loss": 0.1943359375, "epoch": 0.08, "final_loss": 0.1943359375, "grad_norm": 0.0, "learning_rate": 9.874599204073654e-07, "loss": 0.2786, "projector_lr": 2.9623797612220962e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.345703125, "rewards_train/margins": 2.171875, "rewards_train/rejected": -2.515625, "sft_loss": 0.91796875, "step": 505 }, { "dpo_loss": 0.44921875, "epoch": 0.08, "final_loss": 0.44921875, "grad_norm": 0.0, "learning_rate": 9.874033535096615e-07, "loss": 0.4254, "projector_lr": 2.9622100605289846e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.0185546875, "rewards_train/margins": 1.0625, "rewards_train/rejected": -1.078125, "sft_loss": 0.81640625, "step": 506 }, { "dpo_loss": 0.41015625, "epoch": 0.08, "final_loss": 0.41015625, "grad_norm": 0.0, "learning_rate": 9.87346660942999e-07, "loss": 0.3336, "projector_lr": 2.9620399828289972e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.53125, "rewards_train/margins": 1.3203125, "rewards_train/rejected": -1.8515625, "sft_loss": 0.9375, "step": 507 }, { "dpo_loss": 0.1650390625, "epoch": 0.08, "final_loss": 0.1650390625, "grad_norm": 0.0, "learning_rate": 9.872898427219948e-07, "loss": 0.1559, "projector_lr": 2.9618695281659845e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.050537109375, "rewards_train/margins": 2.1875, "rewards_train/rejected": -2.234375, "sft_loss": 0.66015625, "step": 508 }, { "dpo_loss": 0.44140625, "epoch": 0.08, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 9.872328988612986e-07, "loss": 0.4076, "projector_lr": 2.961698696583896e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.28125, "rewards_train/margins": 1.3125, "rewards_train/rejected": -1.59375, "sft_loss": 0.6953125, "step": 509 }, { "dpo_loss": 0.28515625, "epoch": 0.08, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 9.871758293755926e-07, "loss": 0.2362, "projector_lr": 2.9615274881267777e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.4375, "rewards_train/margins": 1.765625, "rewards_train/rejected": -2.203125, "sft_loss": 0.6640625, "step": 510 }, { "dpo_loss": 0.59765625, "epoch": 0.08, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 9.87118634279591e-07, "loss": 0.4795, "projector_lr": 2.9613559028387736e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 1.3046875, "rewards_train/rejected": -2.125, "sft_loss": 0.62890625, "step": 511 }, { "dpo_loss": 0.0947265625, "epoch": 0.08, "final_loss": 0.0947265625, "grad_norm": 0.0, "learning_rate": 9.87061313588041e-07, "loss": 0.1782, "projector_lr": 2.9611839407641227e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.68359375, "rewards_train/margins": 3.203125, "rewards_train/rejected": -3.890625, "sft_loss": 0.6015625, "step": 512 }, { "dpo_loss": 0.16796875, "epoch": 0.08, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 9.870038673157213e-07, "loss": 0.2838, "projector_lr": 2.961011601947164e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.263671875, "rewards_train/margins": 2.453125, "rewards_train/rejected": -2.71875, "sft_loss": 0.71484375, "step": 513 }, { "dpo_loss": 0.255859375, "epoch": 0.08, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 9.869462954774438e-07, "loss": 0.2591, "projector_lr": 2.9608388864323315e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.06884765625, "rewards_train/margins": 2.1875, "rewards_train/rejected": -2.25, "sft_loss": 0.83984375, "step": 514 }, { "dpo_loss": 0.353515625, "epoch": 0.08, "final_loss": 0.353515625, "grad_norm": 0.0, "learning_rate": 9.868885980880524e-07, "loss": 0.2927, "projector_lr": 2.9606657942641575e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.09375, "rewards_train/margins": 1.0625, "rewards_train/rejected": -1.15625, "sft_loss": 0.703125, "step": 515 }, { "dpo_loss": 0.140625, "epoch": 0.08, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 9.868307751624234e-07, "loss": 0.1948, "projector_lr": 2.9604923254872704e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.31640625, "rewards_train/margins": 2.140625, "rewards_train/rejected": -2.453125, "sft_loss": 0.69921875, "step": 516 }, { "dpo_loss": 0.35546875, "epoch": 0.08, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 9.867728267154656e-07, "loss": 0.2921, "projector_lr": 2.9603184801463965e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.259765625, "rewards_train/margins": 1.84375, "rewards_train/rejected": -1.5859375, "sft_loss": 0.81640625, "step": 517 }, { "dpo_loss": 0.1337890625, "epoch": 0.08, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 9.867147527621198e-07, "loss": 0.3445, "projector_lr": 2.9601442582863595e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.046142578125, "rewards_train/margins": 2.84375, "rewards_train/rejected": -2.890625, "sft_loss": 0.68359375, "step": 518 }, { "dpo_loss": 0.482421875, "epoch": 0.08, "final_loss": 0.482421875, "grad_norm": 0.0, "learning_rate": 9.866565533173596e-07, "loss": 0.2856, "projector_lr": 2.9599696599520788e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.74609375, "rewards_train/margins": 1.515625, "rewards_train/rejected": -2.265625, "sft_loss": 0.73046875, "step": 519 }, { "dpo_loss": 0.298828125, "epoch": 0.08, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 9.865982283961907e-07, "loss": 0.3024, "projector_lr": 2.959794685188572e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1689453125, "rewards_train/margins": 2.546875, "rewards_train/rejected": -2.71875, "sft_loss": 0.77734375, "step": 520 }, { "dpo_loss": 0.2890625, "epoch": 0.08, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 9.865397780136513e-07, "loss": 0.262, "projector_lr": 2.959619334040954e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 2.140625, "rewards_train/rejected": -2.671875, "sft_loss": 0.61328125, "step": 521 }, { "dpo_loss": 0.318359375, "epoch": 0.08, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 9.86481202184812e-07, "loss": 0.2815, "projector_lr": 2.959443606554436e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 1.65625, "rewards_train/rejected": -2.125, "sft_loss": 0.796875, "step": 522 }, { "dpo_loss": 0.26171875, "epoch": 0.08, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 9.86422500924775e-07, "loss": 0.3352, "projector_lr": 2.9592675027743256e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 2.0, "rewards_train/rejected": -2.640625, "sft_loss": 0.5234375, "step": 523 }, { "dpo_loss": 0.26171875, "epoch": 0.08, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 9.863636742486763e-07, "loss": 0.3483, "projector_lr": 2.9590910227460294e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.205078125, "rewards_train/margins": 2.421875, "rewards_train/rejected": -2.625, "sft_loss": 0.671875, "step": 524 }, { "dpo_loss": 0.30859375, "epoch": 0.08, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 9.86304722171683e-07, "loss": 0.2936, "projector_lr": 2.9589141665150495e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0849609375, "rewards_train/margins": 2.34375, "rewards_train/rejected": -2.421875, "sft_loss": 0.74609375, "step": 525 }, { "dpo_loss": 0.578125, "epoch": 0.08, "final_loss": 0.578125, "grad_norm": 0.0, "learning_rate": 9.86245644708995e-07, "loss": 0.5488, "projector_lr": 2.9587369341269847e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 1.4296875, "rewards_train/rejected": -1.96875, "sft_loss": 0.65625, "step": 526 }, { "dpo_loss": 0.142578125, "epoch": 0.08, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 9.861864418758442e-07, "loss": 0.4047, "projector_lr": 2.9585593256275325e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 3.59375, "rewards_train/rejected": -3.859375, "sft_loss": 0.6015625, "step": 527 }, { "dpo_loss": 0.142578125, "epoch": 0.08, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 9.861271136874953e-07, "loss": 0.2226, "projector_lr": 2.958381341062486e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0703125, "rewards_train/margins": 2.59375, "rewards_train/rejected": -2.671875, "sft_loss": 0.640625, "step": 528 }, { "dpo_loss": 0.06396484375, "epoch": 0.08, "final_loss": 0.06396484375, "grad_norm": 0.0, "learning_rate": 9.860676601592451e-07, "loss": 0.1716, "projector_lr": 2.958202980477735e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0279541015625, "rewards_train/margins": 3.40625, "rewards_train/rejected": -3.4375, "sft_loss": 0.68359375, "step": 529 }, { "dpo_loss": 0.6640625, "epoch": 0.08, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 9.860080813064226e-07, "loss": 0.5221, "projector_lr": 2.958024243919268e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.50390625, "rewards_train/margins": 1.9921875, "rewards_train/rejected": -2.5, "sft_loss": 0.6875, "step": 530 }, { "dpo_loss": 0.64453125, "epoch": 0.08, "final_loss": 0.64453125, "grad_norm": 0.0, "learning_rate": 9.859483771443895e-07, "loss": 0.5694, "projector_lr": 2.957845131433168e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 0.98828125, "rewards_train/rejected": -1.5546875, "sft_loss": 0.9296875, "step": 531 }, { "dpo_loss": 0.232421875, "epoch": 0.09, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 9.85888547688539e-07, "loss": 0.3995, "projector_lr": 2.957665643065617e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.421875, "rewards_train/margins": 3.078125, "rewards_train/rejected": -3.5, "sft_loss": 0.62890625, "step": 532 }, { "dpo_loss": 0.29296875, "epoch": 0.09, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 9.858285929542977e-07, "loss": 0.3156, "projector_lr": 2.9574857788628936e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.46875, "rewards_train/margins": 2.015625, "rewards_train/rejected": -2.484375, "sft_loss": 0.58984375, "step": 533 }, { "dpo_loss": 0.1845703125, "epoch": 0.09, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 9.857685129571238e-07, "loss": 0.1395, "projector_lr": 2.9573055388713716e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.130859375, "rewards_train/margins": 2.609375, "rewards_train/rejected": -2.75, "sft_loss": 0.92578125, "step": 534 }, { "dpo_loss": 0.5546875, "epoch": 0.09, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 9.85708307712508e-07, "loss": 0.5154, "projector_lr": 2.9571249231375238e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 2.46875, "rewards_train/rejected": -3.109375, "sft_loss": 0.66796875, "step": 535 }, { "dpo_loss": 0.189453125, "epoch": 0.09, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 9.85647977235973e-07, "loss": 0.2158, "projector_lr": 2.956943931707919e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07470703125, "rewards_train/margins": 2.359375, "rewards_train/rejected": -2.28125, "sft_loss": 0.71875, "step": 536 }, { "dpo_loss": 0.50390625, "epoch": 0.09, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 9.85587521543074e-07, "loss": 0.3135, "projector_lr": 2.956762564629222e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 1.390625, "rewards_train/rejected": -2.15625, "sft_loss": 0.6640625, "step": 537 }, { "dpo_loss": 0.236328125, "epoch": 0.09, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 9.855269406493986e-07, "loss": 0.2793, "projector_lr": 2.956580821948196e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 2.6875, "rewards_train/rejected": -3.78125, "sft_loss": 0.890625, "step": 538 }, { "dpo_loss": 0.2314453125, "epoch": 0.09, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 9.85466234570567e-07, "loss": 0.3775, "projector_lr": 2.956398703711701e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.099609375, "rewards_train/margins": 3.109375, "rewards_train/rejected": -3.203125, "sft_loss": 0.59375, "step": 539 }, { "dpo_loss": 0.470703125, "epoch": 0.09, "final_loss": 0.470703125, "grad_norm": 0.0, "learning_rate": 9.854054033222305e-07, "loss": 0.5283, "projector_lr": 2.956216209966692e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.047119140625, "rewards_train/margins": 1.265625, "rewards_train/rejected": -1.3046875, "sft_loss": 0.88671875, "step": 540 }, { "dpo_loss": 0.4296875, "epoch": 0.09, "final_loss": 0.4296875, "grad_norm": 0.0, "learning_rate": 9.85344446920074e-07, "loss": 0.6185, "projector_lr": 2.956033340760222e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 1.7421875, "rewards_train/rejected": -2.796875, "sft_loss": 1.046875, "step": 541 }, { "dpo_loss": 0.353515625, "epoch": 0.09, "final_loss": 0.353515625, "grad_norm": 0.0, "learning_rate": 9.85283365379814e-07, "loss": 0.3464, "projector_lr": 2.9558500961394426e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 1.8828125, "rewards_train/rejected": -2.34375, "sft_loss": 0.64453125, "step": 542 }, { "dpo_loss": 0.64453125, "epoch": 0.09, "final_loss": 0.64453125, "grad_norm": 0.0, "learning_rate": 9.852221587171993e-07, "loss": 0.382, "projector_lr": 2.9556664761515984e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.39453125, "rewards_train/margins": 0.43359375, "rewards_train/rejected": -0.828125, "sft_loss": 0.71484375, "step": 543 }, { "dpo_loss": 0.271484375, "epoch": 0.09, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 9.851608269480112e-07, "loss": 0.324, "projector_lr": 2.955482480844034e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1474609375, "rewards_train/margins": 2.859375, "rewards_train/rejected": -3.015625, "sft_loss": 0.85546875, "step": 544 }, { "dpo_loss": 0.29296875, "epoch": 0.09, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 9.850993700880628e-07, "loss": 0.1852, "projector_lr": 2.9552981102641883e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.015625, "rewards_train/margins": 2.25, "rewards_train/rejected": -3.265625, "sft_loss": 0.81640625, "step": 545 }, { "dpo_loss": 0.0257568359375, "epoch": 0.09, "final_loss": 0.0257568359375, "grad_norm": 0.0, "learning_rate": 9.850377881531998e-07, "loss": 0.1234, "projector_lr": 2.9551133644595997e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.71875, "sft_loss": 0.6171875, "step": 546 }, { "dpo_loss": 0.390625, "epoch": 0.09, "final_loss": 0.390625, "grad_norm": 0.0, "learning_rate": 9.849760811593005e-07, "loss": 0.3319, "projector_lr": 2.9549282434779014e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.474609375, "rewards_train/margins": 1.90625, "rewards_train/rejected": -2.375, "sft_loss": 0.828125, "step": 547 }, { "dpo_loss": 0.416015625, "epoch": 0.09, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 9.849142491222743e-07, "loss": 0.314, "projector_lr": 2.9547427473668233e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 1.265625, "rewards_train/rejected": -1.828125, "sft_loss": 0.8359375, "step": 548 }, { "dpo_loss": 0.0888671875, "epoch": 0.09, "final_loss": 0.0888671875, "grad_norm": 0.0, "learning_rate": 9.848522920580641e-07, "loss": 0.3415, "projector_lr": 2.9545568761741926e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.068359375, "rewards_train/margins": 3.546875, "rewards_train/rejected": -3.625, "sft_loss": 0.68359375, "step": 549 }, { "dpo_loss": 0.1552734375, "epoch": 0.09, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 9.847902099826447e-07, "loss": 0.1699, "projector_lr": 2.954370629947934e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.388671875, "rewards_train/margins": 2.546875, "rewards_train/rejected": -2.9375, "sft_loss": 0.92578125, "step": 550 }, { "dpo_loss": 0.314453125, "epoch": 0.09, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 9.847280029120224e-07, "loss": 0.2726, "projector_lr": 2.9541840087360674e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.181640625, "rewards_train/margins": 2.21875, "rewards_train/rejected": -2.390625, "sft_loss": 0.57421875, "step": 551 }, { "dpo_loss": 0.353515625, "epoch": 0.09, "final_loss": 0.353515625, "grad_norm": 0.0, "learning_rate": 9.846656708622366e-07, "loss": 0.3206, "projector_lr": 2.9539970125867103e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.19921875, "rewards_train/margins": 1.8203125, "rewards_train/rejected": -2.015625, "sft_loss": 0.76171875, "step": 552 }, { "dpo_loss": 0.375, "epoch": 0.09, "final_loss": 0.375, "grad_norm": 0.0, "learning_rate": 9.846032138493584e-07, "loss": 0.3505, "projector_lr": 2.9538096415480758e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.078125, "rewards_train/margins": 2.703125, "rewards_train/rejected": -3.78125, "sft_loss": 0.9921875, "step": 553 }, { "dpo_loss": 0.703125, "epoch": 0.09, "final_loss": 0.703125, "grad_norm": 0.0, "learning_rate": 9.845406318894917e-07, "loss": 0.6028, "projector_lr": 2.9536218956684753e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 2.09375, "rewards_train/rejected": -2.734375, "sft_loss": 0.609375, "step": 554 }, { "dpo_loss": 0.404296875, "epoch": 0.09, "final_loss": 0.404296875, "grad_norm": 0.0, "learning_rate": 9.844779249987717e-07, "loss": 0.5256, "projector_lr": 2.9534337749963156e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6875, "rewards_train/margins": 2.21875, "rewards_train/rejected": -2.90625, "sft_loss": 0.75, "step": 555 }, { "dpo_loss": 0.302734375, "epoch": 0.09, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 9.844150931933669e-07, "loss": 0.2461, "projector_lr": 2.9532452795801007e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2373046875, "rewards_train/margins": 2.609375, "rewards_train/rejected": -2.859375, "sft_loss": 0.66796875, "step": 556 }, { "dpo_loss": 0.50390625, "epoch": 0.09, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 9.84352136489477e-07, "loss": 0.5241, "projector_lr": 2.9530564094684307e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0, "rewards_train/margins": 1.875, "rewards_train/rejected": -2.875, "sft_loss": 0.859375, "step": 557 }, { "dpo_loss": 0.1611328125, "epoch": 0.09, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 9.842890549033345e-07, "loss": 0.2187, "projector_lr": 2.952867164710004e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37109375, "rewards_train/margins": 2.4375, "rewards_train/rejected": -2.0625, "sft_loss": 0.84375, "step": 558 }, { "dpo_loss": 0.25390625, "epoch": 0.09, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 9.84225848451204e-07, "loss": 0.2993, "projector_lr": 2.9526775453536124e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1064453125, "rewards_train/margins": 2.234375, "rewards_train/rejected": -2.34375, "sft_loss": 0.84765625, "step": 559 }, { "dpo_loss": 0.0263671875, "epoch": 0.09, "final_loss": 0.0263671875, "grad_norm": 0.0, "learning_rate": 9.841625171493822e-07, "loss": 0.1614, "projector_lr": 2.9524875514481464e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14453125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.375, "sft_loss": 0.796875, "step": 560 }, { "dpo_loss": 0.2099609375, "epoch": 0.09, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 9.84099061014198e-07, "loss": 0.3937, "projector_lr": 2.9522971830425945e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.25, "rewards_train/margins": 3.296875, "rewards_train/rejected": -3.046875, "sft_loss": 0.63671875, "step": 561 }, { "dpo_loss": 0.12353515625, "epoch": 0.09, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 9.840354800620127e-07, "loss": 0.1937, "projector_lr": 2.9521064401860386e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1318359375, "rewards_train/margins": 3.328125, "rewards_train/rejected": -3.453125, "sft_loss": 0.462890625, "step": 562 }, { "dpo_loss": 0.16796875, "epoch": 0.09, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 9.839717743092195e-07, "loss": 0.2317, "projector_lr": 2.9519153229276586e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1748046875, "rewards_train/margins": 2.734375, "rewards_train/rejected": -2.90625, "sft_loss": 0.62890625, "step": 563 }, { "dpo_loss": 0.67578125, "epoch": 0.09, "final_loss": 0.67578125, "grad_norm": 0.0, "learning_rate": 9.839079437722437e-07, "loss": 0.472, "projector_lr": 2.9517238313167312e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 1.0703125, "rewards_train/rejected": -2.1875, "sft_loss": 0.83203125, "step": 564 }, { "dpo_loss": 0.6875, "epoch": 0.09, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 9.83843988467543e-07, "loss": 0.3988, "projector_lr": 2.9515319654026297e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 1.3359375, "rewards_train/rejected": -1.9765625, "sft_loss": 0.64453125, "step": 565 }, { "dpo_loss": 0.302734375, "epoch": 0.09, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 9.837799084116078e-07, "loss": 0.2007, "projector_lr": 2.9513397252348234e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 2.09375, "rewards_train/rejected": -3.0, "sft_loss": 0.90234375, "step": 566 }, { "dpo_loss": 0.59375, "epoch": 0.09, "final_loss": 0.59375, "grad_norm": 0.0, "learning_rate": 9.837157036209593e-07, "loss": 0.4787, "projector_lr": 2.9511471108628784e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.44921875, "rewards_train/margins": 0.49609375, "rewards_train/rejected": -0.9453125, "sft_loss": 0.87890625, "step": 567 }, { "dpo_loss": 0.1953125, "epoch": 0.09, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 9.836513741121522e-07, "loss": 0.4212, "projector_lr": 2.9509541223364567e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.37890625, "rewards_train/margins": 2.09375, "rewards_train/rejected": -2.46875, "sft_loss": 0.83203125, "step": 568 }, { "dpo_loss": 0.0142822265625, "epoch": 0.09, "final_loss": 0.0142822265625, "grad_norm": 0.0, "learning_rate": 9.835869199017723e-07, "loss": 0.0958, "projector_lr": 2.9507607597053174e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10546875, "rewards_train/margins": 4.5625, "rewards_train/rejected": -4.4375, "sft_loss": 0.68359375, "step": 569 }, { "dpo_loss": 0.1669921875, "epoch": 0.09, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 9.835223410064385e-07, "loss": 0.1724, "projector_lr": 2.950567023019316e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1376953125, "rewards_train/margins": 3.109375, "rewards_train/rejected": -3.25, "sft_loss": 0.66796875, "step": 570 }, { "dpo_loss": 0.796875, "epoch": 0.09, "final_loss": 0.796875, "grad_norm": 0.0, "learning_rate": 9.834576374428015e-07, "loss": 0.5304, "projector_lr": 2.9503729123284046e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 0.392578125, "rewards_train/rejected": -1.234375, "sft_loss": 0.61328125, "step": 571 }, { "dpo_loss": 0.2412109375, "epoch": 0.09, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 9.833928092275436e-07, "loss": 0.2698, "projector_lr": 2.950178427682631e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 1.984375, "rewards_train/rejected": -2.53125, "sft_loss": 0.8203125, "step": 572 }, { "dpo_loss": 0.302734375, "epoch": 0.09, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 9.833278563773799e-07, "loss": 0.2543, "projector_lr": 2.94998356913214e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 2.28125, "rewards_train/rejected": -2.875, "sft_loss": 0.4921875, "step": 573 }, { "dpo_loss": 0.390625, "epoch": 0.09, "final_loss": 0.390625, "grad_norm": 0.0, "learning_rate": 9.832627789090574e-07, "loss": 0.4029, "projector_lr": 2.949788336727173e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 0.9609375, "rewards_train/rejected": -1.3671875, "sft_loss": 0.67578125, "step": 574 }, { "dpo_loss": 0.291015625, "epoch": 0.09, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 9.831975768393555e-07, "loss": 0.2468, "projector_lr": 2.9495927305180667e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.40625, "rewards_train/margins": 2.1875, "rewards_train/rejected": -2.59375, "sft_loss": 0.71484375, "step": 575 }, { "dpo_loss": 0.1806640625, "epoch": 0.09, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 9.831322501850852e-07, "loss": 0.1981, "projector_lr": 2.949396750555256e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1435546875, "rewards_train/margins": 4.5625, "rewards_train/rejected": -4.40625, "sft_loss": 0.490234375, "step": 576 }, { "dpo_loss": 0.263671875, "epoch": 0.09, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 9.8306679896309e-07, "loss": 0.359, "projector_lr": 2.9492003968892705e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.546875, "rewards_train/margins": 2.65625, "rewards_train/rejected": -3.203125, "sft_loss": 0.75, "step": 577 }, { "dpo_loss": 0.7890625, "epoch": 0.09, "final_loss": 0.7890625, "grad_norm": 0.0, "learning_rate": 9.830012231902453e-07, "loss": 0.5742, "projector_lr": 2.9490036695707364e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.6875, "rewards_train/margins": 1.0, "rewards_train/rejected": -1.6953125, "sft_loss": 0.73046875, "step": 578 }, { "dpo_loss": 0.365234375, "epoch": 0.09, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 9.82935522883459e-07, "loss": 0.4054, "projector_lr": 2.9488065686503775e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.78125, "rewards_train/margins": 1.515625, "rewards_train/rejected": -2.296875, "sft_loss": 0.9375, "step": 579 }, { "dpo_loss": 0.21484375, "epoch": 0.09, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 9.828696980596706e-07, "loss": 0.2402, "projector_lr": 2.948609094179012e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2001953125, "rewards_train/margins": 1.984375, "rewards_train/rejected": -2.1875, "sft_loss": 0.625, "step": 580 }, { "dpo_loss": 0.41796875, "epoch": 0.09, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 9.828037487358522e-07, "loss": 0.3157, "projector_lr": 2.9484112462075565e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 2.4375, "rewards_train/rejected": -3.03125, "sft_loss": 0.54296875, "step": 581 }, { "dpo_loss": 0.1923828125, "epoch": 0.09, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 9.827376749290073e-07, "loss": 0.2746, "projector_lr": 2.948213024787022e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.046875, "rewards_train/margins": 2.1875, "rewards_train/rejected": -2.140625, "sft_loss": 0.75, "step": 582 }, { "dpo_loss": 0.37109375, "epoch": 0.09, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 9.826714766561725e-07, "loss": 0.358, "projector_lr": 2.948014429968518e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75, "rewards_train/margins": 1.7109375, "rewards_train/rejected": -2.453125, "sft_loss": 0.82421875, "step": 583 }, { "dpo_loss": 0.11669921875, "epoch": 0.09, "final_loss": 0.11669921875, "grad_norm": 0.0, "learning_rate": 9.826051539344155e-07, "loss": 0.2706, "projector_lr": 2.947815461803247e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 2.421875, "rewards_train/rejected": -2.703125, "sft_loss": 0.72265625, "step": 584 }, { "dpo_loss": 0.4765625, "epoch": 0.09, "final_loss": 0.4765625, "grad_norm": 0.0, "learning_rate": 9.825387067808368e-07, "loss": 0.4075, "projector_lr": 2.947616120342511e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.515625, "rewards_train/margins": 2.640625, "rewards_train/rejected": -3.15625, "sft_loss": 0.6171875, "step": 585 }, { "dpo_loss": 0.72265625, "epoch": 0.09, "final_loss": 0.72265625, "grad_norm": 0.0, "learning_rate": 9.824721352125686e-07, "loss": 0.5798, "projector_lr": 2.947416405637706e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 1.703125, "rewards_train/rejected": -2.578125, "sft_loss": 1.0, "step": 586 }, { "dpo_loss": 0.1259765625, "epoch": 0.09, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 9.824054392467755e-07, "loss": 0.2085, "projector_lr": 2.947216317740327e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05029296875, "rewards_train/margins": 2.734375, "rewards_train/rejected": -2.78125, "sft_loss": 0.9140625, "step": 587 }, { "dpo_loss": 0.80859375, "epoch": 0.09, "final_loss": 0.80859375, "grad_norm": 0.0, "learning_rate": 9.823386189006535e-07, "loss": 0.46, "projector_lr": 2.9470158567019606e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 0.69140625, "rewards_train/rejected": -1.4609375, "sft_loss": 0.6796875, "step": 588 }, { "dpo_loss": 0.6640625, "epoch": 0.09, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 9.822716741914316e-07, "loss": 0.5946, "projector_lr": 2.946815022574295e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 1.578125, "rewards_train/rejected": -2.171875, "sft_loss": 0.77734375, "step": 589 }, { "dpo_loss": 0.1728515625, "epoch": 0.09, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 9.8220460513637e-07, "loss": 0.2373, "projector_lr": 2.9466138154091103e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.306640625, "rewards_train/margins": 2.21875, "rewards_train/rejected": -2.515625, "sft_loss": 0.640625, "step": 590 }, { "dpo_loss": 0.5234375, "epoch": 0.09, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 9.821374117527617e-07, "loss": 0.3518, "projector_lr": 2.9464122352582852e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.42578125, "rewards_train/margins": 1.5546875, "rewards_train/rejected": -1.984375, "sft_loss": 0.76171875, "step": 591 }, { "dpo_loss": 0.67578125, "epoch": 0.09, "final_loss": 0.67578125, "grad_norm": 0.0, "learning_rate": 9.820700940579312e-07, "loss": 0.4742, "projector_lr": 2.946210282173794e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 1.984375, "rewards_train/rejected": -2.765625, "sft_loss": 0.65625, "step": 592 }, { "dpo_loss": 0.10009765625, "epoch": 0.09, "final_loss": 0.10009765625, "grad_norm": 0.0, "learning_rate": 9.820026520692352e-07, "loss": 0.1514, "projector_lr": 2.946007956207706e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0068359375, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.125, "sft_loss": 0.5078125, "step": 593 }, { "dpo_loss": 0.275390625, "epoch": 0.1, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 9.819350858040627e-07, "loss": 0.2072, "projector_lr": 2.945805257412189e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.30078125, "rewards_train/margins": 2.21875, "rewards_train/rejected": -2.53125, "sft_loss": 0.796875, "step": 594 }, { "dpo_loss": 0.150390625, "epoch": 0.1, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 9.818673952798346e-07, "loss": 0.2213, "projector_lr": 2.945602185839504e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 2.515625, "rewards_train/rejected": -3.046875, "sft_loss": 0.60546875, "step": 595 }, { "dpo_loss": 0.1328125, "epoch": 0.1, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 9.817995805140035e-07, "loss": 0.1735, "projector_lr": 2.945398741542011e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.375, "rewards_train/margins": 3.140625, "rewards_train/rejected": -2.765625, "sft_loss": 0.65234375, "step": 596 }, { "dpo_loss": 0.361328125, "epoch": 0.1, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.817316415240548e-07, "loss": 0.4266, "projector_lr": 2.9451949245721644e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.376953125, "rewards_train/margins": 1.5, "rewards_train/rejected": -1.875, "sft_loss": 0.59375, "step": 597 }, { "dpo_loss": 0.248046875, "epoch": 0.1, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 9.81663578327505e-07, "loss": 0.3467, "projector_lr": 2.944990734982515e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.306640625, "rewards_train/margins": 2.3125, "rewards_train/rejected": -2.625, "sft_loss": 0.7890625, "step": 598 }, { "dpo_loss": 0.10498046875, "epoch": 0.1, "final_loss": 0.10498046875, "grad_norm": 0.0, "learning_rate": 9.81595390941903e-07, "loss": 0.1069, "projector_lr": 2.9447861728257093e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.337890625, "rewards_train/margins": 2.59375, "rewards_train/rejected": -2.9375, "sft_loss": 0.7890625, "step": 599 }, { "dpo_loss": 0.17578125, "epoch": 0.1, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 9.815270793848303e-07, "loss": 0.2272, "projector_lr": 2.944581238154491e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 2.46875, "rewards_train/rejected": -3.109375, "sft_loss": 0.5859375, "step": 600 }, { "dpo_loss": 0.392578125, "epoch": 0.1, "final_loss": 0.392578125, "grad_norm": 0.0, "learning_rate": 9.814586436738997e-07, "loss": 0.4051, "projector_lr": 2.944375931021699e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.365234375, "rewards_train/margins": 2.96875, "rewards_train/rejected": -3.328125, "sft_loss": 0.8125, "step": 601 }, { "dpo_loss": 0.431640625, "epoch": 0.1, "final_loss": 0.431640625, "grad_norm": 0.0, "learning_rate": 9.81390083826756e-07, "loss": 0.449, "projector_lr": 2.944170251480268e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.435546875, "rewards_train/margins": 1.515625, "rewards_train/rejected": -1.9453125, "sft_loss": 0.6328125, "step": 602 }, { "dpo_loss": 0.130859375, "epoch": 0.1, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 9.813213998610764e-07, "loss": 0.1404, "projector_lr": 2.943964199583229e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1435546875, "rewards_train/margins": 2.359375, "rewards_train/rejected": -2.5, "sft_loss": 0.671875, "step": 603 }, { "dpo_loss": 0.10107421875, "epoch": 0.1, "final_loss": 0.10107421875, "grad_norm": 0.0, "learning_rate": 9.8125259179457e-07, "loss": 0.3081, "projector_lr": 2.94375777538371e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0283203125, "rewards_train/margins": 3.3125, "rewards_train/rejected": -3.28125, "sft_loss": 0.68359375, "step": 604 }, { "dpo_loss": 0.12890625, "epoch": 0.1, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 9.811836596449778e-07, "loss": 0.1905, "projector_lr": 2.9435509789349333e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.04052734375, "rewards_train/margins": 3.46875, "rewards_train/rejected": -3.515625, "sft_loss": 0.58984375, "step": 605 }, { "dpo_loss": 0.26171875, "epoch": 0.1, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 9.811146034300727e-07, "loss": 0.334, "projector_lr": 2.943343810290218e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48828125, "rewards_train/margins": 1.828125, "rewards_train/rejected": -2.3125, "sft_loss": 0.82421875, "step": 606 }, { "dpo_loss": 0.361328125, "epoch": 0.1, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.810454231676597e-07, "loss": 0.3108, "projector_lr": 2.9431362695029796e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.03759765625, "rewards_train/margins": 1.6171875, "rewards_train/rejected": -1.578125, "sft_loss": 0.67578125, "step": 607 }, { "dpo_loss": 0.2109375, "epoch": 0.1, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 9.809761188755763e-07, "loss": 0.239, "projector_lr": 2.9429283566267288e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.130859375, "rewards_train/margins": 2.453125, "rewards_train/rejected": -2.59375, "sft_loss": 0.55859375, "step": 608 }, { "dpo_loss": 0.2021484375, "epoch": 0.1, "final_loss": 0.2021484375, "grad_norm": 0.0, "learning_rate": 9.809066905716908e-07, "loss": 0.2377, "projector_lr": 2.9427200717150723e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -2.453125, "sft_loss": 0.8203125, "step": 609 }, { "dpo_loss": 0.294921875, "epoch": 0.1, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 9.808371382739044e-07, "loss": 0.4393, "projector_lr": 2.9425114148217133e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.55859375, "rewards_train/margins": 2.53125, "rewards_train/rejected": -3.09375, "sft_loss": 0.69921875, "step": 610 }, { "dpo_loss": 0.5390625, "epoch": 0.1, "final_loss": 0.5390625, "grad_norm": 0.0, "learning_rate": 9.8076746200015e-07, "loss": 0.4548, "projector_lr": 2.9423023860004504e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 2.046875, "rewards_train/rejected": -2.84375, "sft_loss": 0.7421875, "step": 611 }, { "dpo_loss": 0.1982421875, "epoch": 0.1, "final_loss": 0.1982421875, "grad_norm": 0.0, "learning_rate": 9.806976617683926e-07, "loss": 0.1582, "projector_lr": 2.942092985305178e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.03125, "rewards_train/margins": 3.296875, "rewards_train/rejected": -3.328125, "sft_loss": 0.8671875, "step": 612 }, { "dpo_loss": 0.64453125, "epoch": 0.1, "final_loss": 0.64453125, "grad_norm": 0.0, "learning_rate": 9.806277375966292e-07, "loss": 0.3709, "projector_lr": 2.9418832127898873e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 1.375, "rewards_train/rejected": -1.890625, "sft_loss": 0.8359375, "step": 613 }, { "dpo_loss": 0.345703125, "epoch": 0.1, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 9.80557689502888e-07, "loss": 0.3526, "projector_lr": 2.9416730685086643e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.48046875, "rewards_train/margins": 2.171875, "rewards_train/rejected": -2.65625, "sft_loss": 0.59765625, "step": 614 }, { "dpo_loss": 0.34375, "epoch": 0.1, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 9.804875175052303e-07, "loss": 0.2815, "projector_lr": 2.9414625525156913e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.494140625, "rewards_train/margins": 1.5390625, "rewards_train/rejected": -2.03125, "sft_loss": 0.69921875, "step": 615 }, { "dpo_loss": 0.080078125, "epoch": 0.1, "final_loss": 0.080078125, "grad_norm": 0.0, "learning_rate": 9.804172216217487e-07, "loss": 0.1065, "projector_lr": 2.9412516648652464e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 3.40625, "rewards_train/rejected": -3.765625, "sft_loss": 0.62890625, "step": 616 }, { "dpo_loss": 0.107421875, "epoch": 0.1, "final_loss": 0.107421875, "grad_norm": 0.0, "learning_rate": 9.803468018705675e-07, "loss": 0.1771, "projector_lr": 2.941040405611703e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35546875, "rewards_train/margins": 2.546875, "rewards_train/rejected": -2.1875, "sft_loss": 0.8125, "step": 617 }, { "dpo_loss": 0.46484375, "epoch": 0.1, "final_loss": 0.46484375, "grad_norm": 0.0, "learning_rate": 9.802762582698438e-07, "loss": 0.3049, "projector_lr": 2.9408287748095317e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.53125, "rewards_train/margins": 1.25, "rewards_train/rejected": -1.78125, "sft_loss": 1.1484375, "step": 618 }, { "dpo_loss": 0.5, "epoch": 0.1, "final_loss": 0.5, "grad_norm": 0.0, "learning_rate": 9.802055908377657e-07, "loss": 0.3395, "projector_lr": 2.9406167725132974e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.011962890625, "rewards_train/margins": 1.3671875, "rewards_train/rejected": -1.359375, "sft_loss": 0.7265625, "step": 619 }, { "dpo_loss": 0.2099609375, "epoch": 0.1, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 9.80134799592554e-07, "loss": 0.2754, "projector_lr": 2.940404398777662e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 2.046875, "rewards_train/rejected": -2.609375, "sft_loss": 0.77734375, "step": 620 }, { "dpo_loss": 0.1494140625, "epoch": 0.1, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 9.800638845524607e-07, "loss": 0.1628, "projector_lr": 2.940191653657382e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.33203125, "rewards_train/margins": 2.921875, "rewards_train/rejected": -3.25, "sft_loss": 0.86328125, "step": 621 }, { "dpo_loss": 0.5078125, "epoch": 0.1, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 9.799928457357702e-07, "loss": 0.3607, "projector_lr": 2.939978537207311e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.46875, "rewards_train/margins": 0.71875, "rewards_train/rejected": -1.1875, "sft_loss": 0.65625, "step": 622 }, { "dpo_loss": 0.25390625, "epoch": 0.1, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 9.799216831607987e-07, "loss": 0.1887, "projector_lr": 2.9397650494823967e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.058837890625, "rewards_train/margins": 3.09375, "rewards_train/rejected": -3.15625, "sft_loss": 0.953125, "step": 623 }, { "dpo_loss": 0.396484375, "epoch": 0.1, "final_loss": 0.396484375, "grad_norm": 0.0, "learning_rate": 9.798503968458945e-07, "loss": 0.3017, "projector_lr": 2.9395511905376833e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.265625, "rewards_train/margins": 2.84375, "rewards_train/rejected": -3.109375, "sft_loss": 0.609375, "step": 624 }, { "dpo_loss": 0.3671875, "epoch": 0.1, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 9.797789868094373e-07, "loss": 0.3283, "projector_lr": 2.939336960428312e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.306640625, "rewards_train/margins": 1.546875, "rewards_train/rejected": -1.8515625, "sft_loss": 0.76171875, "step": 625 }, { "dpo_loss": 0.2333984375, "epoch": 0.1, "final_loss": 0.2333984375, "grad_norm": 0.0, "learning_rate": 9.797074530698393e-07, "loss": 0.348, "projector_lr": 2.939122359209518e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.20703125, "rewards_train/margins": 2.1875, "rewards_train/rejected": -2.390625, "sft_loss": 0.703125, "step": 626 }, { "dpo_loss": 0.44140625, "epoch": 0.1, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 9.79635795645544e-07, "loss": 0.536, "projector_lr": 2.938907386936632e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 1.296875, "rewards_train/rejected": -1.8125, "sft_loss": 0.83203125, "step": 627 }, { "dpo_loss": 0.197265625, "epoch": 0.1, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 9.79564014555027e-07, "loss": 0.3258, "projector_lr": 2.9386920436650818e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 3.0517578125e-05, "rewards_train/margins": 2.28125, "rewards_train/rejected": -2.28125, "sft_loss": 0.65625, "step": 628 }, { "dpo_loss": 0.33203125, "epoch": 0.1, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 9.794921098167966e-07, "loss": 0.2436, "projector_lr": 2.9384763294503898e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.0306396484375, "rewards_train/margins": 2.203125, "rewards_train/rejected": -2.171875, "sft_loss": 0.6484375, "step": 629 }, { "dpo_loss": 0.1689453125, "epoch": 0.1, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 9.794200814493914e-07, "loss": 0.2752, "projector_lr": 2.9382602443481743e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3984375, "rewards_train/margins": 2.765625, "rewards_train/rejected": -3.171875, "sft_loss": 0.58203125, "step": 630 }, { "dpo_loss": 0.408203125, "epoch": 0.1, "final_loss": 0.408203125, "grad_norm": 0.0, "learning_rate": 9.793479294713831e-07, "loss": 0.336, "projector_lr": 2.93804378841415e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.390625, "rewards_train/margins": 2.03125, "rewards_train/rejected": -2.421875, "sft_loss": 0.80859375, "step": 631 }, { "dpo_loss": 0.1923828125, "epoch": 0.1, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 9.792756539013752e-07, "loss": 0.2597, "projector_lr": 2.9378269617041257e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.27734375, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.0, "sft_loss": 0.66796875, "step": 632 }, { "dpo_loss": 0.4921875, "epoch": 0.1, "final_loss": 0.4921875, "grad_norm": 0.0, "learning_rate": 9.792032547580022e-07, "loss": 0.3814, "projector_lr": 2.937609764274007e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1875, "rewards_train/margins": 1.9453125, "rewards_train/rejected": -3.140625, "sft_loss": 0.625, "step": 633 }, { "dpo_loss": 0.2177734375, "epoch": 0.1, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 9.791307320599314e-07, "loss": 0.4312, "projector_lr": 2.9373921961797943e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2451171875, "rewards_train/margins": 2.234375, "rewards_train/rejected": -1.984375, "sft_loss": 0.79296875, "step": 634 }, { "dpo_loss": 0.322265625, "epoch": 0.1, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 9.790580858258615e-07, "loss": 0.2402, "projector_lr": 2.9371742574775844e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1435546875, "rewards_train/margins": 2.015625, "rewards_train/rejected": -2.15625, "sft_loss": 0.74609375, "step": 635 }, { "dpo_loss": 0.423828125, "epoch": 0.1, "final_loss": 0.423828125, "grad_norm": 0.0, "learning_rate": 9.789853160745232e-07, "loss": 0.4706, "projector_lr": 2.9369559482235696e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.3828125, "rewards_train/margins": 1.8671875, "rewards_train/rejected": -2.25, "sft_loss": 0.83984375, "step": 636 }, { "dpo_loss": 0.53125, "epoch": 0.1, "final_loss": 0.53125, "grad_norm": 0.0, "learning_rate": 9.789124228246787e-07, "loss": 0.4624, "projector_lr": 2.9367372684740368e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.43359375, "rewards_train/margins": 1.421875, "rewards_train/rejected": -1.859375, "sft_loss": 0.65625, "step": 637 }, { "dpo_loss": 0.59375, "epoch": 0.1, "final_loss": 0.59375, "grad_norm": 0.0, "learning_rate": 9.788394060951227e-07, "loss": 0.3711, "projector_lr": 2.9365182182853683e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 1.0234375, "rewards_train/rejected": -1.546875, "sft_loss": 0.83203125, "step": 638 }, { "dpo_loss": 0.265625, "epoch": 0.1, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 9.787662659046813e-07, "loss": 0.1967, "projector_lr": 2.936298797714044e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.23828125, "rewards_train/margins": 2.09375, "rewards_train/rejected": -1.859375, "sft_loss": 0.765625, "step": 639 }, { "dpo_loss": 0.1591796875, "epoch": 0.1, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 9.786930022722121e-07, "loss": 0.4825, "projector_lr": 2.9360790068166368e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0311279296875, "rewards_train/margins": 2.71875, "rewards_train/rejected": -2.6875, "sft_loss": 0.6796875, "step": 640 }, { "dpo_loss": 0.39453125, "epoch": 0.1, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 9.786196152166055e-07, "loss": 0.2618, "projector_lr": 2.935858845649817e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.04736328125, "rewards_train/margins": 3.078125, "rewards_train/rejected": -3.125, "sft_loss": 0.84765625, "step": 641 }, { "dpo_loss": 0.1396484375, "epoch": 0.1, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 9.785461047567828e-07, "loss": 0.1315, "projector_lr": 2.935638314270349e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04345703125, "rewards_train/margins": 3.59375, "rewards_train/rejected": -3.546875, "sft_loss": 0.77734375, "step": 642 }, { "dpo_loss": 0.353515625, "epoch": 0.1, "final_loss": 0.353515625, "grad_norm": 0.0, "learning_rate": 9.784724709116977e-07, "loss": 0.3306, "projector_lr": 2.9354174127350932e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.134765625, "rewards_train/margins": 3.03125, "rewards_train/rejected": -2.890625, "sft_loss": 0.69921875, "step": 643 }, { "dpo_loss": 0.58984375, "epoch": 0.1, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 9.783987137003352e-07, "loss": 0.4042, "projector_lr": 2.935196141101006e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.150390625, "rewards_train/margins": 1.2421875, "rewards_train/rejected": -1.390625, "sft_loss": 0.90234375, "step": 644 }, { "dpo_loss": 0.46875, "epoch": 0.1, "final_loss": 0.46875, "grad_norm": 0.0, "learning_rate": 9.783248331417126e-07, "loss": 0.4835, "projector_lr": 2.934974499425138e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 1.171875, "rewards_train/rejected": -2.5, "sft_loss": 0.7578125, "step": 645 }, { "dpo_loss": 0.5390625, "epoch": 0.1, "final_loss": 0.5390625, "grad_norm": 0.0, "learning_rate": 9.782508292548786e-07, "loss": 0.4031, "projector_lr": 2.934752487764636e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.341796875, "rewards_train/margins": 1.7421875, "rewards_train/rejected": -2.078125, "sft_loss": 0.76953125, "step": 646 }, { "dpo_loss": 0.25390625, "epoch": 0.1, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 9.78176702058914e-07, "loss": 0.3774, "projector_lr": 2.934530106176742e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.185546875, "rewards_train/margins": 2.28125, "rewards_train/rejected": -2.484375, "sft_loss": 0.66015625, "step": 647 }, { "dpo_loss": 0.162109375, "epoch": 0.1, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 9.781024515729313e-07, "loss": 0.3885, "projector_lr": 2.934307354718794e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 3.3125, "rewards_train/rejected": -3.96875, "sft_loss": 0.671875, "step": 648 }, { "dpo_loss": 0.349609375, "epoch": 0.1, "final_loss": 0.349609375, "grad_norm": 0.0, "learning_rate": 9.780280778160748e-07, "loss": 0.3605, "projector_lr": 2.9340842334482247e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.109375, "rewards_train/margins": 2.828125, "rewards_train/rejected": -2.9375, "sft_loss": 0.64453125, "step": 649 }, { "dpo_loss": 0.255859375, "epoch": 0.1, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 9.779535808075205e-07, "loss": 0.2795, "projector_lr": 2.9338607424225615e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.003173828125, "rewards_train/margins": 1.859375, "rewards_train/rejected": -1.8671875, "sft_loss": 0.53125, "step": 650 }, { "dpo_loss": 0.302734375, "epoch": 0.1, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 9.77878960566476e-07, "loss": 0.3031, "projector_lr": 2.9336368816994286e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.42578125, "rewards_train/margins": 1.71875, "rewards_train/rejected": -2.140625, "sft_loss": 0.609375, "step": 651 }, { "dpo_loss": 0.23828125, "epoch": 0.1, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 9.778042171121814e-07, "loss": 0.1976, "projector_lr": 2.9334126513365444e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.27734375, "rewards_train/margins": 2.3125, "rewards_train/rejected": -2.03125, "sft_loss": 0.54296875, "step": 652 }, { "dpo_loss": 0.51953125, "epoch": 0.1, "final_loss": 0.51953125, "grad_norm": 0.0, "learning_rate": 9.777293504639078e-07, "loss": 0.4444, "projector_lr": 2.933188051391723e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.04443359375, "rewards_train/margins": 0.80859375, "rewards_train/rejected": -0.765625, "sft_loss": 0.49609375, "step": 653 }, { "dpo_loss": 0.3359375, "epoch": 0.1, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 9.77654360640958e-07, "loss": 0.4706, "projector_lr": 2.932963081922875e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.408203125, "rewards_train/margins": 2.5, "rewards_train/rejected": -2.90625, "sft_loss": 0.7421875, "step": 654 }, { "dpo_loss": 0.341796875, "epoch": 0.1, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 9.775792476626677e-07, "loss": 0.3266, "projector_lr": 2.9327377429880036e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.08056640625, "rewards_train/margins": 2.203125, "rewards_train/rejected": -2.125, "sft_loss": 0.765625, "step": 655 }, { "dpo_loss": 0.10888671875, "epoch": 0.1, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 9.77504011548403e-07, "loss": 0.1742, "projector_lr": 2.9325120346452093e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.474609375, "rewards_train/margins": 2.5625, "rewards_train/rejected": -3.03125, "sft_loss": 0.79296875, "step": 656 }, { "dpo_loss": 0.263671875, "epoch": 0.11, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 9.774286523175623e-07, "loss": 0.4229, "projector_lr": 2.9322859569526873e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2734375, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.390625, "sft_loss": 0.828125, "step": 657 }, { "dpo_loss": 0.1962890625, "epoch": 0.11, "final_loss": 0.1962890625, "grad_norm": 0.0, "learning_rate": 9.77353169989576e-07, "loss": 0.2754, "projector_lr": 2.9320595099687277e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.337890625, "rewards_train/margins": 2.78125, "rewards_train/rejected": -2.4375, "sft_loss": 0.98046875, "step": 658 }, { "dpo_loss": 0.29296875, "epoch": 0.11, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 9.772775645839056e-07, "loss": 0.2703, "projector_lr": 2.9318326937517167e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2060546875, "rewards_train/margins": 2.171875, "rewards_train/rejected": -2.375, "sft_loss": 0.61328125, "step": 659 }, { "dpo_loss": 0.2314453125, "epoch": 0.11, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 9.77201836120045e-07, "loss": 0.327, "projector_lr": 2.9316055083601348e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2431640625, "rewards_train/margins": 2.171875, "rewards_train/rejected": -1.9296875, "sft_loss": 0.75, "step": 660 }, { "dpo_loss": 0.4453125, "epoch": 0.11, "final_loss": 0.4453125, "grad_norm": 0.0, "learning_rate": 9.771259846175194e-07, "loss": 0.4126, "projector_lr": 2.931377953852558e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.212890625, "rewards_train/margins": 1.2578125, "rewards_train/rejected": -1.46875, "sft_loss": 0.6171875, "step": 661 }, { "dpo_loss": 0.07373046875, "epoch": 0.11, "final_loss": 0.07373046875, "grad_norm": 0.0, "learning_rate": 9.770500100958859e-07, "loss": 0.1867, "projector_lr": 2.9311500302876577e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.003662109375, "rewards_train/margins": 2.859375, "rewards_train/rejected": -2.84375, "sft_loss": 0.609375, "step": 662 }, { "dpo_loss": 0.25, "epoch": 0.11, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 9.769739125747334e-07, "loss": 0.5784, "projector_lr": 2.9309217377242e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1494140625, "rewards_train/margins": 2.640625, "rewards_train/rejected": -2.796875, "sft_loss": 0.6796875, "step": 663 }, { "dpo_loss": 0.337890625, "epoch": 0.11, "final_loss": 0.337890625, "grad_norm": 0.0, "learning_rate": 9.768976920736821e-07, "loss": 0.2347, "projector_lr": 2.930693076221047e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.025390625, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.09375, "sft_loss": 0.77734375, "step": 664 }, { "dpo_loss": 0.32421875, "epoch": 0.11, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 9.768213486123848e-07, "loss": 0.2654, "projector_lr": 2.9304640458371545e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1123046875, "rewards_train/margins": 1.484375, "rewards_train/rejected": -1.375, "sft_loss": 0.71875, "step": 665 }, { "dpo_loss": 0.2197265625, "epoch": 0.11, "final_loss": 0.2197265625, "grad_norm": 0.0, "learning_rate": 9.767448822105248e-07, "loss": 0.2068, "projector_lr": 2.930234646631575e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.006744384765625, "rewards_train/margins": 2.25, "rewards_train/rejected": -2.25, "sft_loss": 0.83984375, "step": 666 }, { "dpo_loss": 0.56640625, "epoch": 0.11, "final_loss": 0.56640625, "grad_norm": 0.0, "learning_rate": 9.76668292887818e-07, "loss": 0.5467, "projector_lr": 2.9300048786634542e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.337890625, "rewards_train/margins": 1.09375, "rewards_train/rejected": -1.4296875, "sft_loss": 0.67578125, "step": 667 }, { "dpo_loss": 0.5078125, "epoch": 0.11, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 9.765915806640116e-07, "loss": 0.572, "projector_lr": 2.929774741992035e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.197265625, "rewards_train/margins": 1.1875, "rewards_train/rejected": -1.390625, "sft_loss": 0.75, "step": 668 }, { "dpo_loss": 0.1826171875, "epoch": 0.11, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 9.765147455588847e-07, "loss": 0.1963, "projector_lr": 2.9295442366766544e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.298828125, "rewards_train/margins": 2.578125, "rewards_train/rejected": -2.875, "sft_loss": 0.87109375, "step": 669 }, { "dpo_loss": 0.439453125, "epoch": 0.11, "final_loss": 0.439453125, "grad_norm": 0.0, "learning_rate": 9.764377875922478e-07, "loss": 0.3587, "projector_lr": 2.9293133627767436e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 1.828125, "rewards_train/rejected": -2.359375, "sft_loss": 0.78125, "step": 670 }, { "dpo_loss": 0.255859375, "epoch": 0.11, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 9.763607067839435e-07, "loss": 0.3039, "projector_lr": 2.9290821203518304e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.07470703125, "rewards_train/margins": 1.953125, "rewards_train/rejected": -1.875, "sft_loss": 0.75390625, "step": 671 }, { "dpo_loss": 0.369140625, "epoch": 0.11, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 9.762835031538454e-07, "loss": 0.3613, "projector_lr": 2.9288505094615364e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.06689453125, "rewards_train/margins": 1.6171875, "rewards_train/rejected": -1.5546875, "sft_loss": 0.7578125, "step": 672 }, { "dpo_loss": 0.482421875, "epoch": 0.11, "final_loss": 0.482421875, "grad_norm": 0.0, "learning_rate": 9.762061767218597e-07, "loss": 0.5043, "projector_lr": 2.928618530165579e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 1.5625, "rewards_train/rejected": -2.234375, "sft_loss": 0.77734375, "step": 673 }, { "dpo_loss": 0.388671875, "epoch": 0.11, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 9.761287275079235e-07, "loss": 0.2521, "projector_lr": 2.9283861825237703e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.000274658203125, "rewards_train/margins": 1.5, "rewards_train/rejected": -1.5, "sft_loss": 0.84765625, "step": 674 }, { "dpo_loss": 0.1953125, "epoch": 0.11, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 9.760511555320057e-07, "loss": 0.2427, "projector_lr": 2.9281534665960174e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28515625, "rewards_train/margins": 2.328125, "rewards_train/rejected": -2.609375, "sft_loss": 0.625, "step": 675 }, { "dpo_loss": 0.21875, "epoch": 0.11, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 9.75973460814107e-07, "loss": 0.2343, "projector_lr": 2.9279203824423214e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.058349609375, "rewards_train/margins": 1.859375, "rewards_train/rejected": -1.921875, "sft_loss": 0.7421875, "step": 676 }, { "dpo_loss": 0.1826171875, "epoch": 0.11, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 9.7589564337426e-07, "loss": 0.202, "projector_lr": 2.92768693012278e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1123046875, "rewards_train/margins": 2.3125, "rewards_train/rejected": -2.203125, "sft_loss": 0.84375, "step": 677 }, { "dpo_loss": 0.3984375, "epoch": 0.11, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 9.758177032325283e-07, "loss": 0.3082, "projector_lr": 2.927453109697585e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.34375, "rewards_train/margins": 2.140625, "rewards_train/rejected": -2.484375, "sft_loss": 0.640625, "step": 678 }, { "dpo_loss": 0.31640625, "epoch": 0.11, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 9.757396404090077e-07, "loss": 0.2287, "projector_lr": 2.927218921227023e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0263671875, "rewards_train/margins": 1.671875, "rewards_train/rejected": -1.703125, "sft_loss": 0.6328125, "step": 679 }, { "dpo_loss": 1.3046875, "epoch": 0.11, "final_loss": 1.3046875, "grad_norm": 0.0, "learning_rate": 9.756614549238251e-07, "loss": 0.7844, "projector_lr": 2.926984364771476e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -1.21875, "rewards_train/margins": -0.6640625, "rewards_train/rejected": -0.55078125, "sft_loss": 0.95703125, "step": 680 }, { "dpo_loss": 0.28125, "epoch": 0.11, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 9.7558314679714e-07, "loss": 0.2734, "projector_lr": 2.9267494403914203e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.314453125, "rewards_train/margins": 1.90625, "rewards_train/rejected": -1.59375, "sft_loss": 0.73046875, "step": 681 }, { "dpo_loss": 0.37109375, "epoch": 0.11, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 9.755047160491423e-07, "loss": 0.4428, "projector_lr": 2.926514148147427e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.259765625, "rewards_train/margins": 3.0625, "rewards_train/rejected": -2.796875, "sft_loss": 0.68359375, "step": 682 }, { "dpo_loss": 0.126953125, "epoch": 0.11, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 9.754261627000541e-07, "loss": 0.1621, "projector_lr": 2.9262784881001626e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06298828125, "rewards_train/margins": 2.671875, "rewards_train/rejected": -2.609375, "sft_loss": 0.69921875, "step": 683 }, { "dpo_loss": 0.181640625, "epoch": 0.11, "final_loss": 0.181640625, "grad_norm": 0.0, "learning_rate": 9.753474867701294e-07, "loss": 0.2497, "projector_lr": 2.9260424603103883e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.322265625, "rewards_train/margins": 2.671875, "rewards_train/rejected": -2.359375, "sft_loss": 0.83984375, "step": 684 }, { "dpo_loss": 0.2578125, "epoch": 0.11, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 9.752686882796534e-07, "loss": 0.6597, "projector_lr": 2.92580606483896e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.056640625, "rewards_train/margins": 2.21875, "rewards_train/rejected": -2.28125, "sft_loss": 0.78125, "step": 685 }, { "dpo_loss": 0.298828125, "epoch": 0.11, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 9.751897672489426e-07, "loss": 0.2456, "projector_lr": 2.9255693017468276e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.166015625, "rewards_train/margins": 2.71875, "rewards_train/rejected": -2.5625, "sft_loss": 0.87109375, "step": 686 }, { "dpo_loss": 0.2431640625, "epoch": 0.11, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 9.751107236983459e-07, "loss": 0.225, "projector_lr": 2.925332171095038e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.62890625, "rewards_train/margins": 2.65625, "rewards_train/rejected": -2.015625, "sft_loss": 0.71484375, "step": 687 }, { "dpo_loss": 0.171875, "epoch": 0.11, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 9.750315576482434e-07, "loss": 0.1301, "projector_lr": 2.9250946729447305e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2158203125, "rewards_train/margins": 2.40625, "rewards_train/rejected": -2.1875, "sft_loss": 0.55859375, "step": 688 }, { "dpo_loss": 0.1875, "epoch": 0.11, "final_loss": 0.1875, "grad_norm": 0.0, "learning_rate": 9.749522691190466e-07, "loss": 0.1645, "projector_lr": 2.92485680735714e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1748046875, "rewards_train/margins": 3.046875, "rewards_train/rejected": -3.234375, "sft_loss": 0.7578125, "step": 689 }, { "dpo_loss": 0.59765625, "epoch": 0.11, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 9.748728581311988e-07, "loss": 0.3982, "projector_lr": 2.924618574393597e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -0.2451171875, "rewards_train/margins": 0.765625, "rewards_train/rejected": -1.0078125, "sft_loss": 0.734375, "step": 690 }, { "dpo_loss": 0.16015625, "epoch": 0.11, "final_loss": 0.16015625, "grad_norm": 0.0, "learning_rate": 9.74793324705175e-07, "loss": 0.1659, "projector_lr": 2.924379974115525e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1328125, "rewards_train/margins": 2.734375, "rewards_train/rejected": -2.609375, "sft_loss": 0.91015625, "step": 691 }, { "dpo_loss": 0.232421875, "epoch": 0.11, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 9.747136688614814e-07, "loss": 0.2102, "projector_lr": 2.9241410065844442e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.25, "rewards_train/margins": 1.8046875, "rewards_train/rejected": -2.0625, "sft_loss": 0.8828125, "step": 692 }, { "dpo_loss": 0.2119140625, "epoch": 0.11, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 9.746338906206558e-07, "loss": 0.1832, "projector_lr": 2.9239016718619677e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.0189208984375, "rewards_train/margins": 3.8125, "rewards_train/rejected": -3.78125, "sft_loss": 0.71875, "step": 693 }, { "dpo_loss": 0.271484375, "epoch": 0.11, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 9.74553990003268e-07, "loss": 0.1722, "projector_lr": 2.9236619700098044e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.224609375, "rewards_train/margins": 3.0, "rewards_train/rejected": -2.78125, "sft_loss": 0.6484375, "step": 694 }, { "dpo_loss": 0.3828125, "epoch": 0.11, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 9.74473967029919e-07, "loss": 0.3584, "projector_lr": 2.923421901089757e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2314453125, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -2.015625, "sft_loss": 0.6015625, "step": 695 }, { "dpo_loss": 0.2412109375, "epoch": 0.11, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 9.743938217212412e-07, "loss": 0.4463, "projector_lr": 2.923181465163724e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.009521484375, "rewards_train/margins": 2.34375, "rewards_train/rejected": -2.34375, "sft_loss": 0.73046875, "step": 696 }, { "dpo_loss": 0.26171875, "epoch": 0.11, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 9.74313554097899e-07, "loss": 0.2161, "projector_lr": 2.9229406622936976e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.53125, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.5, "sft_loss": 0.51953125, "step": 697 }, { "dpo_loss": 0.6328125, "epoch": 0.11, "final_loss": 0.6328125, "grad_norm": 0.0, "learning_rate": 9.74233164180588e-07, "loss": 0.459, "projector_lr": 2.9226994925417647e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.0986328125, "rewards_train/margins": 1.1796875, "rewards_train/rejected": -1.078125, "sft_loss": 0.5703125, "step": 698 }, { "dpo_loss": 0.361328125, "epoch": 0.11, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.741526519900356e-07, "loss": 0.278, "projector_lr": 2.922457955970107e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.130859375, "rewards_train/margins": 2.390625, "rewards_train/rejected": -2.25, "sft_loss": 0.921875, "step": 699 }, { "dpo_loss": 0.25390625, "epoch": 0.11, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 9.74072017547e-07, "loss": 0.4139, "projector_lr": 2.9222160526410006e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.197265625, "rewards_train/margins": 2.34375, "rewards_train/rejected": -2.546875, "sft_loss": 0.8671875, "step": 700 }, { "dpo_loss": 0.41796875, "epoch": 0.11, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 9.739912608722722e-07, "loss": 0.5082, "projector_lr": 2.9219737826168167e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.068359375, "rewards_train/margins": 1.4140625, "rewards_train/rejected": -1.34375, "sft_loss": 0.66015625, "step": 701 }, { "dpo_loss": 0.271484375, "epoch": 0.11, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 9.739103819866735e-07, "loss": 0.2054, "projector_lr": 2.9217311459600205e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.087890625, "rewards_train/margins": 2.5625, "rewards_train/rejected": -2.46875, "sft_loss": 0.8984375, "step": 702 }, { "dpo_loss": 0.515625, "epoch": 0.11, "final_loss": 0.515625, "grad_norm": 0.0, "learning_rate": 9.738293809110574e-07, "loss": 0.3761, "projector_lr": 2.921488142733172e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.009033203125, "rewards_train/margins": 1.515625, "rewards_train/rejected": -1.5078125, "sft_loss": 0.66015625, "step": 703 }, { "dpo_loss": 0.1806640625, "epoch": 0.11, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 9.737482576663082e-07, "loss": 0.2714, "projector_lr": 2.9212447729989253e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2451171875, "rewards_train/margins": 3.1875, "rewards_train/rejected": -2.9375, "sft_loss": 0.66796875, "step": 704 }, { "dpo_loss": 0.1513671875, "epoch": 0.11, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 9.736670122733431e-07, "loss": 0.2459, "projector_lr": 2.921001036820029e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.34375, "rewards_train/margins": 3.296875, "rewards_train/rejected": -2.953125, "sft_loss": 0.80859375, "step": 705 }, { "dpo_loss": 0.58203125, "epoch": 0.11, "final_loss": 0.58203125, "grad_norm": 0.0, "learning_rate": 9.735856447531092e-07, "loss": 0.487, "projector_lr": 2.920756934259328e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1640625, "rewards_train/margins": 1.0390625, "rewards_train/rejected": -1.1953125, "sft_loss": 0.66796875, "step": 706 }, { "dpo_loss": 0.28125, "epoch": 0.11, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 9.735041551265861e-07, "loss": 0.2222, "projector_lr": 2.9205124653797584e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.010009765625, "rewards_train/margins": 1.78125, "rewards_train/rejected": -1.7734375, "sft_loss": 0.97265625, "step": 707 }, { "dpo_loss": 0.251953125, "epoch": 0.11, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 9.734225434147842e-07, "loss": 0.2854, "projector_lr": 2.920267630244353e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2314453125, "rewards_train/margins": 1.8515625, "rewards_train/rejected": -1.6171875, "sft_loss": 0.63671875, "step": 708 }, { "dpo_loss": 0.2060546875, "epoch": 0.11, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 9.733408096387463e-07, "loss": 0.1835, "projector_lr": 2.920022428916239e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1796875, "rewards_train/margins": 3.046875, "rewards_train/rejected": -2.859375, "sft_loss": 0.6171875, "step": 709 }, { "dpo_loss": 0.44140625, "epoch": 0.11, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 9.732589538195456e-07, "loss": 0.267, "projector_lr": 2.9197768614586372e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.15625, "rewards_train/margins": 2.296875, "rewards_train/rejected": -2.140625, "sft_loss": 0.8125, "step": 710 }, { "dpo_loss": 0.30078125, "epoch": 0.11, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 9.731769759782876e-07, "loss": 0.2308, "projector_lr": 2.9195309279348633e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1845703125, "rewards_train/margins": 2.34375, "rewards_train/rejected": -2.15625, "sft_loss": 0.82421875, "step": 711 }, { "dpo_loss": 0.384765625, "epoch": 0.11, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 9.730948761361088e-07, "loss": 0.3362, "projector_lr": 2.919284628408327e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.07470703125, "rewards_train/margins": 2.546875, "rewards_train/rejected": -2.46875, "sft_loss": 0.5390625, "step": 712 }, { "dpo_loss": 0.1298828125, "epoch": 0.11, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 9.730126543141772e-07, "loss": 0.0972, "projector_lr": 2.919037962942532e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.63671875, "rewards_train/margins": 2.625, "rewards_train/rejected": -1.984375, "sft_loss": 0.375, "step": 713 }, { "dpo_loss": 0.22265625, "epoch": 0.11, "final_loss": 0.22265625, "grad_norm": 0.0, "learning_rate": 9.729303105336927e-07, "loss": 0.1916, "projector_lr": 2.9187909316010784e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10595703125, "rewards_train/margins": 2.484375, "rewards_train/rejected": -2.390625, "sft_loss": 0.625, "step": 714 }, { "dpo_loss": 0.26953125, "epoch": 0.11, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 9.728478448158858e-07, "loss": 0.306, "projector_lr": 2.918543534447658e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2578125, "rewards_train/margins": 2.0625, "rewards_train/rejected": -2.3125, "sft_loss": 0.8046875, "step": 715 }, { "dpo_loss": 0.421875, "epoch": 0.11, "final_loss": 0.421875, "grad_norm": 0.0, "learning_rate": 9.727652571820194e-07, "loss": 0.4659, "projector_lr": 2.918295771546059e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.0810546875, "rewards_train/margins": 1.734375, "rewards_train/rejected": -1.6484375, "sft_loss": 0.435546875, "step": 716 }, { "dpo_loss": 0.21875, "epoch": 0.11, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 9.726825476533872e-07, "loss": 0.1732, "projector_lr": 2.9180476429601615e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.00341796875, "rewards_train/margins": 1.9296875, "rewards_train/rejected": -1.9296875, "sft_loss": 0.9453125, "step": 717 }, { "dpo_loss": 0.328125, "epoch": 0.11, "final_loss": 0.328125, "grad_norm": 0.0, "learning_rate": 9.725997162513143e-07, "loss": 0.2292, "projector_lr": 2.9177991487539433e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.32421875, "rewards_train/margins": 1.5234375, "rewards_train/rejected": -1.1953125, "sft_loss": 0.6796875, "step": 718 }, { "dpo_loss": 0.2060546875, "epoch": 0.12, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 9.725167629971575e-07, "loss": 0.2436, "projector_lr": 2.9175502889914727e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.181640625, "rewards_train/margins": 2.828125, "rewards_train/rejected": -2.640625, "sft_loss": 0.57421875, "step": 719 }, { "dpo_loss": 0.138671875, "epoch": 0.12, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 9.724336879123052e-07, "loss": 0.3804, "projector_lr": 2.9173010637369163e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37109375, "rewards_train/margins": 2.59375, "rewards_train/rejected": -2.21875, "sft_loss": 0.62109375, "step": 720 }, { "dpo_loss": 0.2060546875, "epoch": 0.12, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 9.723504910181768e-07, "loss": 0.1807, "projector_lr": 2.9170514730545307e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.3828125, "rewards_train/margins": 2.03125, "rewards_train/rejected": -1.6484375, "sft_loss": 0.84375, "step": 721 }, { "dpo_loss": 0.27734375, "epoch": 0.12, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 9.72267172336223e-07, "loss": 0.2135, "projector_lr": 2.9168015170086695e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1748046875, "rewards_train/margins": 2.96875, "rewards_train/rejected": -3.140625, "sft_loss": 0.50390625, "step": 722 }, { "dpo_loss": 0.294921875, "epoch": 0.12, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 9.721837318879266e-07, "loss": 0.2312, "projector_lr": 2.91655119566378e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.06884765625, "rewards_train/margins": 2.1875, "rewards_train/rejected": -2.109375, "sft_loss": 0.7734375, "step": 723 }, { "dpo_loss": 0.5234375, "epoch": 0.12, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 9.72100169694801e-07, "loss": 0.4445, "projector_lr": 2.9163005090844034e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 1.171875, "rewards_train/rejected": -2.0625, "sft_loss": 0.62109375, "step": 724 }, { "dpo_loss": 0.11181640625, "epoch": 0.12, "final_loss": 0.11181640625, "grad_norm": 0.0, "learning_rate": 9.720164857783915e-07, "loss": 0.2579, "projector_lr": 2.9160494573351746e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.244140625, "rewards_train/margins": 3.09375, "rewards_train/rejected": -2.84375, "sft_loss": 0.6953125, "step": 725 }, { "dpo_loss": 0.1806640625, "epoch": 0.12, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 9.719326801602744e-07, "loss": 0.2313, "projector_lr": 2.915798040480824e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1513671875, "rewards_train/margins": 2.390625, "rewards_train/rejected": -2.546875, "sft_loss": 0.671875, "step": 726 }, { "dpo_loss": 0.38671875, "epoch": 0.12, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 9.71848752862058e-07, "loss": 0.3837, "projector_lr": 2.9155462585861745e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.142578125, "rewards_train/margins": 2.921875, "rewards_train/rejected": -3.078125, "sft_loss": 0.69921875, "step": 727 }, { "dpo_loss": 0.48046875, "epoch": 0.12, "final_loss": 0.48046875, "grad_norm": 0.0, "learning_rate": 9.717647039053814e-07, "loss": 0.3816, "projector_lr": 2.915294111716144e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.04345703125, "rewards_train/margins": 2.421875, "rewards_train/rejected": -2.375, "sft_loss": 0.953125, "step": 728 }, { "dpo_loss": 0.2099609375, "epoch": 0.12, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 9.716805333119152e-07, "loss": 0.2659, "projector_lr": 2.9150415999357455e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.076171875, "rewards_train/margins": 2.328125, "rewards_train/rejected": -2.25, "sft_loss": 0.7421875, "step": 729 }, { "dpo_loss": 0.275390625, "epoch": 0.12, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 9.715962411033613e-07, "loss": 0.2004, "projector_lr": 2.9147887233100842e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.053466796875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -4.375, "sft_loss": 0.384765625, "step": 730 }, { "dpo_loss": 0.390625, "epoch": 0.12, "final_loss": 0.390625, "grad_norm": 0.0, "learning_rate": 9.715118273014533e-07, "loss": 0.2898, "projector_lr": 2.91453548190436e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.380859375, "rewards_train/margins": 2.65625, "rewards_train/rejected": -2.28125, "sft_loss": 0.58984375, "step": 731 }, { "dpo_loss": 0.34765625, "epoch": 0.12, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 9.714272919279556e-07, "loss": 0.3128, "projector_lr": 2.9142818757838674e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.412109375, "rewards_train/margins": 2.09375, "rewards_train/rejected": -2.5, "sft_loss": 0.84375, "step": 732 }, { "dpo_loss": 0.1953125, "epoch": 0.12, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 9.713426350046647e-07, "loss": 0.1645, "projector_lr": 2.9140279050139945e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6171875, "rewards_train/margins": 3.203125, "rewards_train/rejected": -3.8125, "sft_loss": 0.71875, "step": 733 }, { "dpo_loss": 0.1064453125, "epoch": 0.12, "final_loss": 0.1064453125, "grad_norm": 0.0, "learning_rate": 9.712578565534076e-07, "loss": 0.2431, "projector_lr": 2.913773569660223e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5703125, "rewards_train/margins": 2.703125, "rewards_train/rejected": -2.125, "sft_loss": 0.60546875, "step": 734 }, { "dpo_loss": 0.341796875, "epoch": 0.12, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 9.711729565960432e-07, "loss": 0.3004, "projector_lr": 2.9135188697881297e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.447265625, "rewards_train/margins": 2.609375, "rewards_train/rejected": -2.15625, "sft_loss": 0.53125, "step": 735 }, { "dpo_loss": 0.2255859375, "epoch": 0.12, "final_loss": 0.2255859375, "grad_norm": 0.0, "learning_rate": 9.710879351544616e-07, "loss": 0.4259, "projector_lr": 2.913263805463385e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.193359375, "rewards_train/margins": 2.15625, "rewards_train/rejected": -1.953125, "sft_loss": 0.57421875, "step": 736 }, { "dpo_loss": 0.275390625, "epoch": 0.12, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 9.710027922505842e-07, "loss": 0.2207, "projector_lr": 2.9130083767517525e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.130859375, "rewards_train/margins": 2.765625, "rewards_train/rejected": -2.890625, "sft_loss": 0.69921875, "step": 737 }, { "dpo_loss": 0.2138671875, "epoch": 0.12, "final_loss": 0.2138671875, "grad_norm": 0.0, "learning_rate": 9.709175279063634e-07, "loss": 0.193, "projector_lr": 2.9127525837190907e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.09375, "rewards_train/margins": 2.390625, "rewards_train/rejected": -2.296875, "sft_loss": 0.8359375, "step": 738 }, { "dpo_loss": 0.1591796875, "epoch": 0.12, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 9.708321421437836e-07, "loss": 0.203, "projector_lr": 2.912496426431351e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.056884765625, "rewards_train/margins": 3.9375, "rewards_train/rejected": -3.875, "sft_loss": 0.65234375, "step": 739 }, { "dpo_loss": 0.35546875, "epoch": 0.12, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 9.7074663498486e-07, "loss": 0.2589, "projector_lr": 2.91223990495458e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 2.75, "rewards_train/rejected": -3.71875, "sft_loss": 0.9140625, "step": 740 }, { "dpo_loss": 0.2138671875, "epoch": 0.12, "final_loss": 0.2138671875, "grad_norm": 0.0, "learning_rate": 9.706610064516391e-07, "loss": 0.2152, "projector_lr": 2.9119830193549176e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.474609375, "rewards_train/margins": 2.828125, "rewards_train/rejected": -3.296875, "sft_loss": 0.83984375, "step": 741 }, { "dpo_loss": 0.416015625, "epoch": 0.12, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 9.705752565661989e-07, "loss": 0.2607, "projector_lr": 2.911725769698597e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 1.359375, "rewards_train/rejected": -1.71875, "sft_loss": 0.546875, "step": 742 }, { "dpo_loss": 0.32421875, "epoch": 0.12, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 9.704893853506486e-07, "loss": 0.422, "projector_lr": 2.911468156051946e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 1.4765625, "rewards_train/rejected": -2.1875, "sft_loss": 0.640625, "step": 743 }, { "dpo_loss": 0.28515625, "epoch": 0.12, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 9.704033928271286e-07, "loss": 0.3819, "projector_lr": 2.9112101784813862e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 3.171875, "rewards_train/rejected": -3.8125, "sft_loss": 0.62890625, "step": 744 }, { "dpo_loss": 0.369140625, "epoch": 0.12, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 9.70317279017811e-07, "loss": 0.3267, "projector_lr": 2.910951837053433e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.71875, "rewards_train/margins": 2.015625, "rewards_train/rejected": -2.71875, "sft_loss": 0.55078125, "step": 745 }, { "dpo_loss": 0.546875, "epoch": 0.12, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 9.702310439448984e-07, "loss": 0.5018, "projector_lr": 2.9106931318346956e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1552734375, "rewards_train/margins": 1.4921875, "rewards_train/rejected": -1.6484375, "sft_loss": 0.7578125, "step": 746 }, { "dpo_loss": 0.796875, "epoch": 0.12, "final_loss": 0.796875, "grad_norm": 0.0, "learning_rate": 9.701446876306252e-07, "loss": 0.6235, "projector_lr": 2.910434062891876e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 1.0, "rewards_train/rejected": -2.359375, "sft_loss": 0.765625, "step": 747 }, { "dpo_loss": 0.1005859375, "epoch": 0.12, "final_loss": 0.1005859375, "grad_norm": 0.0, "learning_rate": 9.700582100972572e-07, "loss": 0.1333, "projector_lr": 2.9101746302917717e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1572265625, "rewards_train/margins": 2.96875, "rewards_train/rejected": -2.8125, "sft_loss": 0.828125, "step": 748 }, { "dpo_loss": 0.376953125, "epoch": 0.12, "final_loss": 0.376953125, "grad_norm": 0.0, "learning_rate": 9.69971611367091e-07, "loss": 0.2456, "projector_lr": 2.909914834101273e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 2.828125, "rewards_train/rejected": -3.53125, "sft_loss": 0.53515625, "step": 749 }, { "dpo_loss": 0.54296875, "epoch": 0.12, "final_loss": 0.54296875, "grad_norm": 0.0, "learning_rate": 9.698848914624547e-07, "loss": 0.381, "projector_lr": 2.9096546743873642e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 1.8828125, "rewards_train/rejected": -2.5625, "sft_loss": 0.6484375, "step": 750 }, { "dpo_loss": 0.2255859375, "epoch": 0.12, "final_loss": 0.2255859375, "grad_norm": 0.0, "learning_rate": 9.697980504057075e-07, "loss": 0.313, "projector_lr": 2.9093941512171226e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.8125, "sft_loss": 0.69140625, "step": 751 }, { "dpo_loss": 0.361328125, "epoch": 0.12, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.6971108821924e-07, "loss": 0.41, "projector_lr": 2.9091332646577204e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.053466796875, "rewards_train/margins": 2.125, "rewards_train/rejected": -2.171875, "sft_loss": 0.8671875, "step": 752 }, { "dpo_loss": 0.2890625, "epoch": 0.12, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 9.696240049254742e-07, "loss": 0.2412, "projector_lr": 2.908872014776423e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.059326171875, "rewards_train/margins": 3.265625, "rewards_train/rejected": -3.328125, "sft_loss": 0.625, "step": 753 }, { "dpo_loss": 0.232421875, "epoch": 0.12, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 9.69536800546863e-07, "loss": 0.2115, "projector_lr": 2.908610401640589e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.478515625, "rewards_train/margins": 2.609375, "rewards_train/rejected": -3.078125, "sft_loss": 0.62890625, "step": 754 }, { "dpo_loss": 0.1435546875, "epoch": 0.12, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 9.694494751058903e-07, "loss": 0.1366, "projector_lr": 2.908348425317671e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.212890625, "rewards_train/margins": 3.296875, "rewards_train/rejected": -3.078125, "sft_loss": 0.96875, "step": 755 }, { "dpo_loss": 0.12353515625, "epoch": 0.12, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 9.69362028625072e-07, "loss": 0.3089, "projector_lr": 2.9080860858752163e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.11865234375, "rewards_train/margins": 2.796875, "rewards_train/rejected": -2.671875, "sft_loss": 0.5234375, "step": 756 }, { "dpo_loss": 0.455078125, "epoch": 0.12, "final_loss": 0.455078125, "grad_norm": 0.0, "learning_rate": 9.692744611269543e-07, "loss": 0.3743, "projector_lr": 2.9078233833808634e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.203125, "rewards_train/margins": 1.5703125, "rewards_train/rejected": -1.359375, "sft_loss": 0.51171875, "step": 757 }, { "dpo_loss": 0.271484375, "epoch": 0.12, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 9.691867726341156e-07, "loss": 0.2125, "projector_lr": 2.9075603179023466e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.259765625, "rewards_train/margins": 2.15625, "rewards_train/rejected": -2.421875, "sft_loss": 0.57421875, "step": 758 }, { "dpo_loss": 0.1572265625, "epoch": 0.12, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 9.690989631691642e-07, "loss": 0.1277, "projector_lr": 2.907296889507493e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2021484375, "rewards_train/margins": 4.78125, "rewards_train/rejected": -4.5625, "sft_loss": 0.86328125, "step": 759 }, { "dpo_loss": 0.134765625, "epoch": 0.12, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 9.69011032754741e-07, "loss": 0.1378, "projector_lr": 2.907033098264223e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2314453125, "rewards_train/margins": 2.5625, "rewards_train/rejected": -2.34375, "sft_loss": 0.625, "step": 760 }, { "dpo_loss": 0.33984375, "epoch": 0.12, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 9.689229814135172e-07, "loss": 0.3195, "projector_lr": 2.9067689442405517e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 2.40625, "rewards_train/rejected": -2.78125, "sft_loss": 0.80859375, "step": 761 }, { "dpo_loss": 0.2236328125, "epoch": 0.12, "final_loss": 0.2236328125, "grad_norm": 0.0, "learning_rate": 9.68834809168195e-07, "loss": 0.3103, "projector_lr": 2.9065044275045853e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.53515625, "rewards_train/margins": 2.609375, "rewards_train/rejected": -2.078125, "sft_loss": 0.734375, "step": 762 }, { "dpo_loss": 0.236328125, "epoch": 0.12, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 9.687465160415087e-07, "loss": 0.4518, "projector_lr": 2.9062395481245262e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1318359375, "rewards_train/margins": 2.9375, "rewards_train/rejected": -2.8125, "sft_loss": 0.84765625, "step": 763 }, { "dpo_loss": 0.25390625, "epoch": 0.12, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 9.68658102056223e-07, "loss": 0.2625, "projector_lr": 2.9059743061686696e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.035400390625, "rewards_train/margins": 2.609375, "rewards_train/rejected": -2.578125, "sft_loss": 0.6328125, "step": 764 }, { "dpo_loss": 0.59375, "epoch": 0.12, "final_loss": 0.59375, "grad_norm": 0.0, "learning_rate": 9.685695672351342e-07, "loss": 0.3973, "projector_lr": 2.9057087017054028e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3671875, "rewards_train/margins": 2.09375, "rewards_train/rejected": -2.453125, "sft_loss": 0.6796875, "step": 765 }, { "dpo_loss": 0.94921875, "epoch": 0.12, "final_loss": 0.94921875, "grad_norm": 0.0, "learning_rate": 9.684809116010692e-07, "loss": 0.6121, "projector_lr": 2.905442734803208e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.03125, "rewards_train/margins": 0.236328125, "rewards_train/rejected": -1.265625, "sft_loss": 0.765625, "step": 766 }, { "dpo_loss": 0.2060546875, "epoch": 0.12, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 9.683921351768866e-07, "loss": 0.123, "projector_lr": 2.9051764055306602e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.021484375, "rewards_train/margins": 2.296875, "rewards_train/rejected": -2.328125, "sft_loss": 0.71484375, "step": 767 }, { "dpo_loss": 0.45703125, "epoch": 0.12, "final_loss": 0.45703125, "grad_norm": 0.0, "learning_rate": 9.683032379854761e-07, "loss": 0.3616, "projector_lr": 2.9049097139564284e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 1.0859375, "rewards_train/rejected": -1.515625, "sft_loss": 0.6953125, "step": 768 }, { "dpo_loss": 0.68359375, "epoch": 0.12, "final_loss": 0.68359375, "grad_norm": 0.0, "learning_rate": 9.682142200497581e-07, "loss": 0.6324, "projector_lr": 2.9046426601492746e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1162109375, "rewards_train/margins": 2.546875, "rewards_train/rejected": -2.671875, "sft_loss": 0.90625, "step": 769 }, { "dpo_loss": 0.369140625, "epoch": 0.12, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 9.681250813926844e-07, "loss": 0.2853, "projector_lr": 2.9043752441780536e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.056640625, "rewards_train/margins": 2.0625, "rewards_train/rejected": -2.125, "sft_loss": 0.72265625, "step": 770 }, { "dpo_loss": 0.1279296875, "epoch": 0.12, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 9.680358220372381e-07, "loss": 0.234, "projector_lr": 2.904107466111715e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.443359375, "rewards_train/margins": 3.15625, "rewards_train/rejected": -2.71875, "sft_loss": 0.7265625, "step": 771 }, { "dpo_loss": 0.150390625, "epoch": 0.12, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 9.679464420064334e-07, "loss": 0.1322, "projector_lr": 2.9038393260193007e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.25, "rewards_train/margins": 2.390625, "rewards_train/rejected": -2.140625, "sft_loss": 0.8203125, "step": 772 }, { "dpo_loss": 0.053466796875, "epoch": 0.12, "final_loss": 0.053466796875, "grad_norm": 0.0, "learning_rate": 9.678569413233153e-07, "loss": 0.0689, "projector_lr": 2.903570823969946e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2060546875, "rewards_train/margins": 3.625, "rewards_train/rejected": -3.421875, "sft_loss": 0.6328125, "step": 773 }, { "dpo_loss": 0.61328125, "epoch": 0.12, "final_loss": 0.61328125, "grad_norm": 0.0, "learning_rate": 9.677673200109599e-07, "loss": 0.4266, "projector_lr": 2.9033019600328803e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -2.671875, "sft_loss": 0.96484375, "step": 774 }, { "dpo_loss": 0.1787109375, "epoch": 0.12, "final_loss": 0.1787109375, "grad_norm": 0.0, "learning_rate": 9.67677578092475e-07, "loss": 0.1834, "projector_lr": 2.9030327342774252e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.462890625, "rewards_train/margins": 2.65625, "rewards_train/rejected": -2.1875, "sft_loss": 0.78125, "step": 775 }, { "dpo_loss": 0.33203125, "epoch": 0.12, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 9.675877155909987e-07, "loss": 0.2589, "projector_lr": 2.9027631467729962e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.036865234375, "rewards_train/margins": 1.9609375, "rewards_train/rejected": -1.921875, "sft_loss": 0.87109375, "step": 776 }, { "dpo_loss": 0.2158203125, "epoch": 0.12, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 9.674977325297008e-07, "loss": 0.2852, "projector_lr": 2.9024931975891025e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.40625, "rewards_train/margins": 2.1875, "rewards_train/rejected": -1.7890625, "sft_loss": 0.56640625, "step": 777 }, { "dpo_loss": 0.035400390625, "epoch": 0.12, "final_loss": 0.035400390625, "grad_norm": 0.0, "learning_rate": 9.674076289317817e-07, "loss": 0.0871, "projector_lr": 2.902222886795346e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.193359375, "rewards_train/margins": 3.640625, "rewards_train/rejected": -3.453125, "sft_loss": 0.7578125, "step": 778 }, { "dpo_loss": 0.3125, "epoch": 0.12, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 9.673174048204735e-07, "loss": 0.3486, "projector_lr": 2.9019522144614208e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.453125, "rewards_train/margins": 1.484375, "rewards_train/rejected": -1.03125, "sft_loss": 0.76171875, "step": 779 }, { "dpo_loss": 0.37890625, "epoch": 0.12, "final_loss": 0.37890625, "grad_norm": 0.0, "learning_rate": 9.672270602190388e-07, "loss": 0.249, "projector_lr": 2.9016811806571167e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.224609375, "rewards_train/margins": 2.546875, "rewards_train/rejected": -2.3125, "sft_loss": 0.69140625, "step": 780 }, { "dpo_loss": 0.15625, "epoch": 0.12, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 9.671365951507715e-07, "loss": 0.2056, "projector_lr": 2.901409785452315e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0625, "rewards_train/margins": 2.671875, "rewards_train/rejected": -2.609375, "sft_loss": 0.58203125, "step": 781 }, { "dpo_loss": 0.71875, "epoch": 0.13, "final_loss": 0.71875, "grad_norm": 0.0, "learning_rate": 9.670460096389965e-07, "loss": 0.5813, "projector_lr": 2.9011380289169898e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.69140625, "rewards_train/margins": 2.15625, "rewards_train/rejected": -2.84375, "sft_loss": 0.474609375, "step": 782 }, { "dpo_loss": 0.08056640625, "epoch": 0.13, "final_loss": 0.08056640625, "grad_norm": 0.0, "learning_rate": 9.669553037070698e-07, "loss": 0.2476, "projector_lr": 2.9008659111212097e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.046875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.03125, "sft_loss": 0.8515625, "step": 783 }, { "dpo_loss": 0.1591796875, "epoch": 0.13, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 9.668644773783785e-07, "loss": 0.3041, "projector_lr": 2.9005934321351358e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10595703125, "rewards_train/margins": 3.265625, "rewards_train/rejected": -3.15625, "sft_loss": 0.703125, "step": 784 }, { "dpo_loss": 0.46875, "epoch": 0.13, "final_loss": 0.46875, "grad_norm": 0.0, "learning_rate": 9.667735306763407e-07, "loss": 0.4227, "projector_lr": 2.9003205920290225e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1259765625, "rewards_train/margins": 1.828125, "rewards_train/rejected": -1.953125, "sft_loss": 0.9453125, "step": 785 }, { "dpo_loss": 0.17578125, "epoch": 0.13, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 9.666824636244054e-07, "loss": 0.161, "projector_lr": 2.900047390873216e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 2.359375, "rewards_train/rejected": -2.796875, "sft_loss": 0.8359375, "step": 786 }, { "dpo_loss": 0.27734375, "epoch": 0.13, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 9.665912762460526e-07, "loss": 0.3303, "projector_lr": 2.899773828738158e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.453125, "rewards_train/margins": 2.28125, "rewards_train/rejected": -1.828125, "sft_loss": 0.8828125, "step": 787 }, { "dpo_loss": 0.4453125, "epoch": 0.13, "final_loss": 0.4453125, "grad_norm": 0.0, "learning_rate": 9.664999685647938e-07, "loss": 0.2854, "projector_lr": 2.899499905694382e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.125, "rewards_train/margins": 1.578125, "rewards_train/rejected": -1.453125, "sft_loss": 0.6875, "step": 788 }, { "dpo_loss": 0.3828125, "epoch": 0.13, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 9.664085406041713e-07, "loss": 0.2151, "projector_lr": 2.899225621812514e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.349609375, "rewards_train/margins": 2.078125, "rewards_train/rejected": -1.7265625, "sft_loss": 0.734375, "step": 789 }, { "dpo_loss": 0.1318359375, "epoch": 0.13, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 9.663169923877577e-07, "loss": 0.2161, "projector_lr": 2.8989509771632733e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 2.265625, "rewards_train/rejected": -2.8125, "sft_loss": 0.68359375, "step": 790 }, { "dpo_loss": 0.400390625, "epoch": 0.13, "final_loss": 0.400390625, "grad_norm": 0.0, "learning_rate": 9.662253239391578e-07, "loss": 0.2428, "projector_lr": 2.8986759718174733e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.171875, "rewards_train/margins": 1.2578125, "rewards_train/rejected": -1.0859375, "sft_loss": 0.59375, "step": 791 }, { "dpo_loss": 0.09130859375, "epoch": 0.13, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 9.661335352820064e-07, "loss": 0.1717, "projector_lr": 2.898400605846019e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.359375, "rewards_train/margins": 3.015625, "rewards_train/rejected": -2.65625, "sft_loss": 0.5390625, "step": 792 }, { "dpo_loss": 0.20703125, "epoch": 0.13, "final_loss": 0.20703125, "grad_norm": 0.0, "learning_rate": 9.660416264399698e-07, "loss": 0.3117, "projector_lr": 2.8981248793199095e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.10595703125, "rewards_train/margins": 2.84375, "rewards_train/rejected": -2.734375, "sft_loss": 0.578125, "step": 793 }, { "dpo_loss": 0.08447265625, "epoch": 0.13, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 9.659495974367454e-07, "loss": 0.3591, "projector_lr": 2.897848792310236e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.7109375, "rewards_train/margins": 3.625, "rewards_train/rejected": -2.921875, "sft_loss": 0.90625, "step": 794 }, { "dpo_loss": 0.25390625, "epoch": 0.13, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 9.65857448296061e-07, "loss": 0.2152, "projector_lr": 2.8975723448881836e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.06884765625, "rewards_train/margins": 3.046875, "rewards_train/rejected": -2.984375, "sft_loss": 0.93359375, "step": 795 }, { "dpo_loss": 0.1591796875, "epoch": 0.13, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 9.657651790416763e-07, "loss": 0.1356, "projector_lr": 2.8972955371250293e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.078125, "rewards_train/margins": 2.46875, "rewards_train/rejected": -2.390625, "sft_loss": 0.734375, "step": 796 }, { "dpo_loss": 0.34375, "epoch": 0.13, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 9.656727896973809e-07, "loss": 0.3139, "projector_lr": 2.8970183690921428e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.255859375, "rewards_train/margins": 2.484375, "rewards_train/rejected": -2.234375, "sft_loss": 0.515625, "step": 797 }, { "dpo_loss": 0.36328125, "epoch": 0.13, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 9.65580280286996e-07, "loss": 0.2668, "projector_lr": 2.8967408408609887e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 1.734375, "rewards_train/rejected": -2.3125, "sft_loss": 0.73828125, "step": 798 }, { "dpo_loss": 0.412109375, "epoch": 0.13, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 9.654876508343738e-07, "loss": 0.4208, "projector_lr": 2.8964629525031215e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 2.203125, "rewards_train/rejected": -3.15625, "sft_loss": 0.8671875, "step": 799 }, { "dpo_loss": 0.18359375, "epoch": 0.13, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 9.65394901363397e-07, "loss": 0.1173, "projector_lr": 2.8961847040901915e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.53515625, "rewards_train/margins": 3.390625, "rewards_train/rejected": -2.859375, "sft_loss": 0.44921875, "step": 800 }, { "dpo_loss": 0.1962890625, "epoch": 0.13, "final_loss": 0.1962890625, "grad_norm": 0.0, "learning_rate": 9.653020318979798e-07, "loss": 0.1983, "projector_lr": 2.8959060956939396e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.404296875, "rewards_train/margins": 2.78125, "rewards_train/rejected": -2.375, "sft_loss": 1.09375, "step": 801 }, { "dpo_loss": 0.07763671875, "epoch": 0.13, "final_loss": 0.07763671875, "grad_norm": 0.0, "learning_rate": 9.652090424620671e-07, "loss": 0.1477, "projector_lr": 2.895627127386202e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.65625, "rewards_train/margins": 3.515625, "rewards_train/rejected": -2.859375, "sft_loss": 0.83203125, "step": 802 }, { "dpo_loss": 0.2890625, "epoch": 0.13, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 9.651159330796344e-07, "loss": 0.2532, "projector_lr": 2.8953477992389034e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1318359375, "rewards_train/margins": 1.6484375, "rewards_train/rejected": -1.78125, "sft_loss": 0.5625, "step": 803 }, { "dpo_loss": 0.19921875, "epoch": 0.13, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 9.650227037746887e-07, "loss": 0.2311, "projector_lr": 2.8950681113240664e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.28515625, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.03125, "sft_loss": 0.69921875, "step": 804 }, { "dpo_loss": 0.65234375, "epoch": 0.13, "final_loss": 0.65234375, "grad_norm": 0.0, "learning_rate": 9.649293545712676e-07, "loss": 0.836, "projector_lr": 2.894788063713803e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.0908203125, "rewards_train/margins": 1.71875, "rewards_train/rejected": -1.8125, "sft_loss": 0.69921875, "step": 805 }, { "dpo_loss": 0.1337890625, "epoch": 0.13, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 9.648358854934394e-07, "loss": 0.2682, "projector_lr": 2.8945076564803183e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1220703125, "rewards_train/margins": 2.671875, "rewards_train/rejected": -2.796875, "sft_loss": 0.65234375, "step": 806 }, { "dpo_loss": 0.30859375, "epoch": 0.13, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 9.647422965653038e-07, "loss": 0.2559, "projector_lr": 2.8942268896959115e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.03759765625, "rewards_train/margins": 3.109375, "rewards_train/rejected": -3.0625, "sft_loss": 0.86328125, "step": 807 }, { "dpo_loss": 0.1572265625, "epoch": 0.13, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 9.646485878109913e-07, "loss": 0.2475, "projector_lr": 2.893945763432974e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 1.4453125, "rewards_train/margins": 4.65625, "rewards_train/rejected": -3.203125, "sft_loss": 0.56640625, "step": 808 }, { "dpo_loss": 0.103515625, "epoch": 0.13, "final_loss": 0.103515625, "grad_norm": 0.0, "learning_rate": 9.64554759254663e-07, "loss": 0.1527, "projector_lr": 2.8936642777639892e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.181640625, "rewards_train/margins": 3.0625, "rewards_train/rejected": -2.875, "sft_loss": 0.6875, "step": 809 }, { "dpo_loss": 0.72265625, "epoch": 0.13, "final_loss": 0.72265625, "grad_norm": 0.0, "learning_rate": 9.64460810920511e-07, "loss": 0.4734, "projector_lr": 2.893382432761533e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.46484375, "rewards_train/margins": 2.6875, "rewards_train/rejected": -3.15625, "sft_loss": 0.6171875, "step": 810 }, { "dpo_loss": 0.369140625, "epoch": 0.13, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 9.643667428327584e-07, "loss": 0.5912, "projector_lr": 2.893100228498275e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.08154296875, "rewards_train/margins": 2.234375, "rewards_train/rejected": -2.15625, "sft_loss": 0.80859375, "step": 811 }, { "dpo_loss": 0.6171875, "epoch": 0.13, "final_loss": 0.6171875, "grad_norm": 0.0, "learning_rate": 9.64272555015659e-07, "loss": 0.421, "projector_lr": 2.892817665046977e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.1123046875, "rewards_train/margins": 0.73046875, "rewards_train/rejected": -0.6171875, "sft_loss": 0.734375, "step": 812 }, { "dpo_loss": 0.23828125, "epoch": 0.13, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 9.641782474934977e-07, "loss": 0.2263, "projector_lr": 2.8925347424804933e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.21875, "rewards_train/margins": 2.765625, "rewards_train/rejected": -2.546875, "sft_loss": 0.71875, "step": 813 }, { "dpo_loss": 0.2314453125, "epoch": 0.13, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 9.640838202905903e-07, "loss": 0.2625, "projector_lr": 2.8922514608717713e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.453125, "rewards_train/margins": 2.8125, "rewards_train/rejected": -2.359375, "sft_loss": 0.78515625, "step": 814 }, { "dpo_loss": 0.283203125, "epoch": 0.13, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 9.63989273431283e-07, "loss": 0.2699, "projector_lr": 2.8919678202938494e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.00634765625, "rewards_train/margins": 2.671875, "rewards_train/rejected": -2.671875, "sft_loss": 0.7578125, "step": 815 }, { "dpo_loss": 0.1875, "epoch": 0.13, "final_loss": 0.1875, "grad_norm": 0.0, "learning_rate": 9.638946069399534e-07, "loss": 0.2385, "projector_lr": 2.8916838208198605e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09375, "rewards_train/margins": 2.75, "rewards_train/rejected": -2.65625, "sft_loss": 0.74609375, "step": 816 }, { "dpo_loss": 0.193359375, "epoch": 0.13, "final_loss": 0.193359375, "grad_norm": 0.0, "learning_rate": 9.637998208410097e-07, "loss": 0.2487, "projector_lr": 2.891399462523029e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.33203125, "rewards_train/margins": 2.515625, "rewards_train/rejected": -2.1875, "sft_loss": 0.765625, "step": 817 }, { "dpo_loss": 0.486328125, "epoch": 0.13, "final_loss": 0.486328125, "grad_norm": 0.0, "learning_rate": 9.637049151588906e-07, "loss": 0.3414, "projector_lr": 2.891114745476672e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.00396728515625, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -1.7890625, "sft_loss": 0.79296875, "step": 818 }, { "dpo_loss": 0.294921875, "epoch": 0.13, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 9.636098899180666e-07, "loss": 0.2219, "projector_lr": 2.8908296697541997e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.23046875, "rewards_train/margins": 1.6640625, "rewards_train/rejected": -1.8984375, "sft_loss": 0.73046875, "step": 819 }, { "dpo_loss": 0.3046875, "epoch": 0.13, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 9.635147451430376e-07, "loss": 0.1888, "projector_lr": 2.890544235429113e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 1.828125, "rewards_train/rejected": -2.625, "sft_loss": 0.60546875, "step": 820 }, { "dpo_loss": 0.275390625, "epoch": 0.13, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 9.634194808583356e-07, "loss": 0.3121, "projector_lr": 2.890258442575007e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.09326171875, "rewards_train/margins": 2.0625, "rewards_train/rejected": -2.15625, "sft_loss": 0.5234375, "step": 821 }, { "dpo_loss": 0.076171875, "epoch": 0.13, "final_loss": 0.076171875, "grad_norm": 0.0, "learning_rate": 9.63324097088523e-07, "loss": 0.2482, "projector_lr": 2.8899722912655692e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1904296875, "rewards_train/margins": 3.59375, "rewards_train/rejected": -3.390625, "sft_loss": 0.66796875, "step": 822 }, { "dpo_loss": 0.2421875, "epoch": 0.13, "final_loss": 0.2421875, "grad_norm": 0.0, "learning_rate": 9.632285938581928e-07, "loss": 0.1865, "projector_lr": 2.8896857815745785e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 2.25, "rewards_train/rejected": -2.515625, "sft_loss": 0.61328125, "step": 823 }, { "dpo_loss": 0.1494140625, "epoch": 0.13, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 9.63132971191969e-07, "loss": 0.4424, "projector_lr": 2.889398913575907e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1748046875, "rewards_train/margins": 2.96875, "rewards_train/rejected": -2.796875, "sft_loss": 0.75390625, "step": 824 }, { "dpo_loss": 0.396484375, "epoch": 0.13, "final_loss": 0.396484375, "grad_norm": 0.0, "learning_rate": 9.630372291145061e-07, "loss": 0.2854, "projector_lr": 2.8891116873435186e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 2.03125, "rewards_train/rejected": -2.5625, "sft_loss": 0.73046875, "step": 825 }, { "dpo_loss": 0.080078125, "epoch": 0.13, "final_loss": 0.080078125, "grad_norm": 0.0, "learning_rate": 9.629413676504901e-07, "loss": 0.1797, "projector_lr": 2.88882410295147e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.03857421875, "rewards_train/margins": 3.4375, "rewards_train/rejected": -3.40625, "sft_loss": 0.61328125, "step": 826 }, { "dpo_loss": 0.2265625, "epoch": 0.13, "final_loss": 0.2265625, "grad_norm": 0.0, "learning_rate": 9.628453868246368e-07, "loss": 0.448, "projector_lr": 2.8885361604739105e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10986328125, "rewards_train/margins": 1.921875, "rewards_train/rejected": -2.03125, "sft_loss": 0.859375, "step": 827 }, { "dpo_loss": 0.1630859375, "epoch": 0.13, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 9.627492866616934e-07, "loss": 0.1312, "projector_lr": 2.888247859985081e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6875, "rewards_train/margins": 2.40625, "rewards_train/rejected": -3.09375, "sft_loss": 0.58203125, "step": 828 }, { "dpo_loss": 0.38671875, "epoch": 0.13, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 9.626530671864381e-07, "loss": 0.2723, "projector_lr": 2.8879592015593147e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.306640625, "rewards_train/margins": 2.78125, "rewards_train/rejected": -2.46875, "sft_loss": 0.828125, "step": 829 }, { "dpo_loss": 0.341796875, "epoch": 0.13, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 9.625567284236792e-07, "loss": 0.4859, "projector_lr": 2.887670185271038e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.419921875, "rewards_train/margins": 1.671875, "rewards_train/rejected": -1.25, "sft_loss": 0.7734375, "step": 830 }, { "dpo_loss": 0.09521484375, "epoch": 0.13, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 9.62460270398256e-07, "loss": 0.2562, "projector_lr": 2.8873808111947685e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.00067138671875, "rewards_train/margins": 2.84375, "rewards_train/rejected": -2.84375, "sft_loss": 0.609375, "step": 831 }, { "dpo_loss": 0.11328125, "epoch": 0.13, "final_loss": 0.11328125, "grad_norm": 0.0, "learning_rate": 9.623636931350389e-07, "loss": 0.1884, "projector_lr": 2.887091079405117e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.01806640625, "rewards_train/margins": 2.8125, "rewards_train/rejected": -2.828125, "sft_loss": 0.7421875, "step": 832 }, { "dpo_loss": 0.2080078125, "epoch": 0.13, "final_loss": 0.2080078125, "grad_norm": 0.0, "learning_rate": 9.622669966589285e-07, "loss": 0.1683, "projector_lr": 2.8868009899767862e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21875, "rewards_train/margins": 2.859375, "rewards_train/rejected": -3.078125, "sft_loss": 0.58984375, "step": 833 }, { "dpo_loss": 0.48828125, "epoch": 0.13, "final_loss": 0.48828125, "grad_norm": 0.0, "learning_rate": 9.621701809948566e-07, "loss": 0.3875, "projector_lr": 2.8865105429845705e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1123046875, "rewards_train/margins": 1.15625, "rewards_train/rejected": -1.2734375, "sft_loss": 0.55859375, "step": 834 }, { "dpo_loss": 0.294921875, "epoch": 0.13, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 9.620732461677856e-07, "loss": 0.2295, "projector_lr": 2.8862197385033568e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.330078125, "rewards_train/margins": 2.5, "rewards_train/rejected": -2.828125, "sft_loss": 0.640625, "step": 835 }, { "dpo_loss": 0.1328125, "epoch": 0.13, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 9.61976192202708e-07, "loss": 0.188, "projector_lr": 2.8859285766081244e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2216796875, "rewards_train/margins": 2.734375, "rewards_train/rejected": -2.515625, "sft_loss": 0.76953125, "step": 836 }, { "dpo_loss": 0.439453125, "epoch": 0.13, "final_loss": 0.439453125, "grad_norm": 0.0, "learning_rate": 9.618790191246484e-07, "loss": 0.3766, "projector_lr": 2.8856370573739456e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 2.484375, "rewards_train/rejected": -3.0625, "sft_loss": 0.609375, "step": 837 }, { "dpo_loss": 0.31640625, "epoch": 0.13, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 9.617817269586605e-07, "loss": 0.457, "projector_lr": 2.8853451808759817e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.337890625, "rewards_train/margins": 2.203125, "rewards_train/rejected": -2.53125, "sft_loss": 0.62109375, "step": 838 }, { "dpo_loss": 0.419921875, "epoch": 0.13, "final_loss": 0.419921875, "grad_norm": 0.0, "learning_rate": 9.6168431572983e-07, "loss": 0.5123, "projector_lr": 2.8850529471894904e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.15625, "rewards_train/margins": 2.703125, "rewards_train/rejected": -2.859375, "sft_loss": 0.81640625, "step": 839 }, { "dpo_loss": 0.21875, "epoch": 0.13, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 9.615867854632727e-07, "loss": 0.4264, "projector_lr": 2.8847603563898186e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.369140625, "rewards_train/margins": 2.84375, "rewards_train/rejected": -2.484375, "sft_loss": 0.77734375, "step": 840 }, { "dpo_loss": 0.1396484375, "epoch": 0.13, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 9.61489136184135e-07, "loss": 0.3023, "projector_lr": 2.8844674085524053e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.478515625, "rewards_train/margins": 3.03125, "rewards_train/rejected": -2.546875, "sft_loss": 0.578125, "step": 841 }, { "dpo_loss": 0.201171875, "epoch": 0.13, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 9.613913679175945e-07, "loss": 0.2219, "projector_lr": 2.8841741037527836e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5625, "rewards_train/margins": 2.5, "rewards_train/rejected": -3.0625, "sft_loss": 0.69921875, "step": 842 }, { "dpo_loss": 0.2412109375, "epoch": 0.13, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 9.612934806888587e-07, "loss": 0.2241, "projector_lr": 2.8838804420665764e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2373046875, "rewards_train/margins": 2.90625, "rewards_train/rejected": -3.140625, "sft_loss": 0.6640625, "step": 843 }, { "dpo_loss": 0.302734375, "epoch": 0.14, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 9.611954745231666e-07, "loss": 0.2374, "projector_lr": 2.8835864235695e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.251953125, "rewards_train/margins": 2.796875, "rewards_train/rejected": -3.046875, "sft_loss": 0.640625, "step": 844 }, { "dpo_loss": 0.330078125, "epoch": 0.14, "final_loss": 0.330078125, "grad_norm": 0.0, "learning_rate": 9.61097349445787e-07, "loss": 0.3266, "projector_lr": 2.8832920483373617e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.052978515625, "rewards_train/margins": 1.625, "rewards_train/rejected": -1.6796875, "sft_loss": 0.86328125, "step": 845 }, { "dpo_loss": 0.4375, "epoch": 0.14, "final_loss": 0.4375, "grad_norm": 0.0, "learning_rate": 9.609991054820206e-07, "loss": 0.3595, "projector_lr": 2.8829973164460617e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 0.98828125, "rewards_train/rejected": -1.8046875, "sft_loss": 0.7421875, "step": 846 }, { "dpo_loss": 0.1416015625, "epoch": 0.14, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 9.609007426571973e-07, "loss": 0.1818, "projector_lr": 2.8827022279715918e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.5390625, "rewards_train/margins": 3.09375, "rewards_train/rejected": -2.546875, "sft_loss": 0.98046875, "step": 847 }, { "dpo_loss": 0.201171875, "epoch": 0.14, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 9.608022609966786e-07, "loss": 0.2076, "projector_lr": 2.882406782990036e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0869140625, "rewards_train/margins": 2.9375, "rewards_train/rejected": -3.03125, "sft_loss": 0.8046875, "step": 848 }, { "dpo_loss": 0.31640625, "epoch": 0.14, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 9.607036605258564e-07, "loss": 0.2029, "projector_lr": 2.882110981577569e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.224609375, "rewards_train/margins": 2.265625, "rewards_train/rejected": -2.5, "sft_loss": 0.5625, "step": 849 }, { "dpo_loss": 0.63671875, "epoch": 0.14, "final_loss": 0.63671875, "grad_norm": 0.0, "learning_rate": 9.60604941270153e-07, "loss": 0.4616, "projector_lr": 2.8818148238104595e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.78125, "rewards_train/margins": 1.8671875, "rewards_train/rejected": -2.65625, "sft_loss": 0.54296875, "step": 850 }, { "dpo_loss": 0.2412109375, "epoch": 0.14, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 9.60506103255022e-07, "loss": 0.1554, "projector_lr": 2.8815183097650665e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0206298828125, "rewards_train/margins": 2.890625, "rewards_train/rejected": -2.90625, "sft_loss": 0.57421875, "step": 851 }, { "dpo_loss": 0.10302734375, "epoch": 0.14, "final_loss": 0.10302734375, "grad_norm": 0.0, "learning_rate": 9.60407146505947e-07, "loss": 0.2249, "projector_lr": 2.8812214395178408e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6796875, "rewards_train/margins": 4.875, "rewards_train/rejected": -4.1875, "sft_loss": 0.8515625, "step": 852 }, { "dpo_loss": 0.35546875, "epoch": 0.14, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 9.60308071048442e-07, "loss": 0.2069, "projector_lr": 2.8809242131453257e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7265625, "rewards_train/margins": 2.671875, "rewards_train/rejected": -3.390625, "sft_loss": 0.6875, "step": 853 }, { "dpo_loss": 0.5234375, "epoch": 0.14, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 9.602088769080523e-07, "loss": 0.6644, "projector_lr": 2.880626630724157e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.546875, "rewards_train/margins": 1.359375, "rewards_train/rejected": -1.90625, "sft_loss": 0.87109375, "step": 854 }, { "dpo_loss": 0.083984375, "epoch": 0.14, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 9.601095641103535e-07, "loss": 0.0526, "projector_lr": 2.880328692331061e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05029296875, "rewards_train/margins": 2.890625, "rewards_train/rejected": -2.9375, "sft_loss": 0.7109375, "step": 855 }, { "dpo_loss": 0.3671875, "epoch": 0.14, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 9.600101326809516e-07, "loss": 0.2504, "projector_lr": 2.8800303980428547e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.29296875, "rewards_train/margins": 2.390625, "rewards_train/rejected": -2.671875, "sft_loss": 0.703125, "step": 856 }, { "dpo_loss": 0.326171875, "epoch": 0.14, "final_loss": 0.326171875, "grad_norm": 0.0, "learning_rate": 9.599105826454835e-07, "loss": 0.2638, "projector_lr": 2.879731747936451e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.365234375, "rewards_train/margins": 1.78125, "rewards_train/rejected": -2.140625, "sft_loss": 0.8125, "step": 857 }, { "dpo_loss": 0.1435546875, "epoch": 0.14, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 9.598109140296166e-07, "loss": 0.1864, "projector_lr": 2.8794327420888503e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.875, "rewards_train/margins": 2.875, "rewards_train/rejected": -3.75, "sft_loss": 0.7421875, "step": 858 }, { "dpo_loss": 0.275390625, "epoch": 0.14, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 9.597111268590486e-07, "loss": 0.2716, "projector_lr": 2.8791333805771463e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 2.5625, "rewards_train/rejected": -2.9375, "sft_loss": 0.8046875, "step": 859 }, { "dpo_loss": 0.33203125, "epoch": 0.14, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 9.596112211595082e-07, "loss": 0.2649, "projector_lr": 2.878833663478525e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.349609375, "rewards_train/margins": 2.015625, "rewards_train/rejected": -2.359375, "sft_loss": 0.7890625, "step": 860 }, { "dpo_loss": 0.255859375, "epoch": 0.14, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 9.595111969567545e-07, "loss": 0.3994, "projector_lr": 2.8785335908702634e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.71484375, "rewards_train/margins": 3.140625, "rewards_train/rejected": -3.84375, "sft_loss": 0.8359375, "step": 861 }, { "dpo_loss": 0.361328125, "epoch": 0.14, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.594110542765767e-07, "loss": 0.2702, "projector_lr": 2.8782331628297307e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.35546875, "rewards_train/margins": 2.796875, "rewards_train/rejected": -3.140625, "sft_loss": 0.7109375, "step": 862 }, { "dpo_loss": 0.37890625, "epoch": 0.14, "final_loss": 0.37890625, "grad_norm": 0.0, "learning_rate": 9.593107931447954e-07, "loss": 0.2492, "projector_lr": 2.8779323794343863e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.33203125, "rewards_train/margins": 1.953125, "rewards_train/rejected": -2.28125, "sft_loss": 0.69921875, "step": 863 }, { "dpo_loss": 0.232421875, "epoch": 0.14, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 9.59210413587261e-07, "loss": 0.1479, "projector_lr": 2.877631240761783e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.404296875, "rewards_train/margins": 2.21875, "rewards_train/rejected": -2.625, "sft_loss": 0.66796875, "step": 864 }, { "dpo_loss": 0.30859375, "epoch": 0.14, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 9.591099156298547e-07, "loss": 0.2812, "projector_lr": 2.8773297468895647e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 2.578125, "rewards_train/rejected": -3.109375, "sft_loss": 0.75, "step": 865 }, { "dpo_loss": 0.14453125, "epoch": 0.14, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 9.590092992984887e-07, "loss": 0.1531, "projector_lr": 2.877027897895466e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.298828125, "rewards_train/margins": 2.96875, "rewards_train/rejected": -2.671875, "sft_loss": 0.62890625, "step": 866 }, { "dpo_loss": 0.2197265625, "epoch": 0.14, "final_loss": 0.2197265625, "grad_norm": 0.0, "learning_rate": 9.589085646191046e-07, "loss": 0.1331, "projector_lr": 2.876725693857314e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.84375, "sft_loss": 0.625, "step": 867 }, { "dpo_loss": 0.1484375, "epoch": 0.14, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 9.588077116176756e-07, "loss": 0.6041, "projector_lr": 2.876423134853027e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05712890625, "rewards_train/margins": 2.9375, "rewards_train/rejected": -3.0, "sft_loss": 0.86328125, "step": 868 }, { "dpo_loss": 0.08447265625, "epoch": 0.14, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 9.587067403202048e-07, "loss": 0.2531, "projector_lr": 2.876120220960615e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 3.671875, "rewards_train/rejected": -4.625, "sft_loss": 0.69140625, "step": 869 }, { "dpo_loss": 0.2314453125, "epoch": 0.14, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 9.586056507527264e-07, "loss": 0.531, "projector_lr": 2.8758169522581796e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.28125, "rewards_train/margins": 2.078125, "rewards_train/rejected": -1.796875, "sft_loss": 0.62109375, "step": 870 }, { "dpo_loss": 0.302734375, "epoch": 0.14, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 9.585044429413044e-07, "loss": 0.4876, "projector_lr": 2.875513328823913e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 2.203125, "rewards_train/rejected": -2.75, "sft_loss": 0.474609375, "step": 871 }, { "dpo_loss": 0.173828125, "epoch": 0.14, "final_loss": 0.173828125, "grad_norm": 0.0, "learning_rate": 9.584031169120333e-07, "loss": 0.1854, "projector_lr": 2.8752093507361003e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.08154296875, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.09375, "sft_loss": 0.71484375, "step": 872 }, { "dpo_loss": 0.4765625, "epoch": 0.14, "final_loss": 0.4765625, "grad_norm": 0.0, "learning_rate": 9.583016726910388e-07, "loss": 0.3159, "projector_lr": 2.8749050180731167e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.259765625, "rewards_train/margins": 1.109375, "rewards_train/rejected": -1.3671875, "sft_loss": 0.77734375, "step": 873 }, { "dpo_loss": 0.203125, "epoch": 0.14, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 9.582001103044763e-07, "loss": 0.4249, "projector_lr": 2.8746003309134294e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1533203125, "rewards_train/margins": 2.296875, "rewards_train/rejected": -2.453125, "sft_loss": 0.93359375, "step": 874 }, { "dpo_loss": 0.115234375, "epoch": 0.14, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 9.580984297785324e-07, "loss": 0.1128, "projector_lr": 2.8742952893355973e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.037353515625, "rewards_train/margins": 2.9375, "rewards_train/rejected": -2.90625, "sft_loss": 0.84375, "step": 875 }, { "dpo_loss": 0.166015625, "epoch": 0.14, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 9.579966311394232e-07, "loss": 0.3096, "projector_lr": 2.87398989341827e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1962890625, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.4375, "sft_loss": 0.59375, "step": 876 }, { "dpo_loss": 0.31640625, "epoch": 0.14, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 9.578947144133964e-07, "loss": 0.5625, "projector_lr": 2.8736841432401896e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 1.8671875, "rewards_train/rejected": -2.578125, "sft_loss": 0.8046875, "step": 877 }, { "dpo_loss": 0.1845703125, "epoch": 0.14, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 9.57792679626729e-07, "loss": 0.1283, "projector_lr": 2.8733780388801876e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38671875, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.125, "sft_loss": 0.8828125, "step": 878 }, { "dpo_loss": 0.578125, "epoch": 0.14, "final_loss": 0.578125, "grad_norm": 0.0, "learning_rate": 9.576905268057294e-07, "loss": 0.4325, "projector_lr": 2.873071580417188e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.265625, "rewards_train/margins": 1.2734375, "rewards_train/rejected": -1.5390625, "sft_loss": 0.7421875, "step": 879 }, { "dpo_loss": 0.48046875, "epoch": 0.14, "final_loss": 0.48046875, "grad_norm": 0.0, "learning_rate": 9.57588255976736e-07, "loss": 0.4213, "projector_lr": 2.872764767930208e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 2.53125, "rewards_train/rejected": -3.03125, "sft_loss": 0.87109375, "step": 880 }, { "dpo_loss": 0.1357421875, "epoch": 0.14, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 9.574858671661174e-07, "loss": 0.3084, "projector_lr": 2.872457601498352e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.330078125, "rewards_train/margins": 3.34375, "rewards_train/rejected": -3.6875, "sft_loss": 0.6015625, "step": 881 }, { "dpo_loss": 0.2353515625, "epoch": 0.14, "final_loss": 0.2353515625, "grad_norm": 0.0, "learning_rate": 9.57383360400273e-07, "loss": 0.1886, "projector_lr": 2.872150081200819e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.42578125, "rewards_train/margins": 2.4375, "rewards_train/rejected": -2.859375, "sft_loss": 0.94921875, "step": 882 }, { "dpo_loss": 0.224609375, "epoch": 0.14, "final_loss": 0.224609375, "grad_norm": 0.0, "learning_rate": 9.572807357056325e-07, "loss": 0.2447, "projector_lr": 2.871842207116898e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.28515625, "rewards_train/margins": 3.46875, "rewards_train/rejected": -3.171875, "sft_loss": 0.80078125, "step": 883 }, { "dpo_loss": 0.416015625, "epoch": 0.14, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 9.571779931086561e-07, "loss": 0.54, "projector_lr": 2.8715339793259684e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 2.046875, "rewards_train/rejected": -2.609375, "sft_loss": 0.94140625, "step": 884 }, { "dpo_loss": 0.0693359375, "epoch": 0.14, "final_loss": 0.0693359375, "grad_norm": 0.0, "learning_rate": 9.57075132635834e-07, "loss": 0.0856, "projector_lr": 2.8712253979075025e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0751953125, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.0625, "sft_loss": 0.86328125, "step": 885 }, { "dpo_loss": 0.061767578125, "epoch": 0.14, "final_loss": 0.061767578125, "grad_norm": 0.0, "learning_rate": 9.569721543136875e-07, "loss": 0.1607, "projector_lr": 2.8709164629410626e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.9375, "rewards_train/margins": 5.84375, "rewards_train/rejected": -4.90625, "sft_loss": 0.703125, "step": 886 }, { "dpo_loss": 0.77734375, "epoch": 0.14, "final_loss": 0.77734375, "grad_norm": 0.0, "learning_rate": 9.568690581687676e-07, "loss": 0.5542, "projector_lr": 2.8706071745063026e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 0.7890625, "rewards_train/rejected": -1.59375, "sft_loss": 0.83984375, "step": 887 }, { "dpo_loss": 0.12255859375, "epoch": 0.14, "final_loss": 0.12255859375, "grad_norm": 0.0, "learning_rate": 9.567658442276558e-07, "loss": 0.1714, "projector_lr": 2.870297532682968e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.212890625, "rewards_train/margins": 2.40625, "rewards_train/rejected": -2.625, "sft_loss": 0.8828125, "step": 888 }, { "dpo_loss": 0.27734375, "epoch": 0.14, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 9.566625125169648e-07, "loss": 0.2545, "projector_lr": 2.869987537550894e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.087890625, "rewards_train/margins": 2.578125, "rewards_train/rejected": -2.484375, "sft_loss": 0.66796875, "step": 889 }, { "dpo_loss": 0.265625, "epoch": 0.14, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 9.56559063063336e-07, "loss": 0.197, "projector_lr": 2.869677189190008e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75, "rewards_train/margins": 1.9375, "rewards_train/rejected": -2.6875, "sft_loss": 0.68359375, "step": 890 }, { "dpo_loss": 0.19921875, "epoch": 0.14, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 9.56455495893443e-07, "loss": 0.1735, "projector_lr": 2.8693664876803293e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.43359375, "rewards_train/margins": 3.671875, "rewards_train/rejected": -3.234375, "sft_loss": 0.5859375, "step": 891 }, { "dpo_loss": 0.7265625, "epoch": 0.14, "final_loss": 0.7265625, "grad_norm": 0.0, "learning_rate": 9.563518110339883e-07, "loss": 0.5272, "projector_lr": 2.869055433101965e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 2.140625, "rewards_train/rejected": -2.65625, "sft_loss": 0.80859375, "step": 892 }, { "dpo_loss": 0.5703125, "epoch": 0.14, "final_loss": 0.5703125, "grad_norm": 0.0, "learning_rate": 9.562480085117057e-07, "loss": 0.3561, "projector_lr": 2.8687440255351174e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3203125, "rewards_train/margins": 1.109375, "rewards_train/rejected": -1.4296875, "sft_loss": 1.0, "step": 893 }, { "dpo_loss": 0.4765625, "epoch": 0.14, "final_loss": 0.4765625, "grad_norm": 0.0, "learning_rate": 9.56144088353359e-07, "loss": 0.2892, "projector_lr": 2.868432265060077e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 1.328125, "rewards_train/rejected": -2.34375, "sft_loss": 0.640625, "step": 894 }, { "dpo_loss": 0.12451171875, "epoch": 0.14, "final_loss": 0.12451171875, "grad_norm": 0.0, "learning_rate": 9.56040050585742e-07, "loss": 0.239, "projector_lr": 2.8681201517572263e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.287109375, "rewards_train/margins": 3.3125, "rewards_train/rejected": -3.015625, "sft_loss": 0.625, "step": 895 }, { "dpo_loss": 0.322265625, "epoch": 0.14, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 9.559358952356794e-07, "loss": 0.2588, "projector_lr": 2.867807685707038e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.41015625, "rewards_train/margins": 2.78125, "rewards_train/rejected": -3.1875, "sft_loss": 0.859375, "step": 896 }, { "dpo_loss": 0.08935546875, "epoch": 0.14, "final_loss": 0.08935546875, "grad_norm": 0.0, "learning_rate": 9.558316223300256e-07, "loss": 0.1187, "projector_lr": 2.867494866990077e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.140625, "rewards_train/margins": 3.625, "rewards_train/rejected": -3.765625, "sft_loss": 0.5, "step": 897 }, { "dpo_loss": 0.2451171875, "epoch": 0.14, "final_loss": 0.2451171875, "grad_norm": 0.0, "learning_rate": 9.557272318956662e-07, "loss": 0.2695, "projector_lr": 2.867181695686999e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0927734375, "rewards_train/margins": 2.484375, "rewards_train/rejected": -2.59375, "sft_loss": 0.7890625, "step": 898 }, { "dpo_loss": 0.2109375, "epoch": 0.14, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 9.556227239595163e-07, "loss": 0.1412, "projector_lr": 2.866868171878549e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 2.5625, "rewards_train/rejected": -3.265625, "sft_loss": 0.92578125, "step": 899 }, { "dpo_loss": 0.158203125, "epoch": 0.14, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 9.555180985485214e-07, "loss": 0.1723, "projector_lr": 2.866554295645564e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10791015625, "rewards_train/margins": 3.09375, "rewards_train/rejected": -2.984375, "sft_loss": 0.67578125, "step": 900 }, { "dpo_loss": 0.357421875, "epoch": 0.14, "final_loss": 0.357421875, "grad_norm": 0.0, "learning_rate": 9.554133556896575e-07, "loss": 0.2807, "projector_lr": 2.866240067068973e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.287109375, "rewards_train/margins": 2.359375, "rewards_train/rejected": -2.65625, "sft_loss": 0.58984375, "step": 901 }, { "dpo_loss": 0.298828125, "epoch": 0.14, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 9.553084954099311e-07, "loss": 0.3455, "projector_lr": 2.8659254862297936e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -2.71875, "sft_loss": 0.7265625, "step": 902 }, { "dpo_loss": 0.302734375, "epoch": 0.14, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 9.552035177363784e-07, "loss": 0.2408, "projector_lr": 2.8656105532091356e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.46875, "sft_loss": 0.73046875, "step": 903 }, { "dpo_loss": 0.28515625, "epoch": 0.14, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 9.550984226960662e-07, "loss": 0.2232, "projector_lr": 2.865295268088199e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1787109375, "rewards_train/margins": 1.7109375, "rewards_train/rejected": -1.890625, "sft_loss": 0.828125, "step": 904 }, { "dpo_loss": 0.28515625, "epoch": 0.14, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 9.549932103160918e-07, "loss": 0.4384, "projector_lr": 2.8649796309482755e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.25, "sft_loss": 0.56640625, "step": 905 }, { "dpo_loss": 0.263671875, "epoch": 0.14, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 9.54887880623582e-07, "loss": 0.3028, "projector_lr": 2.864663641870747e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 1.7421875, "rewards_train/rejected": -2.4375, "sft_loss": 0.86328125, "step": 906 }, { "dpo_loss": 0.470703125, "epoch": 0.15, "final_loss": 0.470703125, "grad_norm": 0.0, "learning_rate": 9.54782433645695e-07, "loss": 0.2705, "projector_lr": 2.864347300937085e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 1.703125, "rewards_train/rejected": -2.234375, "sft_loss": 0.8125, "step": 907 }, { "dpo_loss": 0.126953125, "epoch": 0.15, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 9.54676869409618e-07, "loss": 0.5319, "projector_lr": 2.8640306082288544e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0751953125, "rewards_train/margins": 3.78125, "rewards_train/rejected": -3.859375, "sft_loss": 0.7578125, "step": 908 }, { "dpo_loss": 0.1416015625, "epoch": 0.15, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 9.545711879425693e-07, "loss": 0.1606, "projector_lr": 2.8637135638277083e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.193359375, "rewards_train/margins": 2.078125, "rewards_train/rejected": -2.265625, "sft_loss": 0.59765625, "step": 909 }, { "dpo_loss": 0.466796875, "epoch": 0.15, "final_loss": 0.466796875, "grad_norm": 0.0, "learning_rate": 9.544653892717968e-07, "loss": 0.3237, "projector_lr": 2.863396167815391e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 2.546875, "rewards_train/rejected": -3.609375, "sft_loss": 0.73828125, "step": 910 }, { "dpo_loss": 0.154296875, "epoch": 0.15, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 9.543594734245795e-07, "loss": 0.1509, "projector_lr": 2.8630784202737384e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.30859375, "rewards_train/margins": 3.46875, "rewards_train/rejected": -3.15625, "sft_loss": 0.50390625, "step": 911 }, { "dpo_loss": 0.2578125, "epoch": 0.15, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 9.542534404282255e-07, "loss": 0.5, "projector_lr": 2.862760321284677e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.037841796875, "rewards_train/margins": 3.1875, "rewards_train/rejected": -3.140625, "sft_loss": 0.9765625, "step": 912 }, { "dpo_loss": 0.1806640625, "epoch": 0.15, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 9.541472903100743e-07, "loss": 0.2728, "projector_lr": 2.862441870930223e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.058837890625, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.046875, "sft_loss": 0.890625, "step": 913 }, { "dpo_loss": 0.1240234375, "epoch": 0.15, "final_loss": 0.1240234375, "grad_norm": 0.0, "learning_rate": 9.540410230974943e-07, "loss": 0.236, "projector_lr": 2.862123069292483e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2314453125, "rewards_train/margins": 3.59375, "rewards_train/rejected": -3.8125, "sft_loss": 0.67578125, "step": 914 }, { "dpo_loss": 0.9296875, "epoch": 0.15, "final_loss": 0.9296875, "grad_norm": 0.0, "learning_rate": 9.53934638817885e-07, "loss": 0.6801, "projector_lr": 2.8618039164536553e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.03125, "rewards_train/margins": 0.396484375, "rewards_train/rejected": -1.4296875, "sft_loss": 0.85546875, "step": 915 }, { "dpo_loss": 0.41796875, "epoch": 0.15, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 9.53828137498676e-07, "loss": 0.528, "projector_lr": 2.8614844124960283e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.333984375, "rewards_train/margins": 1.5859375, "rewards_train/rejected": -1.921875, "sft_loss": 0.71484375, "step": 916 }, { "dpo_loss": 0.62890625, "epoch": 0.15, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 9.537215191673268e-07, "loss": 0.6491, "projector_lr": 2.861164557501981e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 1.1953125, "rewards_train/rejected": -2.015625, "sft_loss": 0.640625, "step": 917 }, { "dpo_loss": 0.265625, "epoch": 0.15, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 9.536147838513273e-07, "loss": 0.2422, "projector_lr": 2.8608443515539823e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.28515625, "rewards_train/margins": 3.21875, "rewards_train/rejected": -3.5, "sft_loss": 0.59375, "step": 918 }, { "dpo_loss": 0.68359375, "epoch": 0.15, "final_loss": 0.68359375, "grad_norm": 0.0, "learning_rate": 9.535079315781973e-07, "loss": 0.5829, "projector_lr": 2.860523794734592e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.0252685546875, "rewards_train/margins": 1.328125, "rewards_train/rejected": -1.3046875, "sft_loss": 0.97265625, "step": 919 }, { "dpo_loss": 0.419921875, "epoch": 0.15, "final_loss": 0.419921875, "grad_norm": 0.0, "learning_rate": 9.534009623754868e-07, "loss": 0.4498, "projector_lr": 2.860202887126461e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 3.90625, "rewards_train/rejected": -4.375, "sft_loss": 0.77734375, "step": 920 }, { "dpo_loss": 0.83203125, "epoch": 0.15, "final_loss": 0.83203125, "grad_norm": 0.0, "learning_rate": 9.532938762707765e-07, "loss": 0.7125, "projector_lr": 2.8598816288123298e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 0.79296875, "rewards_train/rejected": -1.75, "sft_loss": 0.88671875, "step": 921 }, { "dpo_loss": 0.21875, "epoch": 0.15, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 9.531866732916762e-07, "loss": 0.1742, "projector_lr": 2.859560019875029e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.515625, "rewards_train/margins": 2.25, "rewards_train/rejected": -2.765625, "sft_loss": 0.80078125, "step": 922 }, { "dpo_loss": 0.302734375, "epoch": 0.15, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 9.530793534658269e-07, "loss": 0.374, "projector_lr": 2.859238060397481e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.011962890625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.34375, "sft_loss": 0.5234375, "step": 923 }, { "dpo_loss": 0.291015625, "epoch": 0.15, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 9.52971916820899e-07, "loss": 0.1802, "projector_lr": 2.8589157504626973e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.53125, "rewards_train/margins": 1.640625, "rewards_train/rejected": -2.171875, "sft_loss": 0.92578125, "step": 924 }, { "dpo_loss": 0.1669921875, "epoch": 0.15, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 9.528643633845934e-07, "loss": 0.2412, "projector_lr": 2.8585930901537803e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.232421875, "rewards_train/margins": 2.375, "rewards_train/rejected": -2.140625, "sft_loss": 0.48828125, "step": 925 }, { "dpo_loss": 0.3046875, "epoch": 0.15, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 9.52756693184641e-07, "loss": 0.158, "projector_lr": 2.8582700795539233e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2001953125, "rewards_train/margins": 2.328125, "rewards_train/rejected": -2.140625, "sft_loss": 0.9296875, "step": 926 }, { "dpo_loss": 0.12109375, "epoch": 0.15, "final_loss": 0.12109375, "grad_norm": 0.0, "learning_rate": 9.526489062488027e-07, "loss": 0.1293, "projector_lr": 2.8579467187464084e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.4609375, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.71875, "sft_loss": 0.73046875, "step": 927 }, { "dpo_loss": 0.271484375, "epoch": 0.15, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 9.525410026048697e-07, "loss": 0.5084, "projector_lr": 2.8576230078146095e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.3125, "rewards_train/margins": 3.453125, "rewards_train/rejected": -3.140625, "sft_loss": 0.796875, "step": 928 }, { "dpo_loss": 0.65625, "epoch": 0.15, "final_loss": 0.65625, "grad_norm": 0.0, "learning_rate": 9.524329822806632e-07, "loss": 0.5085, "projector_lr": 2.8572989468419896e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1845703125, "rewards_train/margins": 1.7734375, "rewards_train/rejected": -1.953125, "sft_loss": 0.61328125, "step": 929 }, { "dpo_loss": 0.36328125, "epoch": 0.15, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 9.523248453040342e-07, "loss": 0.3252, "projector_lr": 2.856974535912103e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.455078125, "rewards_train/margins": 1.828125, "rewards_train/rejected": -2.28125, "sft_loss": 0.70703125, "step": 930 }, { "dpo_loss": 0.1611328125, "epoch": 0.15, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 9.522165917028642e-07, "loss": 0.1007, "projector_lr": 2.856649775108593e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.018798828125, "rewards_train/margins": 2.5625, "rewards_train/rejected": -2.578125, "sft_loss": 0.640625, "step": 931 }, { "dpo_loss": 0.34765625, "epoch": 0.15, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 9.521082215050649e-07, "loss": 0.4471, "projector_lr": 2.856324664515195e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0029449462890625, "rewards_train/margins": 1.046875, "rewards_train/rejected": -1.046875, "sft_loss": 0.5546875, "step": 932 }, { "dpo_loss": 0.298828125, "epoch": 0.15, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 9.519997347385774e-07, "loss": 0.2346, "projector_lr": 2.8559992042157324e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.06787109375, "rewards_train/margins": 1.765625, "rewards_train/rejected": -1.6953125, "sft_loss": 0.96875, "step": 933 }, { "dpo_loss": 0.283203125, "epoch": 0.15, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 9.518911314313733e-07, "loss": 0.4022, "projector_lr": 2.85567339429412e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 1.9921875, "rewards_train/rejected": -2.25, "sft_loss": 0.61328125, "step": 934 }, { "dpo_loss": 0.2451171875, "epoch": 0.15, "final_loss": 0.2451171875, "grad_norm": 0.0, "learning_rate": 9.517824116114543e-07, "loss": 0.2558, "projector_lr": 2.855347234834363e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.061767578125, "rewards_train/margins": 1.8125, "rewards_train/rejected": -1.75, "sft_loss": 0.578125, "step": 935 }, { "dpo_loss": 0.322265625, "epoch": 0.15, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 9.516735753068521e-07, "loss": 0.4304, "projector_lr": 2.8550207259205566e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.7578125, "rewards_train/margins": 3.609375, "rewards_train/rejected": -2.84375, "sft_loss": 0.6953125, "step": 936 }, { "dpo_loss": 0.1494140625, "epoch": 0.15, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 9.51564622545628e-07, "loss": 0.1834, "projector_lr": 2.8546938676368843e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.068359375, "rewards_train/margins": 2.953125, "rewards_train/rejected": -3.015625, "sft_loss": 0.51953125, "step": 937 }, { "dpo_loss": 0.1689453125, "epoch": 0.15, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 9.514555533558742e-07, "loss": 0.2045, "projector_lr": 2.8543666600676228e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.375, "rewards_train/margins": 2.9375, "rewards_train/rejected": -2.5625, "sft_loss": 0.75, "step": 938 }, { "dpo_loss": 0.2412109375, "epoch": 0.15, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 9.513463677657119e-07, "loss": 0.1559, "projector_lr": 2.854039103297136e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.111328125, "rewards_train/margins": 2.546875, "rewards_train/rejected": -2.65625, "sft_loss": 0.66015625, "step": 939 }, { "dpo_loss": 0.412109375, "epoch": 0.15, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 9.512370658032931e-07, "loss": 0.238, "projector_lr": 2.8537111974098794e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.083984375, "rewards_train/margins": 1.609375, "rewards_train/rejected": -1.5234375, "sft_loss": 0.58203125, "step": 940 }, { "dpo_loss": 0.466796875, "epoch": 0.15, "final_loss": 0.466796875, "grad_norm": 0.0, "learning_rate": 9.511276474967993e-07, "loss": 0.3816, "projector_lr": 2.853382942490398e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.349609375, "rewards_train/margins": 0.921875, "rewards_train/rejected": -1.2734375, "sft_loss": 0.71875, "step": 941 }, { "dpo_loss": 0.14453125, "epoch": 0.15, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 9.510181128744425e-07, "loss": 0.2068, "projector_lr": 2.8530543386233277e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.000244140625, "rewards_train/margins": 2.59375, "rewards_train/rejected": -2.59375, "sft_loss": 0.7578125, "step": 942 }, { "dpo_loss": 0.205078125, "epoch": 0.15, "final_loss": 0.205078125, "grad_norm": 0.0, "learning_rate": 9.509084619644643e-07, "loss": 0.2254, "projector_lr": 2.852725385893393e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.42578125, "rewards_train/margins": 3.78125, "rewards_train/rejected": -3.359375, "sft_loss": 0.828125, "step": 943 }, { "dpo_loss": 0.12890625, "epoch": 0.15, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 9.507986947951362e-07, "loss": 0.0899, "projector_lr": 2.8523960843854087e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2001953125, "rewards_train/margins": 3.234375, "rewards_train/rejected": -3.4375, "sft_loss": 0.92578125, "step": 944 }, { "dpo_loss": 0.05029296875, "epoch": 0.15, "final_loss": 0.05029296875, "grad_norm": 0.0, "learning_rate": 9.506888113947599e-07, "loss": 0.1545, "projector_lr": 2.8520664341842803e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.53125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -3.65625, "sft_loss": 0.5390625, "step": 945 }, { "dpo_loss": 0.162109375, "epoch": 0.15, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 9.505788117916674e-07, "loss": 0.1972, "projector_lr": 2.8517364353750025e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.58203125, "rewards_train/margins": 3.5, "rewards_train/rejected": -2.921875, "sft_loss": 1.09375, "step": 946 }, { "dpo_loss": 0.11962890625, "epoch": 0.15, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 9.504686960142201e-07, "loss": 0.2433, "projector_lr": 2.8514060880426604e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2216796875, "rewards_train/margins": 3.171875, "rewards_train/rejected": -2.953125, "sft_loss": 0.859375, "step": 947 }, { "dpo_loss": 0.169921875, "epoch": 0.15, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 9.503584640908094e-07, "loss": 0.193, "projector_lr": 2.851075392272428e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.337890625, "rewards_train/margins": 3.109375, "rewards_train/rejected": -3.4375, "sft_loss": 0.85546875, "step": 948 }, { "dpo_loss": 0.12890625, "epoch": 0.15, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 9.502481160498568e-07, "loss": 0.2065, "projector_lr": 2.8507443481495704e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.20703125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -3.859375, "sft_loss": 0.59375, "step": 949 }, { "dpo_loss": 0.5703125, "epoch": 0.15, "final_loss": 0.5703125, "grad_norm": 0.0, "learning_rate": 9.501376519198136e-07, "loss": 0.4092, "projector_lr": 2.850412955759441e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 1.5859375, "rewards_train/rejected": -2.46875, "sft_loss": 0.71875, "step": 950 }, { "dpo_loss": 0.294921875, "epoch": 0.15, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 9.500270717291617e-07, "loss": 0.2094, "projector_lr": 2.8500812151874854e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.20703125, "rewards_train/margins": 1.6875, "rewards_train/rejected": -1.484375, "sft_loss": 0.9375, "step": 951 }, { "dpo_loss": 0.1689453125, "epoch": 0.15, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 9.49916375506412e-07, "loss": 0.19, "projector_lr": 2.8497491265192364e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1298828125, "rewards_train/margins": 2.46875, "rewards_train/rejected": -2.59375, "sft_loss": 0.63671875, "step": 952 }, { "dpo_loss": 0.19140625, "epoch": 0.15, "final_loss": 0.19140625, "grad_norm": 0.0, "learning_rate": 9.498055632801057e-07, "loss": 0.3005, "projector_lr": 2.8494166898403173e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07177734375, "rewards_train/margins": 2.890625, "rewards_train/rejected": -2.953125, "sft_loss": 0.65234375, "step": 953 }, { "dpo_loss": 0.4453125, "epoch": 0.15, "final_loss": 0.4453125, "grad_norm": 0.0, "learning_rate": 9.49694635078814e-07, "loss": 0.5867, "projector_lr": 2.849083905236442e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.25, "rewards_train/margins": 1.6796875, "rewards_train/rejected": -1.9296875, "sft_loss": 0.84375, "step": 954 }, { "dpo_loss": 0.357421875, "epoch": 0.15, "final_loss": 0.357421875, "grad_norm": 0.0, "learning_rate": 9.495835909311381e-07, "loss": 0.2624, "projector_lr": 2.8487507727934144e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.025390625, "rewards_train/margins": 1.453125, "rewards_train/rejected": -1.421875, "sft_loss": 0.5, "step": 955 }, { "dpo_loss": 0.181640625, "epoch": 0.15, "final_loss": 0.181640625, "grad_norm": 0.0, "learning_rate": 9.494724308657083e-07, "loss": 0.1036, "projector_lr": 2.8484172925971254e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.10546875, "rewards_train/margins": 2.796875, "rewards_train/rejected": -2.6875, "sft_loss": 0.5625, "step": 956 }, { "dpo_loss": 0.109375, "epoch": 0.15, "final_loss": 0.109375, "grad_norm": 0.0, "learning_rate": 9.493611549111861e-07, "loss": 0.1676, "projector_lr": 2.848083464733559e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.5625, "sft_loss": 0.8984375, "step": 957 }, { "dpo_loss": 0.2578125, "epoch": 0.15, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 9.49249763096262e-07, "loss": 0.2486, "projector_lr": 2.847749289288786e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.9375, "rewards_train/margins": 2.71875, "rewards_train/rejected": -1.7890625, "sft_loss": 0.72265625, "step": 958 }, { "dpo_loss": 0.421875, "epoch": 0.15, "final_loss": 0.421875, "grad_norm": 0.0, "learning_rate": 9.491382554496561e-07, "loss": 0.2588, "projector_lr": 2.8474147663489683e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.0810546875, "rewards_train/margins": 1.265625, "rewards_train/rejected": -1.1875, "sft_loss": 0.55078125, "step": 959 }, { "dpo_loss": 0.2109375, "epoch": 0.15, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 9.490266320001194e-07, "loss": 0.1118, "projector_lr": 2.8470798960003584e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.291015625, "rewards_train/margins": 2.078125, "rewards_train/rejected": -1.796875, "sft_loss": 0.70703125, "step": 960 }, { "dpo_loss": 0.1689453125, "epoch": 0.15, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 9.489148927764319e-07, "loss": 0.3817, "projector_lr": 2.8467446783292958e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40625, "rewards_train/margins": 2.46875, "rewards_train/rejected": -2.875, "sft_loss": 0.5859375, "step": 961 }, { "dpo_loss": 0.625, "epoch": 0.15, "final_loss": 0.625, "grad_norm": 0.0, "learning_rate": 9.488030378074037e-07, "loss": 0.6313, "projector_lr": 2.846409113422211e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.42578125, "rewards_train/margins": 1.25, "rewards_train/rejected": -1.671875, "sft_loss": 0.6953125, "step": 962 }, { "dpo_loss": 0.11083984375, "epoch": 0.15, "final_loss": 0.11083984375, "grad_norm": 0.0, "learning_rate": 9.486910671218748e-07, "loss": 0.073, "projector_lr": 2.8460732013656246e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3359375, "rewards_train/margins": 3.59375, "rewards_train/rejected": -3.921875, "sft_loss": 0.8828125, "step": 963 }, { "dpo_loss": 0.287109375, "epoch": 0.15, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 9.48578980748715e-07, "loss": 0.3873, "projector_lr": 2.8457369422461453e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1103515625, "rewards_train/margins": 2.421875, "rewards_train/rejected": -2.53125, "sft_loss": 0.58984375, "step": 964 }, { "dpo_loss": 0.1552734375, "epoch": 0.15, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 9.484667787168242e-07, "loss": 0.2034, "projector_lr": 2.8454003361504727e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.10009765625, "rewards_train/margins": 3.546875, "rewards_train/rejected": -3.453125, "sft_loss": 0.59765625, "step": 965 }, { "dpo_loss": 0.490234375, "epoch": 0.15, "final_loss": 0.490234375, "grad_norm": 0.0, "learning_rate": 9.483544610551314e-07, "loss": 0.3568, "projector_lr": 2.8450633831653945e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.283203125, "rewards_train/margins": 3.015625, "rewards_train/rejected": -3.296875, "sft_loss": 0.92578125, "step": 966 }, { "dpo_loss": 0.275390625, "epoch": 0.15, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 9.482420277925962e-07, "loss": 0.204, "projector_lr": 2.844726083377789e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 2.5, "rewards_train/rejected": -3.234375, "sft_loss": 0.640625, "step": 967 }, { "dpo_loss": 0.40625, "epoch": 0.15, "final_loss": 0.40625, "grad_norm": 0.0, "learning_rate": 9.481294789582075e-07, "loss": 0.3761, "projector_lr": 2.8443884368746227e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.298828125, "rewards_train/margins": 1.9921875, "rewards_train/rejected": -2.296875, "sft_loss": 0.73046875, "step": 968 }, { "dpo_loss": 0.031494140625, "epoch": 0.16, "final_loss": 0.031494140625, "grad_norm": 0.0, "learning_rate": 9.480168145809841e-07, "loss": 0.2085, "projector_lr": 2.844050443742953e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.028564453125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.21875, "sft_loss": 0.74609375, "step": 969 }, { "dpo_loss": 0.244140625, "epoch": 0.16, "final_loss": 0.244140625, "grad_norm": 0.0, "learning_rate": 9.47904034689975e-07, "loss": 0.2123, "projector_lr": 2.8437121040699254e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -4.34375, "sft_loss": 0.58984375, "step": 970 }, { "dpo_loss": 0.1416015625, "epoch": 0.16, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 9.477911393142583e-07, "loss": 0.1663, "projector_lr": 2.8433734179427754e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1875, "rewards_train/margins": 2.640625, "rewards_train/rejected": -2.453125, "sft_loss": 0.71875, "step": 971 }, { "dpo_loss": 0.197265625, "epoch": 0.16, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 9.476781284829425e-07, "loss": 0.3636, "projector_lr": 2.8430343854488278e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2001953125, "rewards_train/margins": 2.890625, "rewards_train/rejected": -2.6875, "sft_loss": 0.5546875, "step": 972 }, { "dpo_loss": 0.4375, "epoch": 0.16, "final_loss": 0.4375, "grad_norm": 0.0, "learning_rate": 9.475650022251653e-07, "loss": 0.2795, "projector_lr": 2.8426950066754963e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.04931640625, "rewards_train/margins": 2.171875, "rewards_train/rejected": -2.21875, "sft_loss": 0.59375, "step": 973 }, { "dpo_loss": 0.2490234375, "epoch": 0.16, "final_loss": 0.2490234375, "grad_norm": 0.0, "learning_rate": 9.474517605700946e-07, "loss": 0.3494, "projector_lr": 2.842355281710284e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.37109375, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.375, "sft_loss": 0.7421875, "step": 974 }, { "dpo_loss": 0.404296875, "epoch": 0.16, "final_loss": 0.404296875, "grad_norm": 0.0, "learning_rate": 9.473384035469281e-07, "loss": 0.2658, "projector_lr": 2.842015210640784e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.30078125, "rewards_train/margins": 1.796875, "rewards_train/rejected": -2.09375, "sft_loss": 0.6328125, "step": 975 }, { "dpo_loss": 0.0908203125, "epoch": 0.16, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 9.472249311848927e-07, "loss": 0.2266, "projector_lr": 2.841674793554678e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.419921875, "rewards_train/margins": 3.25, "rewards_train/rejected": -2.828125, "sft_loss": 0.68359375, "step": 976 }, { "dpo_loss": 0.671875, "epoch": 0.16, "final_loss": 0.671875, "grad_norm": 0.0, "learning_rate": 9.471113435132454e-07, "loss": 0.5027, "projector_lr": 2.8413340305397364e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.546875, "rewards_train/margins": 0.94140625, "rewards_train/rejected": -1.484375, "sft_loss": 0.58984375, "step": 977 }, { "dpo_loss": 0.296875, "epoch": 0.16, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 9.469976405612732e-07, "loss": 0.2631, "projector_lr": 2.84099292168382e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.234375, "rewards_train/margins": 1.9765625, "rewards_train/rejected": -2.203125, "sft_loss": 0.86328125, "step": 978 }, { "dpo_loss": 0.21484375, "epoch": 0.16, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 9.468838223582925e-07, "loss": 0.2245, "projector_lr": 2.8406514670748777e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.25390625, "rewards_train/margins": 3.515625, "rewards_train/rejected": -3.265625, "sft_loss": 0.83203125, "step": 979 }, { "dpo_loss": 0.115234375, "epoch": 0.16, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 9.467698889336491e-07, "loss": 0.1494, "projector_lr": 2.8403096668009474e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04052734375, "rewards_train/margins": 3.3125, "rewards_train/rejected": -3.28125, "sft_loss": 0.7890625, "step": 980 }, { "dpo_loss": 0.11962890625, "epoch": 0.16, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 9.466558403167193e-07, "loss": 0.12, "projector_lr": 2.839967520950158e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.25390625, "rewards_train/margins": 2.453125, "rewards_train/rejected": -2.203125, "sft_loss": 0.58203125, "step": 981 }, { "dpo_loss": 0.259765625, "epoch": 0.16, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 9.465416765369083e-07, "loss": 0.1618, "projector_lr": 2.839625029610725e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 2.1875, "rewards_train/rejected": -2.75, "sft_loss": 0.671875, "step": 982 }, { "dpo_loss": 0.306640625, "epoch": 0.16, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 9.464273976236516e-07, "loss": 0.235, "projector_lr": 2.839282192870955e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.412109375, "rewards_train/margins": 1.828125, "rewards_train/rejected": -2.234375, "sft_loss": 0.72265625, "step": 983 }, { "dpo_loss": 0.734375, "epoch": 0.16, "final_loss": 0.734375, "grad_norm": 0.0, "learning_rate": 9.463130036064142e-07, "loss": 0.687, "projector_lr": 2.838939010819243e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 0.859375, "rewards_train/rejected": -1.4453125, "sft_loss": 0.6328125, "step": 984 }, { "dpo_loss": 0.265625, "epoch": 0.16, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 9.461984945146905e-07, "loss": 0.2881, "projector_lr": 2.8385954835440718e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.240234375, "rewards_train/margins": 2.328125, "rewards_train/rejected": -2.5625, "sft_loss": 1.0234375, "step": 985 }, { "dpo_loss": 0.478515625, "epoch": 0.16, "final_loss": 0.478515625, "grad_norm": 0.0, "learning_rate": 9.460838703780049e-07, "loss": 0.5021, "projector_lr": 2.838251611134015e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.375, "sft_loss": 0.78125, "step": 986 }, { "dpo_loss": 0.1298828125, "epoch": 0.16, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 9.459691312259114e-07, "loss": 0.195, "projector_lr": 2.8379073936777344e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.375, "rewards_train/margins": 4.53125, "rewards_train/rejected": -4.90625, "sft_loss": 0.71484375, "step": 987 }, { "dpo_loss": 0.6328125, "epoch": 0.16, "final_loss": 0.6328125, "grad_norm": 0.0, "learning_rate": 9.458542770879935e-07, "loss": 0.5365, "projector_lr": 2.8375628312639805e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 1.5, "rewards_train/rejected": -2.1875, "sft_loss": 0.69921875, "step": 988 }, { "dpo_loss": 0.142578125, "epoch": 0.16, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 9.457393079938645e-07, "loss": 0.1864, "projector_lr": 2.8372179239815935e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.224609375, "rewards_train/margins": 3.171875, "rewards_train/rejected": -3.390625, "sft_loss": 0.83984375, "step": 989 }, { "dpo_loss": 0.302734375, "epoch": 0.16, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 9.456242239731674e-07, "loss": 0.2144, "projector_lr": 2.836872671919502e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.703125, "rewards_train/margins": 1.953125, "rewards_train/rejected": -2.65625, "sft_loss": 0.69921875, "step": 990 }, { "dpo_loss": 0.0947265625, "epoch": 0.16, "final_loss": 0.0947265625, "grad_norm": 0.0, "learning_rate": 9.455090250555744e-07, "loss": 0.2909, "projector_lr": 2.8365270751667235e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1689453125, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.34375, "sft_loss": 0.97265625, "step": 991 }, { "dpo_loss": 0.185546875, "epoch": 0.16, "final_loss": 0.185546875, "grad_norm": 0.0, "learning_rate": 9.453937112707879e-07, "loss": 0.4213, "projector_lr": 2.836181133812364e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.267578125, "rewards_train/margins": 2.890625, "rewards_train/rejected": -3.15625, "sft_loss": 0.62890625, "step": 992 }, { "dpo_loss": 0.27734375, "epoch": 0.16, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 9.4527828264854e-07, "loss": 0.3688, "projector_lr": 2.83583484794562e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.37109375, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.09375, "sft_loss": 0.74609375, "step": 993 }, { "dpo_loss": 0.427734375, "epoch": 0.16, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 9.451627392185913e-07, "loss": 0.2746, "projector_lr": 2.835488217655774e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.224609375, "rewards_train/margins": 1.0390625, "rewards_train/rejected": -1.265625, "sft_loss": 0.6171875, "step": 994 }, { "dpo_loss": 0.251953125, "epoch": 0.16, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 9.450470810107333e-07, "loss": 0.3088, "projector_lr": 2.8351412430322004e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.296875, "rewards_train/margins": 2.75, "rewards_train/rejected": -2.46875, "sft_loss": 0.53515625, "step": 995 }, { "dpo_loss": 0.1328125, "epoch": 0.16, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 9.449313080547867e-07, "loss": 0.2192, "projector_lr": 2.8347939241643603e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1416015625, "rewards_train/margins": 2.75, "rewards_train/rejected": -2.609375, "sft_loss": 0.69140625, "step": 996 }, { "dpo_loss": 0.353515625, "epoch": 0.16, "final_loss": 0.353515625, "grad_norm": 0.0, "learning_rate": 9.448154203806011e-07, "loss": 0.3609, "projector_lr": 2.834446261141804e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.140625, "rewards_train/margins": 1.875, "rewards_train/rejected": -2.015625, "sft_loss": 0.5625, "step": 997 }, { "dpo_loss": 0.10302734375, "epoch": 0.16, "final_loss": 0.10302734375, "grad_norm": 0.0, "learning_rate": 9.446994180180569e-07, "loss": 0.1189, "projector_lr": 2.8340982540541706e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.671875, "sft_loss": 0.54296875, "step": 998 }, { "dpo_loss": 0.1376953125, "epoch": 0.16, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 9.445833009970628e-07, "loss": 0.1951, "projector_lr": 2.8337499029911887e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0064697265625, "rewards_train/margins": 2.703125, "rewards_train/rejected": -2.71875, "sft_loss": 0.671875, "step": 999 }, { "dpo_loss": 0.08349609375, "epoch": 0.16, "final_loss": 0.08349609375, "grad_norm": 0.0, "learning_rate": 9.44467069347558e-07, "loss": 0.1099, "projector_lr": 2.8334012080426744e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4765625, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.75, "sft_loss": 0.62890625, "step": 1000 }, { "dpo_loss": 0.1591796875, "epoch": 0.16, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 9.44350723099511e-07, "loss": 0.2872, "projector_lr": 2.8330521692985334e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.12158203125, "rewards_train/margins": 2.53125, "rewards_train/rejected": -2.40625, "sft_loss": 0.67578125, "step": 1001 }, { "dpo_loss": 0.07373046875, "epoch": 0.16, "final_loss": 0.07373046875, "grad_norm": 0.0, "learning_rate": 9.442342622829196e-07, "loss": 0.0656, "projector_lr": 2.832702786848759e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.251953125, "rewards_train/margins": 4.25, "rewards_train/rejected": -3.984375, "sft_loss": 0.76171875, "step": 1002 }, { "dpo_loss": 0.29296875, "epoch": 0.16, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 9.441176869278114e-07, "loss": 0.3133, "projector_lr": 2.832353060783434e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 1.8125, "rewards_train/rejected": -2.609375, "sft_loss": 0.8046875, "step": 1003 }, { "dpo_loss": 0.10693359375, "epoch": 0.16, "final_loss": 0.10693359375, "grad_norm": 0.0, "learning_rate": 9.440009970642432e-07, "loss": 0.0931, "projector_lr": 2.8320029911927297e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.06884765625, "rewards_train/margins": 4.0625, "rewards_train/rejected": -3.984375, "sft_loss": 0.78515625, "step": 1004 }, { "dpo_loss": 0.5078125, "epoch": 0.16, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 9.438841927223018e-07, "loss": 0.3869, "projector_lr": 2.8316525781669058e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.17578125, "rewards_train/margins": 2.15625, "rewards_train/rejected": -2.328125, "sft_loss": 0.6640625, "step": 1005 }, { "dpo_loss": 0.1396484375, "epoch": 0.16, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 9.437672739321033e-07, "loss": 0.1747, "projector_lr": 2.83130182179631e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.05517578125, "rewards_train/margins": 2.421875, "rewards_train/rejected": -2.359375, "sft_loss": 0.54296875, "step": 1006 }, { "dpo_loss": 0.12890625, "epoch": 0.16, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 9.436502407237931e-07, "loss": 0.3706, "projector_lr": 2.8309507221713796e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.037109375, "rewards_train/margins": 3.25, "rewards_train/rejected": -3.28125, "sft_loss": 0.6953125, "step": 1007 }, { "dpo_loss": 0.32421875, "epoch": 0.16, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 9.435330931275467e-07, "loss": 0.217, "projector_lr": 2.8305992793826404e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.046142578125, "rewards_train/margins": 1.6875, "rewards_train/rejected": -1.734375, "sft_loss": 1.0703125, "step": 1008 }, { "dpo_loss": 0.2890625, "epoch": 0.16, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 9.434158311735683e-07, "loss": 0.2452, "projector_lr": 2.830247493520705e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.490234375, "rewards_train/margins": 2.46875, "rewards_train/rejected": -1.9921875, "sft_loss": 0.7578125, "step": 1009 }, { "dpo_loss": 0.6484375, "epoch": 0.16, "final_loss": 0.6484375, "grad_norm": 0.0, "learning_rate": 9.43298454892092e-07, "loss": 0.3869, "projector_lr": 2.829895364676276e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 0.84375, "rewards_train/rejected": -1.296875, "sft_loss": 0.83984375, "step": 1010 }, { "dpo_loss": 0.390625, "epoch": 0.16, "final_loss": 0.390625, "grad_norm": 0.0, "learning_rate": 9.431809643133814e-07, "loss": 0.426, "projector_lr": 2.8295428929401447e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 1.8515625, "rewards_train/rejected": -3.140625, "sft_loss": 0.703125, "step": 1011 }, { "dpo_loss": 0.3984375, "epoch": 0.16, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 9.430633594677296e-07, "loss": 0.3292, "projector_lr": 2.8291900784031887e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 1.40625, "rewards_train/rejected": -2.234375, "sft_loss": 0.69921875, "step": 1012 }, { "dpo_loss": 0.1015625, "epoch": 0.16, "final_loss": 0.1015625, "grad_norm": 0.0, "learning_rate": 9.429456403854589e-07, "loss": 0.2984, "projector_lr": 2.828836921156377e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05029296875, "rewards_train/margins": 3.3125, "rewards_train/rejected": -3.375, "sft_loss": 0.6015625, "step": 1013 }, { "dpo_loss": 0.30078125, "epoch": 0.16, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 9.428278070969217e-07, "loss": 0.3155, "projector_lr": 2.8284834212907652e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 2.59375, "rewards_train/rejected": -3.296875, "sft_loss": 0.64453125, "step": 1014 }, { "dpo_loss": 0.1416015625, "epoch": 0.16, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 9.427098596324987e-07, "loss": 0.2766, "projector_lr": 2.8281295788974962e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23046875, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.234375, "sft_loss": 0.75, "step": 1015 }, { "dpo_loss": 0.1162109375, "epoch": 0.16, "final_loss": 0.1162109375, "grad_norm": 0.0, "learning_rate": 9.425917980226013e-07, "loss": 0.0781, "projector_lr": 2.827775394067804e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.328125, "rewards_train/margins": 3.765625, "rewards_train/rejected": -3.4375, "sft_loss": 0.49609375, "step": 1016 }, { "dpo_loss": 0.369140625, "epoch": 0.16, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 9.424736222976695e-07, "loss": 0.2267, "projector_lr": 2.8274208668930085e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.75390625, "rewards_train/margins": 1.96875, "rewards_train/rejected": -2.71875, "sft_loss": 0.7265625, "step": 1017 }, { "dpo_loss": 0.11669921875, "epoch": 0.16, "final_loss": 0.11669921875, "grad_norm": 0.0, "learning_rate": 9.423553324881728e-07, "loss": 0.1077, "projector_lr": 2.8270659974645186e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1123046875, "rewards_train/margins": 3.09375, "rewards_train/rejected": -2.96875, "sft_loss": 0.4921875, "step": 1018 }, { "dpo_loss": 0.1689453125, "epoch": 0.16, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 9.422369286246105e-07, "loss": 0.3742, "projector_lr": 2.826710785873832e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.255859375, "rewards_train/margins": 2.875, "rewards_train/rejected": -3.140625, "sft_loss": 0.7890625, "step": 1019 }, { "dpo_loss": 0.462890625, "epoch": 0.16, "final_loss": 0.462890625, "grad_norm": 0.0, "learning_rate": 9.421184107375113e-07, "loss": 0.3846, "projector_lr": 2.826355232212534e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 2.015625, "rewards_train/rejected": -2.828125, "sft_loss": 0.76953125, "step": 1020 }, { "dpo_loss": 0.3828125, "epoch": 0.16, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 9.419997788574327e-07, "loss": 0.3828, "projector_lr": 2.825999336572298e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 1.515625, "rewards_train/rejected": -2.09375, "sft_loss": 0.86328125, "step": 1021 }, { "dpo_loss": 0.64453125, "epoch": 0.16, "final_loss": 0.64453125, "grad_norm": 0.0, "learning_rate": 9.41881033014962e-07, "loss": 0.4799, "projector_lr": 2.8256430990448862e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.578125, "rewards_train/margins": 1.40625, "rewards_train/rejected": -1.9921875, "sft_loss": 0.8671875, "step": 1022 }, { "dpo_loss": 0.048828125, "epoch": 0.16, "final_loss": 0.048828125, "grad_norm": 0.0, "learning_rate": 9.417621732407162e-07, "loss": 0.053, "projector_lr": 2.8252865197221487e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.294921875, "rewards_train/margins": 4.40625, "rewards_train/rejected": -4.6875, "sft_loss": 0.6328125, "step": 1023 }, { "dpo_loss": 0.294921875, "epoch": 0.16, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 9.416431995653411e-07, "loss": 0.3508, "projector_lr": 2.8249295986960233e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.125, "rewards_train/margins": 2.828125, "rewards_train/rejected": -2.703125, "sft_loss": 0.69140625, "step": 1024 }, { "dpo_loss": 0.15234375, "epoch": 0.16, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 9.415241120195121e-07, "loss": 0.1092, "projector_lr": 2.8245723360585364e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.412109375, "rewards_train/margins": 2.421875, "rewards_train/rejected": -2.84375, "sft_loss": 0.5625, "step": 1025 }, { "dpo_loss": 0.27734375, "epoch": 0.16, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 9.414049106339341e-07, "loss": 0.1783, "projector_lr": 2.8242147319018027e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3671875, "rewards_train/margins": 2.859375, "rewards_train/rejected": -3.234375, "sft_loss": 0.7890625, "step": 1026 }, { "dpo_loss": 0.1787109375, "epoch": 0.16, "final_loss": 0.1787109375, "grad_norm": 0.0, "learning_rate": 9.412855954393413e-07, "loss": 0.1317, "projector_lr": 2.823856786318024e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.208984375, "rewards_train/margins": 3.171875, "rewards_train/rejected": -2.96875, "sft_loss": 0.5546875, "step": 1027 }, { "dpo_loss": 0.126953125, "epoch": 0.16, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 9.411661664664967e-07, "loss": 0.2122, "projector_lr": 2.8234984993994903e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.236328125, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.0, "sft_loss": 0.6484375, "step": 1028 }, { "dpo_loss": 0.064453125, "epoch": 0.16, "final_loss": 0.064453125, "grad_norm": 0.0, "learning_rate": 9.410466237461936e-07, "loss": 0.0648, "projector_lr": 2.823139871238581e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33984375, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.375, "sft_loss": 0.85546875, "step": 1029 }, { "dpo_loss": 0.33984375, "epoch": 0.16, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 9.409269673092539e-07, "loss": 0.4196, "projector_lr": 2.822780901927762e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.53125, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -2.328125, "sft_loss": 0.7890625, "step": 1030 }, { "dpo_loss": 0.337890625, "epoch": 0.16, "final_loss": 0.337890625, "grad_norm": 0.0, "learning_rate": 9.408071971865293e-07, "loss": 0.2456, "projector_lr": 2.822421591559588e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.609375, "rewards_train/margins": 2.828125, "rewards_train/rejected": -3.4375, "sft_loss": 0.70703125, "step": 1031 }, { "dpo_loss": 0.2294921875, "epoch": 0.17, "final_loss": 0.2294921875, "grad_norm": 0.0, "learning_rate": 9.406873134089002e-07, "loss": 0.4582, "projector_lr": 2.822061940226701e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 2.765625, "rewards_train/rejected": -3.703125, "sft_loss": 0.62890625, "step": 1032 }, { "dpo_loss": 0.28125, "epoch": 0.17, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 9.405673160072767e-07, "loss": 0.332, "projector_lr": 2.82170194802183e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 2.296875, "rewards_train/rejected": -3.25, "sft_loss": 0.68359375, "step": 1033 }, { "dpo_loss": 0.09716796875, "epoch": 0.17, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 9.404472050125984e-07, "loss": 0.2117, "projector_lr": 2.8213416150377957e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0556640625, "rewards_train/margins": 3.640625, "rewards_train/rejected": -3.6875, "sft_loss": 0.953125, "step": 1034 }, { "dpo_loss": 0.18359375, "epoch": 0.17, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 9.403269804558338e-07, "loss": 0.2793, "projector_lr": 2.8209809413675017e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.27734375, "rewards_train/margins": 2.296875, "rewards_train/rejected": -2.015625, "sft_loss": 0.68359375, "step": 1035 }, { "dpo_loss": 0.091796875, "epoch": 0.17, "final_loss": 0.091796875, "grad_norm": 0.0, "learning_rate": 9.402066423679809e-07, "loss": 0.1453, "projector_lr": 2.820619927103943e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.71875, "sft_loss": 0.85546875, "step": 1036 }, { "dpo_loss": 0.173828125, "epoch": 0.17, "final_loss": 0.173828125, "grad_norm": 0.0, "learning_rate": 9.40086190780067e-07, "loss": 0.3488, "projector_lr": 2.820258572340201e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0849609375, "rewards_train/margins": 3.546875, "rewards_train/rejected": -3.640625, "sft_loss": 1.015625, "step": 1037 }, { "dpo_loss": 0.06298828125, "epoch": 0.17, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 9.399656257231483e-07, "loss": 0.1112, "projector_lr": 2.819896877169445e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1689453125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.0625, "sft_loss": 0.88671875, "step": 1038 }, { "dpo_loss": 0.5234375, "epoch": 0.17, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 9.398449472283106e-07, "loss": 0.2987, "projector_lr": 2.819534841684932e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 2.640625, "rewards_train/rejected": -3.515625, "sft_loss": 0.6953125, "step": 1039 }, { "dpo_loss": 0.1787109375, "epoch": 0.17, "final_loss": 0.1787109375, "grad_norm": 0.0, "learning_rate": 9.397241553266691e-07, "loss": 0.1292, "projector_lr": 2.8191724659800074e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.47265625, "rewards_train/margins": 2.828125, "rewards_train/rejected": -3.296875, "sft_loss": 0.63671875, "step": 1040 }, { "dpo_loss": 0.439453125, "epoch": 0.17, "final_loss": 0.439453125, "grad_norm": 0.0, "learning_rate": 9.396032500493677e-07, "loss": 0.2642, "projector_lr": 2.8188097501481033e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 1.921875, "rewards_train/rejected": -3.265625, "sft_loss": 0.71875, "step": 1041 }, { "dpo_loss": 0.1552734375, "epoch": 0.17, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 9.3948223142758e-07, "loss": 0.1079, "projector_lr": 2.81844669428274e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.333984375, "rewards_train/margins": 3.078125, "rewards_train/rejected": -2.734375, "sft_loss": 0.76953125, "step": 1042 }, { "dpo_loss": 0.373046875, "epoch": 0.17, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 9.393610994925087e-07, "loss": 0.3985, "projector_lr": 2.818083298477526e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 2.390625, "rewards_train/rejected": -3.1875, "sft_loss": 0.7578125, "step": 1043 }, { "dpo_loss": 0.6015625, "epoch": 0.17, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 9.392398542753854e-07, "loss": 0.4435, "projector_lr": 2.8177195628261563e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.84765625, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -2.640625, "sft_loss": 0.8828125, "step": 1044 }, { "dpo_loss": 0.19140625, "epoch": 0.17, "final_loss": 0.19140625, "grad_norm": 0.0, "learning_rate": 9.391184958074717e-07, "loss": 0.1578, "projector_lr": 2.8173554874224152e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.337890625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.34375, "sft_loss": 0.6796875, "step": 1045 }, { "dpo_loss": 0.470703125, "epoch": 0.17, "final_loss": 0.470703125, "grad_norm": 0.0, "learning_rate": 9.389970241200575e-07, "loss": 0.349, "projector_lr": 2.816991072360173e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.322265625, "rewards_train/margins": 1.9296875, "rewards_train/rejected": -2.25, "sft_loss": 0.96875, "step": 1046 }, { "dpo_loss": 0.1982421875, "epoch": 0.17, "final_loss": 0.1982421875, "grad_norm": 0.0, "learning_rate": 9.388754392444625e-07, "loss": 0.2375, "projector_lr": 2.8166263177333877e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.90625, "rewards_train/margins": 2.1875, "rewards_train/rejected": -3.09375, "sft_loss": 0.828125, "step": 1047 }, { "dpo_loss": 0.05517578125, "epoch": 0.17, "final_loss": 0.05517578125, "grad_norm": 0.0, "learning_rate": 9.387537412120351e-07, "loss": 0.1153, "projector_lr": 2.8162612236361053e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.15625, "sft_loss": 0.65625, "step": 1048 }, { "dpo_loss": 0.255859375, "epoch": 0.17, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 9.386319300541535e-07, "loss": 0.3309, "projector_lr": 2.8158957901624603e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.234375, "rewards_train/margins": 2.0625, "rewards_train/rejected": -2.3125, "sft_loss": 0.67578125, "step": 1049 }, { "dpo_loss": 0.416015625, "epoch": 0.17, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 9.385100058022241e-07, "loss": 0.2363, "projector_lr": 2.8155300174066727e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.318359375, "rewards_train/margins": 2.203125, "rewards_train/rejected": -2.53125, "sft_loss": 0.8125, "step": 1050 }, { "dpo_loss": 0.16796875, "epoch": 0.17, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 9.383879684876838e-07, "loss": 0.1377, "projector_lr": 2.8151639054630517e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.05029296875, "rewards_train/margins": 3.6875, "rewards_train/rejected": -3.734375, "sft_loss": 0.78125, "step": 1051 }, { "dpo_loss": 0.734375, "epoch": 0.17, "final_loss": 0.734375, "grad_norm": 0.0, "learning_rate": 9.382658181419975e-07, "loss": 0.4936, "projector_lr": 2.8147974544259928e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 1.96875, "rewards_train/rejected": -3.75, "sft_loss": 0.91796875, "step": 1052 }, { "dpo_loss": 0.52734375, "epoch": 0.17, "final_loss": 0.52734375, "grad_norm": 0.0, "learning_rate": 9.381435547966599e-07, "loss": 0.3467, "projector_lr": 2.81443066438998e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 1.234375, "rewards_train/rejected": -2.546875, "sft_loss": 1.0703125, "step": 1053 }, { "dpo_loss": 0.5234375, "epoch": 0.17, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 9.380211784831942e-07, "loss": 0.3068, "projector_lr": 2.814063535449583e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 2.453125, "rewards_train/rejected": -3.03125, "sft_loss": 0.7578125, "step": 1054 }, { "dpo_loss": 0.09716796875, "epoch": 0.17, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 9.378986892331535e-07, "loss": 0.2042, "projector_lr": 2.813696067699461e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.453125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.65625, "sft_loss": 0.7421875, "step": 1055 }, { "dpo_loss": 0.12158203125, "epoch": 0.17, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 9.377760870781196e-07, "loss": 0.1011, "projector_lr": 2.813328261234359e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1259765625, "rewards_train/margins": 4.8125, "rewards_train/rejected": -4.9375, "sft_loss": 0.578125, "step": 1056 }, { "dpo_loss": 0.15234375, "epoch": 0.17, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 9.376533720497032e-07, "loss": 0.2224, "projector_lr": 2.81296011614911e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1689453125, "rewards_train/margins": 3.359375, "rewards_train/rejected": -3.1875, "sft_loss": 1.015625, "step": 1057 }, { "dpo_loss": 0.047119140625, "epoch": 0.17, "final_loss": 0.047119140625, "grad_norm": 0.0, "learning_rate": 9.375305441795447e-07, "loss": 0.1575, "projector_lr": 2.8125916325386343e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.451171875, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.40625, "sft_loss": 0.55859375, "step": 1058 }, { "dpo_loss": 0.263671875, "epoch": 0.17, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 9.374076034993131e-07, "loss": 0.1968, "projector_lr": 2.8122228104979392e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1318359375, "rewards_train/margins": 2.90625, "rewards_train/rejected": -2.765625, "sft_loss": 0.62890625, "step": 1059 }, { "dpo_loss": 0.0225830078125, "epoch": 0.17, "final_loss": 0.0225830078125, "grad_norm": 0.0, "learning_rate": 9.372845500407065e-07, "loss": 0.1955, "projector_lr": 2.8118536501221196e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.04443359375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -4.6875, "sft_loss": 0.7578125, "step": 1060 }, { "dpo_loss": 0.53515625, "epoch": 0.17, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 9.371613838354524e-07, "loss": 0.3253, "projector_lr": 2.8114841515063575e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 1.890625, "rewards_train/rejected": -2.9375, "sft_loss": 0.87109375, "step": 1061 }, { "dpo_loss": 0.1904296875, "epoch": 0.17, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 9.370381049153071e-07, "loss": 0.1789, "projector_lr": 2.8111143147459215e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.39453125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -4.9375, "sft_loss": 0.94921875, "step": 1062 }, { "dpo_loss": 0.68359375, "epoch": 0.17, "final_loss": 0.68359375, "grad_norm": 0.0, "learning_rate": 9.36914713312056e-07, "loss": 0.4588, "projector_lr": 2.8107441399361685e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.59765625, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.328125, "sft_loss": 0.49609375, "step": 1063 }, { "dpo_loss": 0.22265625, "epoch": 0.17, "final_loss": 0.22265625, "grad_norm": 0.0, "learning_rate": 9.367912090575138e-07, "loss": 0.207, "projector_lr": 2.810373627172542e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.0615234375, "rewards_train/margins": 3.09375, "rewards_train/rejected": -3.15625, "sft_loss": 0.7734375, "step": 1064 }, { "dpo_loss": 0.283203125, "epoch": 0.17, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 9.36667592183524e-07, "loss": 0.4978, "projector_lr": 2.810002776550572e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.193359375, "rewards_train/margins": 2.09375, "rewards_train/rejected": -2.296875, "sft_loss": 0.88671875, "step": 1065 }, { "dpo_loss": 0.458984375, "epoch": 0.17, "final_loss": 0.458984375, "grad_norm": 0.0, "learning_rate": 9.365438627219589e-07, "loss": 0.3444, "projector_lr": 2.809631588165877e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.328125, "rewards_train/margins": 1.6640625, "rewards_train/rejected": -1.9921875, "sft_loss": 0.94140625, "step": 1066 }, { "dpo_loss": 0.0390625, "epoch": 0.17, "final_loss": 0.0390625, "grad_norm": 0.0, "learning_rate": 9.364200207047203e-07, "loss": 0.0757, "projector_lr": 2.809260062114161e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.369140625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -4.09375, "sft_loss": 0.65625, "step": 1067 }, { "dpo_loss": 0.166015625, "epoch": 0.17, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 9.362960661637392e-07, "loss": 0.3545, "projector_lr": 2.8088881984912178e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.73828125, "rewards_train/margins": 3.609375, "rewards_train/rejected": -2.875, "sft_loss": 0.60546875, "step": 1068 }, { "dpo_loss": 0.10009765625, "epoch": 0.17, "final_loss": 0.10009765625, "grad_norm": 0.0, "learning_rate": 9.361719991309745e-07, "loss": 0.4202, "projector_lr": 2.8085159973929235e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.37109375, "rewards_train/margins": 3.5, "rewards_train/rejected": -3.125, "sft_loss": 0.59375, "step": 1069 }, { "dpo_loss": 0.2099609375, "epoch": 0.17, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 9.360478196384153e-07, "loss": 0.3512, "projector_lr": 2.8081434589152464e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.875, "sft_loss": 0.80859375, "step": 1070 }, { "dpo_loss": 0.09521484375, "epoch": 0.17, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 9.359235277180793e-07, "loss": 0.0905, "projector_lr": 2.807770583154238e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2158203125, "rewards_train/margins": 2.953125, "rewards_train/rejected": -3.15625, "sft_loss": 0.7109375, "step": 1071 }, { "dpo_loss": 0.396484375, "epoch": 0.17, "final_loss": 0.396484375, "grad_norm": 0.0, "learning_rate": 9.35799123402013e-07, "loss": 0.2367, "projector_lr": 2.807397370206039e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.353515625, "rewards_train/margins": 3.46875, "rewards_train/rejected": -3.125, "sft_loss": 0.64453125, "step": 1072 }, { "dpo_loss": 0.2109375, "epoch": 0.17, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 9.356746067222918e-07, "loss": 0.1866, "projector_lr": 2.8070238201668756e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 1.9765625, "rewards_train/rejected": -2.6875, "sft_loss": 0.6328125, "step": 1073 }, { "dpo_loss": 0.1572265625, "epoch": 0.17, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 9.355499777110207e-07, "loss": 0.2157, "projector_lr": 2.8066499331330622e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.024658203125, "rewards_train/margins": 3.5625, "rewards_train/rejected": -3.53125, "sft_loss": 0.79296875, "step": 1074 }, { "dpo_loss": 0.51171875, "epoch": 0.17, "final_loss": 0.51171875, "grad_norm": 0.0, "learning_rate": 9.354252364003332e-07, "loss": 0.5634, "projector_lr": 2.8062757092009997e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4375, "rewards_train/margins": 1.265625, "rewards_train/rejected": -2.703125, "sft_loss": 0.91015625, "step": 1075 }, { "dpo_loss": 0.41796875, "epoch": 0.17, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 9.353003828223916e-07, "loss": 0.3696, "projector_lr": 2.8059011484671747e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.205078125, "rewards_train/margins": 3.734375, "rewards_train/rejected": -3.9375, "sft_loss": 0.8515625, "step": 1076 }, { "dpo_loss": 0.43359375, "epoch": 0.17, "final_loss": 0.43359375, "grad_norm": 0.0, "learning_rate": 9.351754170093872e-07, "loss": 0.3415, "projector_lr": 2.805526251028162e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1435546875, "rewards_train/margins": 2.03125, "rewards_train/rejected": -2.1875, "sft_loss": 0.72265625, "step": 1077 }, { "dpo_loss": 0.10107421875, "epoch": 0.17, "final_loss": 0.10107421875, "grad_norm": 0.0, "learning_rate": 9.350503389935406e-07, "loss": 0.3534, "projector_lr": 2.8051510169806224e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1298828125, "rewards_train/margins": 2.953125, "rewards_train/rejected": -3.078125, "sft_loss": 0.73046875, "step": 1078 }, { "dpo_loss": 0.322265625, "epoch": 0.17, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 9.349251488071013e-07, "loss": 0.226, "projector_lr": 2.804775446421304e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 2.3125, "rewards_train/rejected": -3.390625, "sft_loss": 0.984375, "step": 1079 }, { "dpo_loss": 0.330078125, "epoch": 0.17, "final_loss": 0.330078125, "grad_norm": 0.0, "learning_rate": 9.347998464823473e-07, "loss": 0.299, "projector_lr": 2.804399539447042e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1875, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.921875, "sft_loss": 0.7109375, "step": 1080 }, { "dpo_loss": 0.1728515625, "epoch": 0.17, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 9.346744320515857e-07, "loss": 0.1448, "projector_lr": 2.804023296154757e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.380859375, "rewards_train/margins": 3.421875, "rewards_train/rejected": -3.8125, "sft_loss": 0.71484375, "step": 1081 }, { "dpo_loss": 0.318359375, "epoch": 0.17, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 9.345489055471528e-07, "loss": 0.2083, "projector_lr": 2.8036467166414583e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.71875, "rewards_train/margins": 2.71875, "rewards_train/rejected": -3.4375, "sft_loss": 0.796875, "step": 1082 }, { "dpo_loss": 0.20703125, "epoch": 0.17, "final_loss": 0.20703125, "grad_norm": 0.0, "learning_rate": 9.344232670014132e-07, "loss": 0.158, "projector_lr": 2.8032698010042398e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.291015625, "rewards_train/margins": 4.4375, "rewards_train/rejected": -4.75, "sft_loss": 0.86328125, "step": 1083 }, { "dpo_loss": 0.25390625, "epoch": 0.17, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 9.342975164467611e-07, "loss": 0.2102, "projector_lr": 2.8028925493402835e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 2.609375, "rewards_train/rejected": -3.171875, "sft_loss": 0.62890625, "step": 1084 }, { "dpo_loss": 0.10498046875, "epoch": 0.17, "final_loss": 0.10498046875, "grad_norm": 0.0, "learning_rate": 9.341716539156188e-07, "loss": 0.3349, "projector_lr": 2.8025149617468567e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1279296875, "rewards_train/margins": 3.59375, "rewards_train/rejected": -3.71875, "sft_loss": 0.6171875, "step": 1085 }, { "dpo_loss": 0.72265625, "epoch": 0.17, "final_loss": 0.72265625, "grad_norm": 0.0, "learning_rate": 9.340456794404384e-07, "loss": 0.5195, "projector_lr": 2.8021370383213153e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 1.3671875, "rewards_train/rejected": -3.21875, "sft_loss": 0.796875, "step": 1086 }, { "dpo_loss": 0.044189453125, "epoch": 0.17, "final_loss": 0.044189453125, "grad_norm": 0.0, "learning_rate": 9.339195930536999e-07, "loss": 0.1508, "projector_lr": 2.8017587791611e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2578125, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.03125, "sft_loss": 0.8515625, "step": 1087 }, { "dpo_loss": 0.50390625, "epoch": 0.17, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 9.337933947879128e-07, "loss": 0.3397, "projector_lr": 2.801380184363739e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.03076171875, "rewards_train/margins": 2.5, "rewards_train/rejected": -2.53125, "sft_loss": 0.60546875, "step": 1088 }, { "dpo_loss": 0.314453125, "epoch": 0.17, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 9.336670846756154e-07, "loss": 0.1994, "projector_lr": 2.8010012540268463e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 3.203125, "rewards_train/rejected": -3.8125, "sft_loss": 0.8515625, "step": 1089 }, { "dpo_loss": 0.1376953125, "epoch": 0.17, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 9.335406627493744e-07, "loss": 0.4727, "projector_lr": 2.8006219882481235e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40234375, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.125, "sft_loss": 0.66015625, "step": 1090 }, { "dpo_loss": 0.40625, "epoch": 0.17, "final_loss": 0.40625, "grad_norm": 0.0, "learning_rate": 9.334141290417859e-07, "loss": 0.575, "projector_lr": 2.800242387125358e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.056396484375, "rewards_train/margins": 3.78125, "rewards_train/rejected": -3.734375, "sft_loss": 0.58984375, "step": 1091 }, { "dpo_loss": 0.310546875, "epoch": 0.17, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 9.332874835854743e-07, "loss": 0.3029, "projector_lr": 2.799862450756423e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1748046875, "rewards_train/margins": 2.0625, "rewards_train/rejected": -2.25, "sft_loss": 0.66015625, "step": 1092 }, { "dpo_loss": 0.333984375, "epoch": 0.17, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 9.331607264130932e-07, "loss": 0.2902, "projector_lr": 2.79948217923928e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.318359375, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.25, "sft_loss": 0.859375, "step": 1093 }, { "dpo_loss": 0.365234375, "epoch": 0.18, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 9.330338575573247e-07, "loss": 0.2229, "projector_lr": 2.7991015726719743e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.111328125, "rewards_train/margins": 2.203125, "rewards_train/rejected": -2.3125, "sft_loss": 0.66015625, "step": 1094 }, { "dpo_loss": 0.251953125, "epoch": 0.18, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 9.329068770508802e-07, "loss": 0.1527, "projector_lr": 2.7987206311526404e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.134765625, "rewards_train/margins": 3.234375, "rewards_train/rejected": -3.375, "sft_loss": 0.578125, "step": 1095 }, { "dpo_loss": 0.58203125, "epoch": 0.18, "final_loss": 0.58203125, "grad_norm": 0.0, "learning_rate": 9.32779784926499e-07, "loss": 0.3004, "projector_lr": 2.798339354779497e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 1.25, "rewards_train/rejected": -2.015625, "sft_loss": 0.8203125, "step": 1096 }, { "dpo_loss": 0.443359375, "epoch": 0.18, "final_loss": 0.443359375, "grad_norm": 0.0, "learning_rate": 9.326525812169501e-07, "loss": 0.3622, "projector_lr": 2.7979577436508507e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 1.875, "rewards_train/rejected": -2.625, "sft_loss": 0.94140625, "step": 1097 }, { "dpo_loss": 0.033935546875, "epoch": 0.18, "final_loss": 0.033935546875, "grad_norm": 0.0, "learning_rate": 9.325252659550308e-07, "loss": 0.2119, "projector_lr": 2.797575797865093e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6171875, "rewards_train/margins": 5.875, "rewards_train/rejected": -5.25, "sft_loss": 0.376953125, "step": 1098 }, { "dpo_loss": 0.54296875, "epoch": 0.18, "final_loss": 0.54296875, "grad_norm": 0.0, "learning_rate": 9.323978391735674e-07, "loss": 0.4127, "projector_lr": 2.7971935175207022e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.00048828125, "rewards_train/margins": 1.59375, "rewards_train/rejected": -1.59375, "sft_loss": 1.1484375, "step": 1099 }, { "dpo_loss": 0.1826171875, "epoch": 0.18, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 9.322703009054147e-07, "loss": 0.1723, "projector_lr": 2.7968109027162443e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1103515625, "rewards_train/margins": 3.359375, "rewards_train/rejected": -3.46875, "sft_loss": 0.82421875, "step": 1100 }, { "dpo_loss": 0.11181640625, "epoch": 0.18, "final_loss": 0.11181640625, "grad_norm": 0.0, "learning_rate": 9.321426511834563e-07, "loss": 0.088, "projector_lr": 2.796427953550369e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 3.359375, "rewards_train/rejected": -3.90625, "sft_loss": 0.8203125, "step": 1101 }, { "dpo_loss": 0.62109375, "epoch": 0.18, "final_loss": 0.62109375, "grad_norm": 0.0, "learning_rate": 9.320148900406044e-07, "loss": 0.3986, "projector_lr": 2.7960446701218135e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 1.703125, "rewards_train/rejected": -2.25, "sft_loss": 0.8125, "step": 1102 }, { "dpo_loss": 0.1943359375, "epoch": 0.18, "final_loss": 0.1943359375, "grad_norm": 0.0, "learning_rate": 9.318870175098005e-07, "loss": 0.1876, "projector_lr": 2.7956610525294015e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.265625, "rewards_train/margins": 2.296875, "rewards_train/rejected": -2.5625, "sft_loss": 0.671875, "step": 1103 }, { "dpo_loss": 0.20703125, "epoch": 0.18, "final_loss": 0.20703125, "grad_norm": 0.0, "learning_rate": 9.317590336240142e-07, "loss": 0.2578, "projector_lr": 2.795277100872043e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.1875, "rewards_train/margins": 3.890625, "rewards_train/rejected": -3.703125, "sft_loss": 0.59765625, "step": 1104 }, { "dpo_loss": 0.490234375, "epoch": 0.18, "final_loss": 0.490234375, "grad_norm": 0.0, "learning_rate": 9.316309384162441e-07, "loss": 0.3196, "projector_lr": 2.7948928152487325e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 2.578125, "rewards_train/rejected": -3.28125, "sft_loss": 0.71484375, "step": 1105 }, { "dpo_loss": 0.1328125, "epoch": 0.18, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 9.315027319195174e-07, "loss": 0.1543, "projector_lr": 2.7945081957585526e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 3.453125, "rewards_train/rejected": -3.984375, "sft_loss": 0.6953125, "step": 1106 }, { "dpo_loss": 0.341796875, "epoch": 0.18, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 9.313744141668902e-07, "loss": 0.3294, "projector_lr": 2.794123242500671e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.06298828125, "rewards_train/margins": 3.078125, "rewards_train/rejected": -3.140625, "sft_loss": 0.796875, "step": 1107 }, { "dpo_loss": 0.6640625, "epoch": 0.18, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 9.312459851914469e-07, "loss": 0.4208, "projector_lr": 2.793737955574341e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 1.71875, "rewards_train/rejected": -2.296875, "sft_loss": 0.66015625, "step": 1108 }, { "dpo_loss": 0.1494140625, "epoch": 0.18, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 9.31117445026301e-07, "loss": 0.1569, "projector_lr": 2.7933523350789035e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.271484375, "rewards_train/margins": 3.09375, "rewards_train/rejected": -3.375, "sft_loss": 0.74609375, "step": 1109 }, { "dpo_loss": 0.67578125, "epoch": 0.18, "final_loss": 0.67578125, "grad_norm": 0.0, "learning_rate": 9.309887937045944e-07, "loss": 0.5075, "projector_lr": 2.7929663811137833e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 0.79296875, "rewards_train/rejected": -1.9375, "sft_loss": 0.74609375, "step": 1110 }, { "dpo_loss": 0.1328125, "epoch": 0.18, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 9.308600312594977e-07, "loss": 0.1571, "projector_lr": 2.792580093778493e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 3.84375, "rewards_train/rejected": -4.5, "sft_loss": 0.66796875, "step": 1111 }, { "dpo_loss": 0.08642578125, "epoch": 0.18, "final_loss": 0.08642578125, "grad_norm": 0.0, "learning_rate": 9.307311577242102e-07, "loss": 0.1214, "projector_lr": 2.792193473172631e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.625, "rewards_train/margins": 3.8125, "rewards_train/rejected": -4.4375, "sft_loss": 0.640625, "step": 1112 }, { "dpo_loss": 0.251953125, "epoch": 0.18, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 9.306021731319598e-07, "loss": 0.2046, "projector_lr": 2.7918065193958796e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.48828125, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.25, "sft_loss": 0.71875, "step": 1113 }, { "dpo_loss": 0.765625, "epoch": 0.18, "final_loss": 0.765625, "grad_norm": 0.0, "learning_rate": 9.304730775160032e-07, "loss": 0.5365, "projector_lr": 2.79141923254801e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.890625, "rewards_train/margins": 0.67578125, "rewards_train/rejected": -2.5625, "sft_loss": 0.8125, "step": 1114 }, { "dpo_loss": 0.03759765625, "epoch": 0.18, "final_loss": 0.03759765625, "grad_norm": 0.0, "learning_rate": 9.303438709096254e-07, "loss": 0.1238, "projector_lr": 2.7910316127288765e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.75, "sft_loss": 0.62109375, "step": 1115 }, { "dpo_loss": 0.177734375, "epoch": 0.18, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 9.302145533461403e-07, "loss": 0.16, "projector_lr": 2.790643660038421e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.25, "sft_loss": 0.82421875, "step": 1116 }, { "dpo_loss": 0.1923828125, "epoch": 0.18, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 9.300851248588904e-07, "loss": 0.434, "projector_lr": 2.7902553745766714e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1435546875, "rewards_train/margins": 3.75, "rewards_train/rejected": -3.890625, "sft_loss": 0.87890625, "step": 1117 }, { "dpo_loss": 0.125, "epoch": 0.18, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 9.299555854812465e-07, "loss": 0.1596, "projector_lr": 2.7898667564437392e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.375, "rewards_train/margins": 2.515625, "rewards_train/rejected": -2.875, "sft_loss": 0.921875, "step": 1118 }, { "dpo_loss": 0.16796875, "epoch": 0.18, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 9.298259352466084e-07, "loss": 0.2138, "projector_lr": 2.7894778057398253e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1435546875, "rewards_train/margins": 2.609375, "rewards_train/rejected": -2.75, "sft_loss": 0.68359375, "step": 1119 }, { "dpo_loss": 0.059814453125, "epoch": 0.18, "final_loss": 0.059814453125, "grad_norm": 0.0, "learning_rate": 9.296961741884042e-07, "loss": 0.1314, "projector_lr": 2.7890885225652126e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.28125, "sft_loss": 0.7421875, "step": 1120 }, { "dpo_loss": 0.76171875, "epoch": 0.18, "final_loss": 0.76171875, "grad_norm": 0.0, "learning_rate": 9.295663023400906e-07, "loss": 0.4912, "projector_lr": 2.788698907020272e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.265625, "rewards_train/margins": 1.5546875, "rewards_train/rejected": -3.8125, "sft_loss": 0.65234375, "step": 1121 }, { "dpo_loss": 0.2275390625, "epoch": 0.18, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 9.294363197351529e-07, "loss": 0.231, "projector_lr": 2.788308959205459e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.71875, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.34375, "sft_loss": 0.7421875, "step": 1122 }, { "dpo_loss": 0.33984375, "epoch": 0.18, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 9.293062264071052e-07, "loss": 0.4702, "projector_lr": 2.787918679221316e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.458984375, "rewards_train/margins": 1.640625, "rewards_train/rejected": -2.109375, "sft_loss": 0.71484375, "step": 1123 }, { "dpo_loss": 0.427734375, "epoch": 0.18, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 9.291760223894899e-07, "loss": 0.2452, "projector_lr": 2.78752806716847e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 2.421875, "rewards_train/rejected": -4.09375, "sft_loss": 0.87890625, "step": 1124 }, { "dpo_loss": 0.15234375, "epoch": 0.18, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 9.290457077158777e-07, "loss": 0.2606, "projector_lr": 2.7871371231476337e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.55859375, "rewards_train/margins": 2.921875, "rewards_train/rejected": -3.484375, "sft_loss": 0.828125, "step": 1125 }, { "dpo_loss": 0.201171875, "epoch": 0.18, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 9.289152824198683e-07, "loss": 0.1895, "projector_lr": 2.786745847259605e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.1875, "sft_loss": 0.640625, "step": 1126 }, { "dpo_loss": 0.4296875, "epoch": 0.18, "final_loss": 0.4296875, "grad_norm": 0.0, "learning_rate": 9.287847465350897e-07, "loss": 0.277, "projector_lr": 2.7863542396052697e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.390625, "rewards_train/margins": 2.46875, "rewards_train/rejected": -3.84375, "sft_loss": 0.63671875, "step": 1127 }, { "dpo_loss": 0.142578125, "epoch": 0.18, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 9.286541000951986e-07, "loss": 0.1669, "projector_lr": 2.785962300285596e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.15625, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.84375, "sft_loss": 0.6875, "step": 1128 }, { "dpo_loss": 0.19921875, "epoch": 0.18, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 9.285233431338797e-07, "loss": 0.1726, "projector_lr": 2.7855700294016394e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96875, "rewards_train/margins": 2.40625, "rewards_train/rejected": -3.375, "sft_loss": 0.7578125, "step": 1129 }, { "dpo_loss": 0.1748046875, "epoch": 0.18, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 9.283924756848467e-07, "loss": 0.1486, "projector_lr": 2.7851774270545404e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.53125, "sft_loss": 0.6015625, "step": 1130 }, { "dpo_loss": 0.06689453125, "epoch": 0.18, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 9.282614977818418e-07, "loss": 0.155, "projector_lr": 2.7847844933455255e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.34375, "rewards_train/margins": 3.34375, "rewards_train/rejected": -4.6875, "sft_loss": 0.8125, "step": 1131 }, { "dpo_loss": 0.361328125, "epoch": 0.18, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.28130409458635e-07, "loss": 0.2633, "projector_lr": 2.7843912283759055e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.078125, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.8125, "sft_loss": 0.95703125, "step": 1132 }, { "dpo_loss": 0.05322265625, "epoch": 0.18, "final_loss": 0.05322265625, "grad_norm": 0.0, "learning_rate": 9.27999210749026e-07, "loss": 0.139, "projector_lr": 2.783997632247078e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84375, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.875, "sft_loss": 0.87890625, "step": 1133 }, { "dpo_loss": 0.279296875, "epoch": 0.18, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 9.278679016868416e-07, "loss": 0.2472, "projector_lr": 2.783603705060525e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 2.078125, "rewards_train/rejected": -3.4375, "sft_loss": 1.125, "step": 1134 }, { "dpo_loss": 0.1962890625, "epoch": 0.18, "final_loss": 0.1962890625, "grad_norm": 0.0, "learning_rate": 9.277364823059382e-07, "loss": 0.3622, "projector_lr": 2.7832094469178145e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.921875, "rewards_train/margins": 2.40625, "rewards_train/rejected": -4.3125, "sft_loss": 0.88671875, "step": 1135 }, { "dpo_loss": 0.10986328125, "epoch": 0.18, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 9.276049526401996e-07, "loss": 0.366, "projector_lr": 2.782814857920599e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 2.53125, "rewards_train/rejected": -3.484375, "sft_loss": 0.6953125, "step": 1136 }, { "dpo_loss": 0.07177734375, "epoch": 0.18, "final_loss": 0.07177734375, "grad_norm": 0.0, "learning_rate": 9.274733127235389e-07, "loss": 0.1843, "projector_lr": 2.782419938170617e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9375, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.125, "sft_loss": 0.6328125, "step": 1137 }, { "dpo_loss": 0.04345703125, "epoch": 0.18, "final_loss": 0.04345703125, "grad_norm": 0.0, "learning_rate": 9.273415625898972e-07, "loss": 0.0823, "projector_lr": 2.782024687769692e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.490234375, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.40625, "sft_loss": 0.55078125, "step": 1138 }, { "dpo_loss": 0.279296875, "epoch": 0.18, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 9.272097022732443e-07, "loss": 0.4046, "projector_lr": 2.7816291068197328e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.125, "rewards_train/margins": 2.546875, "rewards_train/rejected": -3.671875, "sft_loss": 0.82421875, "step": 1139 }, { "dpo_loss": 0.265625, "epoch": 0.18, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 9.270777318075778e-07, "loss": 0.1547, "projector_lr": 2.781233195422734e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91015625, "rewards_train/margins": 2.21875, "rewards_train/rejected": -3.140625, "sft_loss": 0.59765625, "step": 1140 }, { "dpo_loss": 0.0208740234375, "epoch": 0.18, "final_loss": 0.0208740234375, "grad_norm": 0.0, "learning_rate": 9.269456512269247e-07, "loss": 0.0951, "projector_lr": 2.780836953680774e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.09375, "sft_loss": 0.58203125, "step": 1141 }, { "dpo_loss": 0.1435546875, "epoch": 0.18, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 9.268134605653393e-07, "loss": 0.0839, "projector_lr": 2.780440381696018e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.052490234375, "rewards_train/margins": 2.640625, "rewards_train/rejected": -2.59375, "sft_loss": 0.625, "step": 1142 }, { "dpo_loss": 0.5078125, "epoch": 0.18, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 9.266811598569049e-07, "loss": 0.3666, "projector_lr": 2.7800434795707146e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0625, "rewards_train/margins": 2.015625, "rewards_train/rejected": -3.078125, "sft_loss": 0.7421875, "step": 1143 }, { "dpo_loss": 0.11279296875, "epoch": 0.18, "final_loss": 0.11279296875, "grad_norm": 0.0, "learning_rate": 9.265487491357332e-07, "loss": 0.1259, "projector_lr": 2.7796462474072e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.875, "rewards_train/margins": 3.265625, "rewards_train/rejected": -4.15625, "sft_loss": 0.6875, "step": 1144 }, { "dpo_loss": 0.33984375, "epoch": 0.18, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 9.264162284359641e-07, "loss": 0.4264, "projector_lr": 2.7792486853078926e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.640625, "rewards_train/margins": 1.890625, "rewards_train/rejected": -3.53125, "sft_loss": 0.671875, "step": 1145 }, { "dpo_loss": 0.287109375, "epoch": 0.18, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 9.26283597791766e-07, "loss": 0.1902, "projector_lr": 2.778850793375298e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 1.8359375, "rewards_train/rejected": -2.859375, "sft_loss": 0.9453125, "step": 1146 }, { "dpo_loss": 0.2177734375, "epoch": 0.18, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 9.261508572373352e-07, "loss": 0.1314, "projector_lr": 2.778452571712006e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 3.0625, "rewards_train/rejected": -3.921875, "sft_loss": 0.83203125, "step": 1147 }, { "dpo_loss": 0.0966796875, "epoch": 0.18, "final_loss": 0.0966796875, "grad_norm": 0.0, "learning_rate": 9.26018006806897e-07, "loss": 0.0838, "projector_lr": 2.7780540204206914e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.65625, "sft_loss": 0.765625, "step": 1148 }, { "dpo_loss": 0.154296875, "epoch": 0.18, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 9.258850465347045e-07, "loss": 0.087, "projector_lr": 2.7776551396041137e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1181640625, "rewards_train/margins": 5.78125, "rewards_train/rejected": -5.65625, "sft_loss": 0.6640625, "step": 1149 }, { "dpo_loss": 0.302734375, "epoch": 0.18, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 9.257519764550394e-07, "loss": 0.1765, "projector_lr": 2.7772559293651184e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 2.5, "rewards_train/rejected": -3.265625, "sft_loss": 0.98828125, "step": 1150 }, { "dpo_loss": 0.25390625, "epoch": 0.18, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 9.256187966022118e-07, "loss": 0.2023, "projector_lr": 2.7768563898066355e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.388671875, "rewards_train/margins": 3.421875, "rewards_train/rejected": -3.8125, "sft_loss": 0.703125, "step": 1151 }, { "dpo_loss": 0.2451171875, "epoch": 0.18, "final_loss": 0.2451171875, "grad_norm": 0.0, "learning_rate": 9.254855070105597e-07, "loss": 0.1604, "projector_lr": 2.776456521031679e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.07470703125, "rewards_train/margins": 2.75, "rewards_train/rejected": -2.8125, "sft_loss": 0.6015625, "step": 1152 }, { "dpo_loss": 0.2216796875, "epoch": 0.18, "final_loss": 0.2216796875, "grad_norm": 0.0, "learning_rate": 9.253521077144497e-07, "loss": 0.2009, "projector_lr": 2.7760563231433494e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.59375, "sft_loss": 0.94921875, "step": 1153 }, { "dpo_loss": 0.5703125, "epoch": 0.18, "final_loss": 0.5703125, "grad_norm": 0.0, "learning_rate": 9.252185987482766e-07, "loss": 0.3807, "projector_lr": 2.77565579624483e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 1.53125, "rewards_train/rejected": -2.109375, "sft_loss": 0.78125, "step": 1154 }, { "dpo_loss": 0.216796875, "epoch": 0.18, "final_loss": 0.216796875, "grad_norm": 0.0, "learning_rate": 9.250849801464636e-07, "loss": 0.1778, "projector_lr": 2.775254940439391e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.59375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.15625, "sft_loss": 0.62109375, "step": 1155 }, { "dpo_loss": 0.1259765625, "epoch": 0.18, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 9.249512519434621e-07, "loss": 0.3541, "projector_lr": 2.774853755830386e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.640625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.5625, "sft_loss": 0.50390625, "step": 1156 }, { "dpo_loss": 0.20703125, "epoch": 0.19, "final_loss": 0.20703125, "grad_norm": 0.0, "learning_rate": 9.248174141737514e-07, "loss": 0.2129, "projector_lr": 2.774452242521255e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 2.015625, "rewards_train/rejected": -2.6875, "sft_loss": 0.65234375, "step": 1157 }, { "dpo_loss": 0.2060546875, "epoch": 0.19, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 9.246834668718398e-07, "loss": 0.3621, "projector_lr": 2.7740504006155193e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.55859375, "rewards_train/margins": 2.703125, "rewards_train/rejected": -3.265625, "sft_loss": 0.8203125, "step": 1158 }, { "dpo_loss": 0.04931640625, "epoch": 0.19, "final_loss": 0.04931640625, "grad_norm": 0.0, "learning_rate": 9.24549410072263e-07, "loss": 0.2456, "projector_lr": 2.7736482302167894e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0380859375, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.09375, "sft_loss": 0.671875, "step": 1159 }, { "dpo_loss": 0.0380859375, "epoch": 0.19, "final_loss": 0.0380859375, "grad_norm": 0.0, "learning_rate": 9.244152438095856e-07, "loss": 0.0464, "projector_lr": 2.7732457314287574e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.494140625, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.75, "sft_loss": 0.73828125, "step": 1160 }, { "dpo_loss": 0.412109375, "epoch": 0.19, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 9.242809681184003e-07, "loss": 0.4083, "projector_lr": 2.772842904355201e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3984375, "rewards_train/margins": 3.265625, "rewards_train/rejected": -3.65625, "sft_loss": 0.53125, "step": 1161 }, { "dpo_loss": 0.267578125, "epoch": 0.19, "final_loss": 0.267578125, "grad_norm": 0.0, "learning_rate": 9.241465830333277e-07, "loss": 0.189, "projector_lr": 2.7724397490999834e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.203125, "rewards_train/margins": 3.546875, "rewards_train/rejected": -4.75, "sft_loss": 0.8046875, "step": 1162 }, { "dpo_loss": 0.08740234375, "epoch": 0.19, "final_loss": 0.08740234375, "grad_norm": 0.0, "learning_rate": 9.240120885890166e-07, "loss": 0.3209, "projector_lr": 2.7720362657670498e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11279296875, "rewards_train/margins": 3.34375, "rewards_train/rejected": -3.453125, "sft_loss": 0.65625, "step": 1163 }, { "dpo_loss": 0.01220703125, "epoch": 0.19, "final_loss": 0.01220703125, "grad_norm": 0.0, "learning_rate": 9.238774848201445e-07, "loss": 0.0822, "projector_lr": 2.7716324544604336e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1064453125, "rewards_train/margins": 5.8125, "rewards_train/rejected": -5.6875, "sft_loss": 0.8046875, "step": 1164 }, { "dpo_loss": 0.326171875, "epoch": 0.19, "final_loss": 0.326171875, "grad_norm": 0.0, "learning_rate": 9.237427717614168e-07, "loss": 0.3255, "projector_lr": 2.7712283152842504e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 2.046875, "rewards_train/rejected": -2.875, "sft_loss": 0.71875, "step": 1165 }, { "dpo_loss": 0.146484375, "epoch": 0.19, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 9.236079494475668e-07, "loss": 0.1583, "projector_lr": 2.7708238483427005e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.42578125, "rewards_train/margins": 2.875, "rewards_train/rejected": -3.296875, "sft_loss": 0.56640625, "step": 1166 }, { "dpo_loss": 0.3828125, "epoch": 0.19, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 9.234730179133564e-07, "loss": 0.2668, "projector_lr": 2.770419053740069e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 2.96875, "rewards_train/rejected": -3.65625, "sft_loss": 0.89453125, "step": 1167 }, { "dpo_loss": 0.255859375, "epoch": 0.19, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 9.233379771935752e-07, "loss": 0.2436, "projector_lr": 2.7700139315807258e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.212890625, "rewards_train/margins": 3.140625, "rewards_train/rejected": -3.359375, "sft_loss": 0.640625, "step": 1168 }, { "dpo_loss": 0.3203125, "epoch": 0.19, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 9.232028273230415e-07, "loss": 0.218, "projector_lr": 2.769608481969125e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.68359375, "rewards_train/margins": 2.34375, "rewards_train/rejected": -3.03125, "sft_loss": 0.9765625, "step": 1169 }, { "dpo_loss": 0.1748046875, "epoch": 0.19, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 9.230675683366016e-07, "loss": 0.3497, "projector_lr": 2.769202705009805e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.07470703125, "rewards_train/margins": 3.171875, "rewards_train/rejected": -3.09375, "sft_loss": 0.703125, "step": 1170 }, { "dpo_loss": 0.1650390625, "epoch": 0.19, "final_loss": 0.1650390625, "grad_norm": 0.0, "learning_rate": 9.229322002691296e-07, "loss": 0.1494, "projector_lr": 2.768796600807389e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 2.859375, "rewards_train/rejected": -3.390625, "sft_loss": 0.74609375, "step": 1171 }, { "dpo_loss": 0.94140625, "epoch": 0.19, "final_loss": 0.94140625, "grad_norm": 0.0, "learning_rate": 9.227967231555278e-07, "loss": 0.5218, "projector_lr": 2.7683901694665837e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5078125, "rewards_train/margins": 1.046875, "rewards_train/rejected": -2.5625, "sft_loss": 0.87109375, "step": 1172 }, { "dpo_loss": 0.24609375, "epoch": 0.19, "final_loss": 0.24609375, "grad_norm": 0.0, "learning_rate": 9.226611370307271e-07, "loss": 0.2449, "projector_lr": 2.7679834110921814e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.10595703125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.34375, "sft_loss": 0.6953125, "step": 1173 }, { "dpo_loss": 0.58984375, "epoch": 0.19, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 9.225254419296859e-07, "loss": 0.3161, "projector_lr": 2.767576325789058e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.65625, "rewards_train/margins": 2.859375, "rewards_train/rejected": -3.515625, "sft_loss": 0.81640625, "step": 1174 }, { "dpo_loss": 0.1923828125, "epoch": 0.19, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 9.223896378873913e-07, "loss": 0.1992, "projector_lr": 2.7671689136621737e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.36328125, "rewards_train/margins": 2.984375, "rewards_train/rejected": -2.625, "sft_loss": 0.64453125, "step": 1175 }, { "dpo_loss": 0.18359375, "epoch": 0.19, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 9.222537249388576e-07, "loss": 0.1733, "projector_lr": 2.7667611748165727e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 2.90625, "rewards_train/rejected": -3.484375, "sft_loss": 0.8515625, "step": 1176 }, { "dpo_loss": 0.1943359375, "epoch": 0.19, "final_loss": 0.1943359375, "grad_norm": 0.0, "learning_rate": 9.221177031191281e-07, "loss": 0.2698, "projector_lr": 2.7663531093573843e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.287109375, "rewards_train/margins": 3.90625, "rewards_train/rejected": -3.609375, "sft_loss": 0.765625, "step": 1177 }, { "dpo_loss": 0.10009765625, "epoch": 0.19, "final_loss": 0.10009765625, "grad_norm": 0.0, "learning_rate": 9.219815724632735e-07, "loss": 0.1051, "projector_lr": 2.765944717389821e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0341796875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -4.9375, "sft_loss": 0.71875, "step": 1178 }, { "dpo_loss": 0.2177734375, "epoch": 0.19, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 9.218453330063933e-07, "loss": 0.259, "projector_lr": 2.76553599901918e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1943359375, "rewards_train/margins": 2.59375, "rewards_train/rejected": -2.78125, "sft_loss": 0.80859375, "step": 1179 }, { "dpo_loss": 0.078125, "epoch": 0.19, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 9.217089847836144e-07, "loss": 0.1052, "projector_lr": 2.7651269543508433e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.021484375, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.3125, "sft_loss": 0.546875, "step": 1180 }, { "dpo_loss": 0.138671875, "epoch": 0.19, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 9.215725278300918e-07, "loss": 0.2808, "projector_lr": 2.7647175834902757e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.146484375, "rewards_train/margins": 2.84375, "rewards_train/rejected": -2.703125, "sft_loss": 0.53125, "step": 1181 }, { "dpo_loss": 0.0849609375, "epoch": 0.19, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 9.214359621810088e-07, "loss": 0.1405, "projector_lr": 2.764307886543027e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.043212890625, "rewards_train/margins": 3.65625, "rewards_train/rejected": -3.703125, "sft_loss": 0.62109375, "step": 1182 }, { "dpo_loss": 0.515625, "epoch": 0.19, "final_loss": 0.515625, "grad_norm": 0.0, "learning_rate": 9.212992878715768e-07, "loss": 0.5243, "projector_lr": 2.7638978636147307e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.0771484375, "rewards_train/margins": 2.078125, "rewards_train/rejected": -2.15625, "sft_loss": 0.55859375, "step": 1183 }, { "dpo_loss": 0.63671875, "epoch": 0.19, "final_loss": 0.63671875, "grad_norm": 0.0, "learning_rate": 9.211625049370346e-07, "loss": 0.398, "projector_lr": 2.763487514811104e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.921875, "rewards_train/margins": 1.1796875, "rewards_train/rejected": -2.09375, "sft_loss": 0.64453125, "step": 1184 }, { "dpo_loss": 0.0634765625, "epoch": 0.19, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 9.2102561341265e-07, "loss": 0.1757, "projector_lr": 2.76307684023795e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.314453125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.34375, "sft_loss": 0.6640625, "step": 1185 }, { "dpo_loss": 0.056640625, "epoch": 0.19, "final_loss": 0.056640625, "grad_norm": 0.0, "learning_rate": 9.208886133337177e-07, "loss": 0.2398, "projector_lr": 2.7626658400011532e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.34375, "sft_loss": 0.68359375, "step": 1186 }, { "dpo_loss": 0.490234375, "epoch": 0.19, "final_loss": 0.490234375, "grad_norm": 0.0, "learning_rate": 9.207515047355611e-07, "loss": 0.3043, "projector_lr": 2.7622545142066834e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.90625, "rewards_train/margins": 2.09375, "rewards_train/rejected": -3.0, "sft_loss": 0.83203125, "step": 1187 }, { "dpo_loss": 0.09521484375, "epoch": 0.19, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 9.206142876535314e-07, "loss": 0.1774, "projector_lr": 2.7618428629605945e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.078125, "rewards_train/margins": 4.625, "rewards_train/rejected": -4.6875, "sft_loss": 0.65234375, "step": 1188 }, { "dpo_loss": 0.3359375, "epoch": 0.19, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 9.204769621230079e-07, "loss": 0.403, "projector_lr": 2.7614308863690243e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.59375, "rewards_train/margins": 2.578125, "rewards_train/rejected": -3.171875, "sft_loss": 0.53515625, "step": 1189 }, { "dpo_loss": 0.09228515625, "epoch": 0.19, "final_loss": 0.09228515625, "grad_norm": 0.0, "learning_rate": 9.203395281793978e-07, "loss": 0.1637, "projector_lr": 2.7610185845381933e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59375, "rewards_train/margins": 3.328125, "rewards_train/rejected": -3.921875, "sft_loss": 0.7109375, "step": 1190 }, { "dpo_loss": 0.306640625, "epoch": 0.19, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 9.20201985858136e-07, "loss": 0.4162, "projector_lr": 2.760605957574408e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.29296875, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -1.4921875, "sft_loss": 0.443359375, "step": 1191 }, { "dpo_loss": 0.14453125, "epoch": 0.19, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 9.200643351946854e-07, "loss": 0.1518, "projector_lr": 2.7601930055840565e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.01287841796875, "rewards_train/margins": 3.890625, "rewards_train/rejected": -3.90625, "sft_loss": 0.81640625, "step": 1192 }, { "dpo_loss": 0.41015625, "epoch": 0.19, "final_loss": 0.41015625, "grad_norm": 0.0, "learning_rate": 9.199265762245372e-07, "loss": 0.2986, "projector_lr": 2.7597797286736122e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6875, "rewards_train/margins": 2.546875, "rewards_train/rejected": -3.234375, "sft_loss": 0.76171875, "step": 1193 }, { "dpo_loss": 0.10400390625, "epoch": 0.19, "final_loss": 0.10400390625, "grad_norm": 0.0, "learning_rate": 9.197887089832104e-07, "loss": 0.1069, "projector_lr": 2.7593661269496314e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.03125, "rewards_train/margins": 2.4375, "rewards_train/rejected": -3.484375, "sft_loss": 0.75, "step": 1194 }, { "dpo_loss": 0.6796875, "epoch": 0.19, "final_loss": 0.6796875, "grad_norm": 0.0, "learning_rate": 9.196507335062514e-07, "loss": 0.3734, "projector_lr": 2.7589522005187543e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 1.90625, "rewards_train/rejected": -2.515625, "sft_loss": 0.80859375, "step": 1195 }, { "dpo_loss": 0.29296875, "epoch": 0.19, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 9.195126498292353e-07, "loss": 0.275, "projector_lr": 2.758537949487706e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1943359375, "rewards_train/margins": 2.78125, "rewards_train/rejected": -2.96875, "sft_loss": 0.64453125, "step": 1196 }, { "dpo_loss": 0.25390625, "epoch": 0.19, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 9.193744579877644e-07, "loss": 0.1908, "projector_lr": 2.7581233739632936e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.443359375, "rewards_train/margins": 3.734375, "rewards_train/rejected": -3.296875, "sft_loss": 0.5625, "step": 1197 }, { "dpo_loss": 0.0267333984375, "epoch": 0.19, "final_loss": 0.0267333984375, "grad_norm": 0.0, "learning_rate": 9.192361580174695e-07, "loss": 0.0891, "projector_lr": 2.757708474052409e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2431640625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.4375, "sft_loss": 0.6796875, "step": 1198 }, { "dpo_loss": 0.2109375, "epoch": 0.19, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 9.190977499540089e-07, "loss": 0.3717, "projector_lr": 2.757293249862027e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0810546875, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.078125, "sft_loss": 0.82421875, "step": 1199 }, { "dpo_loss": 0.373046875, "epoch": 0.19, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 9.189592338330686e-07, "loss": 0.304, "projector_lr": 2.756877701499206e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 3.1875, "rewards_train/rejected": -4.5, "sft_loss": 0.9453125, "step": 1200 }, { "dpo_loss": 0.08740234375, "epoch": 0.19, "final_loss": 0.08740234375, "grad_norm": 0.0, "learning_rate": 9.18820609690363e-07, "loss": 0.3375, "projector_lr": 2.756461829071089e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4375, "rewards_train/margins": 4.46875, "rewards_train/rejected": -4.90625, "sft_loss": 0.66796875, "step": 1201 }, { "dpo_loss": 0.734375, "epoch": 0.19, "final_loss": 0.734375, "grad_norm": 0.0, "learning_rate": 9.18681877561634e-07, "loss": 0.3986, "projector_lr": 2.7560456326849023e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 0.90625, "rewards_train/rejected": -1.9765625, "sft_loss": 0.78125, "step": 1202 }, { "dpo_loss": 0.1337890625, "epoch": 0.19, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 9.185430374826513e-07, "loss": 0.2607, "projector_lr": 2.755629112447954e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.15625, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.0625, "sft_loss": 0.5546875, "step": 1203 }, { "dpo_loss": 0.25, "epoch": 0.19, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 9.184040894892125e-07, "loss": 0.1928, "projector_lr": 2.755212268467638e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 3.359375, "rewards_train/rejected": -3.890625, "sft_loss": 0.66796875, "step": 1204 }, { "dpo_loss": 0.1015625, "epoch": 0.19, "final_loss": 0.1015625, "grad_norm": 0.0, "learning_rate": 9.182650336171434e-07, "loss": 0.2525, "projector_lr": 2.75479510085143e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 2.8125, "rewards_train/rejected": -3.40625, "sft_loss": 0.75, "step": 1205 }, { "dpo_loss": 0.345703125, "epoch": 0.19, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 9.181258699022967e-07, "loss": 0.2251, "projector_lr": 2.7543776097068907e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.80859375, "rewards_train/margins": 2.828125, "rewards_train/rejected": -3.625, "sft_loss": 0.62890625, "step": 1206 }, { "dpo_loss": 0.185546875, "epoch": 0.19, "final_loss": 0.185546875, "grad_norm": 0.0, "learning_rate": 9.179865983805541e-07, "loss": 0.1159, "projector_lr": 2.7539597951416626e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.150390625, "rewards_train/margins": 3.03125, "rewards_train/rejected": -2.890625, "sft_loss": 0.89453125, "step": 1207 }, { "dpo_loss": 0.265625, "epoch": 0.19, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 9.178472190878242e-07, "loss": 0.3992, "projector_lr": 2.7535416572634728e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.40625, "rewards_train/margins": 3.484375, "rewards_train/rejected": -3.890625, "sft_loss": 0.73828125, "step": 1208 }, { "dpo_loss": 0.0966796875, "epoch": 0.19, "final_loss": 0.0966796875, "grad_norm": 0.0, "learning_rate": 9.177077320600435e-07, "loss": 0.2092, "projector_lr": 2.7531231961801307e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 3.40625, "rewards_train/rejected": -4.21875, "sft_loss": 1.0, "step": 1209 }, { "dpo_loss": 0.19921875, "epoch": 0.19, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 9.175681373331765e-07, "loss": 0.1778, "projector_lr": 2.7527044119995297e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.294921875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.625, "sft_loss": 0.82421875, "step": 1210 }, { "dpo_loss": 0.14453125, "epoch": 0.19, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 9.174284349432157e-07, "loss": 0.1711, "projector_lr": 2.752285304829647e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 3.078125, "rewards_train/rejected": -3.53125, "sft_loss": 0.734375, "step": 1211 }, { "dpo_loss": 0.265625, "epoch": 0.19, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 9.172886249261808e-07, "loss": 0.3611, "projector_lr": 2.7518658747785427e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84375, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.578125, "sft_loss": 0.86328125, "step": 1212 }, { "dpo_loss": 0.1533203125, "epoch": 0.19, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 9.171487073181197e-07, "loss": 0.2246, "projector_lr": 2.751446121954359e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1064453125, "rewards_train/margins": 2.5625, "rewards_train/rejected": -2.671875, "sft_loss": 0.6953125, "step": 1213 }, { "dpo_loss": 0.333984375, "epoch": 0.19, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 9.170086821551075e-07, "loss": 0.2186, "projector_lr": 2.751026046465323e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.4375, "rewards_train/margins": 3.359375, "rewards_train/rejected": -3.796875, "sft_loss": 0.74609375, "step": 1214 }, { "dpo_loss": 0.6328125, "epoch": 0.19, "final_loss": 0.6328125, "grad_norm": 0.0, "learning_rate": 9.168685494732479e-07, "loss": 0.3777, "projector_lr": 2.750605648419744e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.74609375, "rewards_train/margins": 1.40625, "rewards_train/rejected": -2.15625, "sft_loss": 0.515625, "step": 1215 }, { "dpo_loss": 0.2080078125, "epoch": 0.19, "final_loss": 0.2080078125, "grad_norm": 0.0, "learning_rate": 9.167283093086715e-07, "loss": 0.2085, "projector_lr": 2.7501849279260145e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 2.375, "rewards_train/rejected": -3.171875, "sft_loss": 0.82421875, "step": 1216 }, { "dpo_loss": 0.1533203125, "epoch": 0.19, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 9.165879616975369e-07, "loss": 0.1455, "projector_lr": 2.749763885092611e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.146484375, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.125, "sft_loss": 0.58203125, "step": 1217 }, { "dpo_loss": 0.228515625, "epoch": 0.19, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 9.164475066760305e-07, "loss": 0.5267, "projector_lr": 2.749342520028092e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8125, "rewards_train/margins": 3.046875, "rewards_train/rejected": -3.859375, "sft_loss": 0.9453125, "step": 1218 }, { "dpo_loss": 0.365234375, "epoch": 0.2, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 9.163069442803664e-07, "loss": 0.2599, "projector_lr": 2.7489208328410997e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.458984375, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.46875, "sft_loss": 0.498046875, "step": 1219 }, { "dpo_loss": 0.1484375, "epoch": 0.2, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 9.161662745467864e-07, "loss": 0.1788, "projector_lr": 2.7484988236403595e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 2.890625, "rewards_train/rejected": -3.75, "sft_loss": 0.8515625, "step": 1220 }, { "dpo_loss": 0.1953125, "epoch": 0.2, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 9.160254975115596e-07, "loss": 0.262, "projector_lr": 2.748076492534679e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.421875, "rewards_train/margins": 3.484375, "rewards_train/rejected": -3.921875, "sft_loss": 0.68359375, "step": 1221 }, { "dpo_loss": 0.28125, "epoch": 0.2, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 9.158846132109831e-07, "loss": 0.1479, "projector_lr": 2.7476538396329495e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 2.859375, "rewards_train/rejected": -3.890625, "sft_loss": 0.6328125, "step": 1222 }, { "dpo_loss": 0.11962890625, "epoch": 0.2, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 9.157436216813816e-07, "loss": 0.2525, "projector_lr": 2.747230865044145e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14453125, "rewards_train/margins": 3.171875, "rewards_train/rejected": -3.3125, "sft_loss": 0.7265625, "step": 1223 }, { "dpo_loss": 0.2255859375, "epoch": 0.2, "final_loss": 0.2255859375, "grad_norm": 0.0, "learning_rate": 9.156025229591078e-07, "loss": 0.1838, "projector_lr": 2.7468075688773237e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.314453125, "rewards_train/margins": 2.234375, "rewards_train/rejected": -2.546875, "sft_loss": 0.7734375, "step": 1224 }, { "dpo_loss": 0.25, "epoch": 0.2, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 9.154613170805413e-07, "loss": 0.1601, "projector_lr": 2.746383951241624e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.361328125, "rewards_train/margins": 2.84375, "rewards_train/rejected": -3.203125, "sft_loss": 0.74609375, "step": 1225 }, { "dpo_loss": 0.57421875, "epoch": 0.2, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 9.153200040820899e-07, "loss": 0.3143, "projector_lr": 2.74596001224627e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 2.25, "rewards_train/rejected": -3.078125, "sft_loss": 0.73828125, "step": 1226 }, { "dpo_loss": 0.6796875, "epoch": 0.2, "final_loss": 0.6796875, "grad_norm": 0.0, "learning_rate": 9.151785840001886e-07, "loss": 0.423, "projector_lr": 2.745535752000566e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.359375, "rewards_train/margins": 1.6640625, "rewards_train/rejected": -3.03125, "sft_loss": 0.9375, "step": 1227 }, { "dpo_loss": 0.1845703125, "epoch": 0.2, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 9.150370568713006e-07, "loss": 0.1925, "projector_lr": 2.7451111706139022e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.07470703125, "rewards_train/margins": 2.453125, "rewards_train/rejected": -2.375, "sft_loss": 0.58984375, "step": 1228 }, { "dpo_loss": 0.2578125, "epoch": 0.2, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 9.148954227319162e-07, "loss": 0.1504, "projector_lr": 2.744686268195749e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.484375, "rewards_train/margins": 2.3125, "rewards_train/rejected": -2.8125, "sft_loss": 0.984375, "step": 1229 }, { "dpo_loss": 0.23046875, "epoch": 0.2, "final_loss": 0.23046875, "grad_norm": 0.0, "learning_rate": 9.147536816185533e-07, "loss": 0.2624, "projector_lr": 2.7442610448556603e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 2.453125, "rewards_train/rejected": -3.0, "sft_loss": 0.71484375, "step": 1230 }, { "dpo_loss": 0.3515625, "epoch": 0.2, "final_loss": 0.3515625, "grad_norm": 0.0, "learning_rate": 9.146118335677578e-07, "loss": 0.2639, "projector_lr": 2.7438355007032736e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5, "rewards_train/margins": 1.9453125, "rewards_train/rejected": -2.453125, "sft_loss": 0.8203125, "step": 1231 }, { "dpo_loss": 0.33203125, "epoch": 0.2, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 9.144698786161027e-07, "loss": 0.2433, "projector_lr": 2.7434096358483083e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 2.671875, "rewards_train/rejected": -3.5, "sft_loss": 0.703125, "step": 1232 }, { "dpo_loss": 0.2041015625, "epoch": 0.2, "final_loss": 0.2041015625, "grad_norm": 0.0, "learning_rate": 9.143278168001887e-07, "loss": 0.1702, "projector_lr": 2.7429834504005663e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.018798828125, "rewards_train/margins": 3.046875, "rewards_train/rejected": -3.0625, "sft_loss": 0.796875, "step": 1233 }, { "dpo_loss": 0.2216796875, "epoch": 0.2, "final_loss": 0.2216796875, "grad_norm": 0.0, "learning_rate": 9.141856481566441e-07, "loss": 0.3972, "projector_lr": 2.7425569444699325e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3359375, "rewards_train/margins": 1.890625, "rewards_train/rejected": -2.21875, "sft_loss": 0.75, "step": 1234 }, { "dpo_loss": 0.283203125, "epoch": 0.2, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 9.140433727221251e-07, "loss": 0.2625, "projector_lr": 2.7421301181663752e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.35546875, "rewards_train/margins": 2.40625, "rewards_train/rejected": -2.765625, "sft_loss": 0.85546875, "step": 1235 }, { "dpo_loss": 0.23828125, "epoch": 0.2, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 9.139009905333146e-07, "loss": 0.3693, "projector_lr": 2.741702971599944e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.171875, "rewards_train/margins": 2.21875, "rewards_train/rejected": -2.390625, "sft_loss": 0.80078125, "step": 1236 }, { "dpo_loss": 0.57421875, "epoch": 0.2, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 9.13758501626924e-07, "loss": 0.4543, "projector_lr": 2.7412755048807724e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 1.84375, "rewards_train/rejected": -2.6875, "sft_loss": 0.74609375, "step": 1237 }, { "dpo_loss": 0.3125, "epoch": 0.2, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 9.136159060396912e-07, "loss": 0.2187, "projector_lr": 2.740847718119074e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1708984375, "rewards_train/margins": 3.359375, "rewards_train/rejected": -3.53125, "sft_loss": 0.71484375, "step": 1238 }, { "dpo_loss": 0.0703125, "epoch": 0.2, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 9.134732038083826e-07, "loss": 0.118, "projector_lr": 2.740419611425148e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.173828125, "rewards_train/margins": 4.15625, "rewards_train/rejected": -3.96875, "sft_loss": 0.69140625, "step": 1239 }, { "dpo_loss": 0.130859375, "epoch": 0.2, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 9.133303949697915e-07, "loss": 0.1011, "projector_lr": 2.739991184909375e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.40625, "rewards_train/margins": 4.09375, "rewards_train/rejected": -3.6875, "sft_loss": 0.94921875, "step": 1240 }, { "dpo_loss": 0.11767578125, "epoch": 0.2, "final_loss": 0.11767578125, "grad_norm": 0.0, "learning_rate": 9.131874795607386e-07, "loss": 0.1871, "projector_lr": 2.739562438682216e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.0537109375, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.0625, "sft_loss": 0.671875, "step": 1241 }, { "dpo_loss": 0.142578125, "epoch": 0.2, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 9.130444576180724e-07, "loss": 0.138, "projector_lr": 2.7391333728542176e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.361328125, "rewards_train/margins": 3.078125, "rewards_train/rejected": -2.71875, "sft_loss": 0.67578125, "step": 1242 }, { "dpo_loss": 0.0250244140625, "epoch": 0.2, "final_loss": 0.0250244140625, "grad_norm": 0.0, "learning_rate": 9.129013291786689e-07, "loss": 0.1657, "projector_lr": 2.738703987536007e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.96875, "sft_loss": 0.71484375, "step": 1243 }, { "dpo_loss": 0.203125, "epoch": 0.2, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 9.127580942794314e-07, "loss": 0.4129, "projector_lr": 2.7382742828382943e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.41015625, "rewards_train/margins": 2.96875, "rewards_train/rejected": -3.375, "sft_loss": 1.0390625, "step": 1244 }, { "dpo_loss": 0.68359375, "epoch": 0.2, "final_loss": 0.68359375, "grad_norm": 0.0, "learning_rate": 9.126147529572905e-07, "loss": 0.5233, "projector_lr": 2.737844258871872e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": 0.1708984375, "rewards_train/margins": 1.7578125, "rewards_train/rejected": -1.5859375, "sft_loss": 0.74609375, "step": 1245 }, { "dpo_loss": 0.08154296875, "epoch": 0.2, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 9.124713052492045e-07, "loss": 0.0818, "projector_lr": 2.7374139157476137e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2060546875, "rewards_train/margins": 3.34375, "rewards_train/rejected": -3.140625, "sft_loss": 0.5546875, "step": 1246 }, { "dpo_loss": 0.83203125, "epoch": 0.2, "final_loss": 0.83203125, "grad_norm": 0.0, "learning_rate": 9.123277511921592e-07, "loss": 0.5034, "projector_lr": 2.736983253576478e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 2.25, "rewards_train/rejected": -2.90625, "sft_loss": 0.91796875, "step": 1247 }, { "dpo_loss": 0.13671875, "epoch": 0.2, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 9.121840908231674e-07, "loss": 0.2172, "projector_lr": 2.736552272469502e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0147705078125, "rewards_train/margins": 3.703125, "rewards_train/rejected": -3.6875, "sft_loss": 0.59375, "step": 1248 }, { "dpo_loss": 0.37109375, "epoch": 0.2, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 9.120403241792696e-07, "loss": 0.2761, "projector_lr": 2.736120972537809e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.193359375, "rewards_train/margins": 1.5703125, "rewards_train/rejected": -1.765625, "sft_loss": 0.66015625, "step": 1249 }, { "dpo_loss": 0.3125, "epoch": 0.2, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 9.118964512975338e-07, "loss": 0.3015, "projector_lr": 2.7356893538926014e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.28125, "rewards_train/margins": 3.578125, "rewards_train/rejected": -3.84375, "sft_loss": 0.90625, "step": 1250 }, { "dpo_loss": 0.3046875, "epoch": 0.2, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 9.117524722150553e-07, "loss": 0.2738, "projector_lr": 2.735257416645166e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.001220703125, "rewards_train/margins": 3.828125, "rewards_train/rejected": -3.828125, "sft_loss": 0.71875, "step": 1251 }, { "dpo_loss": 0.427734375, "epoch": 0.2, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 9.116083869689565e-07, "loss": 0.3429, "projector_lr": 2.7348251609068697e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.34375, "rewards_train/margins": 2.4375, "rewards_train/rejected": -2.78125, "sft_loss": 0.7890625, "step": 1252 }, { "dpo_loss": 0.296875, "epoch": 0.2, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 9.114641955963875e-07, "loss": 0.2505, "projector_lr": 2.734392586789163e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.08154296875, "rewards_train/margins": 4.65625, "rewards_train/rejected": -4.59375, "sft_loss": 0.7421875, "step": 1253 }, { "dpo_loss": 0.671875, "epoch": 0.2, "final_loss": 0.671875, "grad_norm": 0.0, "learning_rate": 9.11319898134526e-07, "loss": 0.4792, "projector_lr": 2.7339596944035783e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.26171875, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -2.046875, "sft_loss": 0.6015625, "step": 1254 }, { "dpo_loss": 0.087890625, "epoch": 0.2, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 9.111754946205763e-07, "loss": 0.0953, "projector_lr": 2.733526483861729e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.255859375, "rewards_train/margins": 3.84375, "rewards_train/rejected": -4.09375, "sft_loss": 0.73046875, "step": 1255 }, { "dpo_loss": 0.15234375, "epoch": 0.2, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 9.110309850917708e-07, "loss": 0.1647, "projector_lr": 2.7330929552753125e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 3.0625, "rewards_train/rejected": -3.4375, "sft_loss": 0.87890625, "step": 1256 }, { "dpo_loss": 0.173828125, "epoch": 0.2, "final_loss": 0.173828125, "grad_norm": 0.0, "learning_rate": 9.108863695853687e-07, "loss": 0.183, "projector_lr": 2.732659108756106e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 1.140625, "rewards_train/margins": 2.390625, "rewards_train/rejected": -1.265625, "sft_loss": 0.61328125, "step": 1257 }, { "dpo_loss": 0.30859375, "epoch": 0.2, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 9.107416481386568e-07, "loss": 0.2528, "projector_lr": 2.7322249444159704e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 1.65625, "rewards_train/rejected": -2.5, "sft_loss": 0.640625, "step": 1258 }, { "dpo_loss": 0.1904296875, "epoch": 0.2, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 9.105968207889492e-07, "loss": 0.391, "projector_lr": 2.7317904623668476e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5, "rewards_train/margins": 2.25, "rewards_train/rejected": -2.75, "sft_loss": 0.75, "step": 1259 }, { "dpo_loss": 0.24609375, "epoch": 0.2, "final_loss": 0.24609375, "grad_norm": 0.0, "learning_rate": 9.104518875735872e-07, "loss": 0.3595, "projector_lr": 2.7313556627207616e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.486328125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -4.09375, "sft_loss": 0.419921875, "step": 1260 }, { "dpo_loss": 0.50390625, "epoch": 0.2, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 9.103068485299392e-07, "loss": 0.3163, "projector_lr": 2.7309205455898177e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.77734375, "rewards_train/margins": 3.203125, "rewards_train/rejected": -3.984375, "sft_loss": 0.8515625, "step": 1261 }, { "dpo_loss": 0.1826171875, "epoch": 0.2, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 9.101617036954016e-07, "loss": 0.1423, "projector_lr": 2.7304851110862053e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.66015625, "rewards_train/margins": 3.890625, "rewards_train/rejected": -3.234375, "sft_loss": 0.5234375, "step": 1262 }, { "dpo_loss": 0.39453125, "epoch": 0.2, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 9.100164531073976e-07, "loss": 0.4392, "projector_lr": 2.730049359322193e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.203125, "rewards_train/margins": 2.078125, "rewards_train/rejected": -2.28125, "sft_loss": 0.73828125, "step": 1263 }, { "dpo_loss": 0.546875, "epoch": 0.2, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 9.098710968033773e-07, "loss": 0.3656, "projector_lr": 2.7296132904101323e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 3.359375, "rewards_train/rejected": -3.71875, "sft_loss": 0.56640625, "step": 1264 }, { "dpo_loss": 0.22265625, "epoch": 0.2, "final_loss": 0.22265625, "grad_norm": 0.0, "learning_rate": 9.097256348208186e-07, "loss": 0.449, "projector_lr": 2.729176904462456e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.031494140625, "rewards_train/margins": 3.015625, "rewards_train/rejected": -3.046875, "sft_loss": 0.72265625, "step": 1265 }, { "dpo_loss": 0.1279296875, "epoch": 0.2, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 9.095800671972267e-07, "loss": 0.1931, "projector_lr": 2.7287402015916803e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.177734375, "rewards_train/margins": 3.484375, "rewards_train/rejected": -3.65625, "sft_loss": 0.69140625, "step": 1266 }, { "dpo_loss": 0.61328125, "epoch": 0.2, "final_loss": 0.61328125, "grad_norm": 0.0, "learning_rate": 9.094343939701337e-07, "loss": 0.3552, "projector_lr": 2.7283031819104014e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.349609375, "rewards_train/margins": 2.109375, "rewards_train/rejected": -2.453125, "sft_loss": 0.859375, "step": 1267 }, { "dpo_loss": 0.29296875, "epoch": 0.2, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 9.092886151770986e-07, "loss": 0.2743, "projector_lr": 2.727865845531296e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 3.59375, "rewards_train/rejected": -4.28125, "sft_loss": 0.78125, "step": 1268 }, { "dpo_loss": 0.22265625, "epoch": 0.2, "final_loss": 0.22265625, "grad_norm": 0.0, "learning_rate": 9.091427308557088e-07, "loss": 0.2096, "projector_lr": 2.727428192567127e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.197265625, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.28125, "sft_loss": 0.94140625, "step": 1269 }, { "dpo_loss": 0.361328125, "epoch": 0.2, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.089967410435779e-07, "loss": 0.2528, "projector_lr": 2.7269902231307337e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.33203125, "rewards_train/margins": 2.6875, "rewards_train/rejected": -3.015625, "sft_loss": 0.63671875, "step": 1270 }, { "dpo_loss": 0.1845703125, "epoch": 0.2, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 9.088506457783467e-07, "loss": 0.2702, "projector_lr": 2.7265519373350404e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.053466796875, "rewards_train/margins": 3.3125, "rewards_train/rejected": -3.25, "sft_loss": 0.703125, "step": 1271 }, { "dpo_loss": 0.1826171875, "epoch": 0.2, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 9.087044450976838e-07, "loss": 0.1262, "projector_lr": 2.726113335293051e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.34765625, "rewards_train/margins": 3.90625, "rewards_train/rejected": -4.25, "sft_loss": 0.64453125, "step": 1272 }, { "dpo_loss": 0.77734375, "epoch": 0.2, "final_loss": 0.77734375, "grad_norm": 0.0, "learning_rate": 9.085581390392845e-07, "loss": 0.534, "projector_lr": 2.7256744171178535e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.328125, "rewards_train/margins": 1.6328125, "rewards_train/rejected": -1.9609375, "sft_loss": 0.78515625, "step": 1273 }, { "dpo_loss": 0.369140625, "epoch": 0.2, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 9.084117276408715e-07, "loss": 0.2404, "projector_lr": 2.7252351829226147e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.26171875, "rewards_train/margins": 2.375, "rewards_train/rejected": -2.640625, "sft_loss": 0.84375, "step": 1274 }, { "dpo_loss": 0.326171875, "epoch": 0.2, "final_loss": 0.326171875, "grad_norm": 0.0, "learning_rate": 9.082652109401945e-07, "loss": 0.3226, "projector_lr": 2.7247956328205835e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3125, "rewards_train/margins": 1.546875, "rewards_train/rejected": -1.859375, "sft_loss": 0.6875, "step": 1275 }, { "dpo_loss": 0.43359375, "epoch": 0.2, "final_loss": 0.43359375, "grad_norm": 0.0, "learning_rate": 9.081185889750304e-07, "loss": 0.3858, "projector_lr": 2.7243557669250917e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.06591796875, "rewards_train/margins": 1.3984375, "rewards_train/rejected": -1.3359375, "sft_loss": 0.859375, "step": 1276 }, { "dpo_loss": 0.58984375, "epoch": 0.2, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 9.079718617831835e-07, "loss": 0.4492, "projector_lr": 2.7239155853495506e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 1.6640625, "rewards_train/rejected": -2.96875, "sft_loss": 0.8359375, "step": 1277 }, { "dpo_loss": 0.333984375, "epoch": 0.2, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 9.078250294024847e-07, "loss": 0.2237, "projector_lr": 2.7234750882074546e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.0595703125, "rewards_train/margins": 2.34375, "rewards_train/rejected": -2.40625, "sft_loss": 0.66796875, "step": 1278 }, { "dpo_loss": 0.392578125, "epoch": 0.2, "final_loss": 0.392578125, "grad_norm": 0.0, "learning_rate": 9.076780918707925e-07, "loss": 0.221, "projector_lr": 2.7230342756123777e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.0615234375, "rewards_train/margins": 1.984375, "rewards_train/rejected": -2.046875, "sft_loss": 1.0, "step": 1279 }, { "dpo_loss": 0.25, "epoch": 0.2, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 9.075310492259922e-07, "loss": 0.3645, "projector_lr": 2.722593147677977e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2314453125, "rewards_train/margins": 3.40625, "rewards_train/rejected": -3.640625, "sft_loss": 0.69140625, "step": 1280 }, { "dpo_loss": 0.1396484375, "epoch": 0.2, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 9.073839015059966e-07, "loss": 0.0907, "projector_lr": 2.72215170451799e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0267333984375, "rewards_train/margins": 3.515625, "rewards_train/rejected": -3.484375, "sft_loss": 0.56640625, "step": 1281 }, { "dpo_loss": 0.171875, "epoch": 0.21, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 9.072366487487451e-07, "loss": 0.3915, "projector_lr": 2.7217099462462354e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.265625, "rewards_train/margins": 3.703125, "rewards_train/rejected": -3.4375, "sft_loss": 0.68359375, "step": 1282 }, { "dpo_loss": 0.318359375, "epoch": 0.21, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 9.070892909922043e-07, "loss": 0.2448, "projector_lr": 2.7212678729766133e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.33203125, "rewards_train/margins": 2.203125, "rewards_train/rejected": -1.8828125, "sft_loss": 0.734375, "step": 1283 }, { "dpo_loss": 0.130859375, "epoch": 0.21, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 9.069418282743681e-07, "loss": 0.2152, "projector_lr": 2.7208254848231047e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.009521484375, "rewards_train/margins": 3.90625, "rewards_train/rejected": -3.921875, "sft_loss": 0.5859375, "step": 1284 }, { "dpo_loss": 0.07373046875, "epoch": 0.21, "final_loss": 0.07373046875, "grad_norm": 0.0, "learning_rate": 9.067942606332575e-07, "loss": 0.0551, "projector_lr": 2.7203827818997725e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.337890625, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.34375, "sft_loss": 0.58203125, "step": 1285 }, { "dpo_loss": 0.6015625, "epoch": 0.21, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 9.066465881069203e-07, "loss": 0.4759, "projector_lr": 2.719939764320761e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.255859375, "rewards_train/margins": 2.03125, "rewards_train/rejected": -2.28125, "sft_loss": 0.71875, "step": 1286 }, { "dpo_loss": 0.12158203125, "epoch": 0.21, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 9.064988107334313e-07, "loss": 0.1108, "projector_lr": 2.719496432200294e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.04443359375, "rewards_train/margins": 3.28125, "rewards_train/rejected": -3.328125, "sft_loss": 0.7265625, "step": 1287 }, { "dpo_loss": 0.349609375, "epoch": 0.21, "final_loss": 0.349609375, "grad_norm": 0.0, "learning_rate": 9.063509285508926e-07, "loss": 0.2306, "projector_lr": 2.7190527856526784e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.244140625, "rewards_train/margins": 3.328125, "rewards_train/rejected": -3.578125, "sft_loss": 0.93359375, "step": 1288 }, { "dpo_loss": 0.126953125, "epoch": 0.21, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 9.062029415974333e-07, "loss": 0.3314, "projector_lr": 2.7186088247923e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4765625, "rewards_train/margins": 3.6875, "rewards_train/rejected": -3.203125, "sft_loss": 0.84765625, "step": 1289 }, { "dpo_loss": 0.134765625, "epoch": 0.21, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 9.060548499112091e-07, "loss": 0.1002, "projector_lr": 2.7181645497336276e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.08056640625, "rewards_train/margins": 3.734375, "rewards_train/rejected": -3.8125, "sft_loss": 0.8515625, "step": 1290 }, { "dpo_loss": 0.05810546875, "epoch": 0.21, "final_loss": 0.05810546875, "grad_norm": 0.0, "learning_rate": 9.059066535304033e-07, "loss": 0.2505, "projector_lr": 2.71771996059121e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.51171875, "rewards_train/margins": 3.890625, "rewards_train/rejected": -3.390625, "sft_loss": 0.7265625, "step": 1291 }, { "dpo_loss": 0.87109375, "epoch": 0.21, "final_loss": 0.87109375, "grad_norm": 0.0, "learning_rate": 9.057583524932258e-07, "loss": 0.5167, "projector_lr": 2.7172750574796776e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.40234375, "rewards_train/margins": 1.6171875, "rewards_train/rejected": -2.015625, "sft_loss": 0.75390625, "step": 1292 }, { "dpo_loss": 0.318359375, "epoch": 0.21, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 9.056099468379135e-07, "loss": 0.2694, "projector_lr": 2.7168298405137405e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.01220703125, "rewards_train/margins": 3.140625, "rewards_train/rejected": -3.15625, "sft_loss": 0.58203125, "step": 1293 }, { "dpo_loss": 0.2373046875, "epoch": 0.21, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 9.054614366027306e-07, "loss": 0.2446, "projector_lr": 2.716384309808192e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3203125, "rewards_train/margins": 3.6875, "rewards_train/rejected": -4.0, "sft_loss": 0.71484375, "step": 1294 }, { "dpo_loss": 0.52734375, "epoch": 0.21, "final_loss": 0.52734375, "grad_norm": 0.0, "learning_rate": 9.053128218259678e-07, "loss": 0.2852, "projector_lr": 2.7159384654779034e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.0771484375, "rewards_train/margins": 1.3125, "rewards_train/rejected": -1.390625, "sft_loss": 0.96875, "step": 1295 }, { "dpo_loss": 0.4453125, "epoch": 0.21, "final_loss": 0.4453125, "grad_norm": 0.0, "learning_rate": 9.05164102545943e-07, "loss": 0.2733, "projector_lr": 2.715492307637829e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.765625, "sft_loss": 0.7421875, "step": 1296 }, { "dpo_loss": 0.2578125, "epoch": 0.21, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 9.050152788010011e-07, "loss": 0.2198, "projector_lr": 2.7150458364030035e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.00927734375, "rewards_train/margins": 2.125, "rewards_train/rejected": -2.125, "sft_loss": 0.9921875, "step": 1297 }, { "dpo_loss": 0.03662109375, "epoch": 0.21, "final_loss": 0.03662109375, "grad_norm": 0.0, "learning_rate": 9.048663506295139e-07, "loss": 0.0404, "projector_lr": 2.7145990518885417e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.025390625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -4.5, "sft_loss": 0.8984375, "step": 1298 }, { "dpo_loss": 0.34765625, "epoch": 0.21, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 9.0471731806988e-07, "loss": 0.2885, "projector_lr": 2.7141519542096404e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 2.46875, "rewards_train/rejected": -3.015625, "sft_loss": 1.0, "step": 1299 }, { "dpo_loss": 0.259765625, "epoch": 0.21, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 9.04568181160525e-07, "loss": 0.1527, "projector_lr": 2.713704543481575e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2451171875, "rewards_train/margins": 4.5, "rewards_train/rejected": -4.71875, "sft_loss": 0.53125, "step": 1300 }, { "dpo_loss": 0.375, "epoch": 0.21, "final_loss": 0.375, "grad_norm": 0.0, "learning_rate": 9.044189399399016e-07, "loss": 0.2438, "projector_lr": 2.713256819819705e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 2.703125, "rewards_train/rejected": -3.4375, "sft_loss": 0.77734375, "step": 1301 }, { "dpo_loss": 0.029052734375, "epoch": 0.21, "final_loss": 0.029052734375, "grad_norm": 0.0, "learning_rate": 9.042695944464888e-07, "loss": 0.2083, "projector_lr": 2.712808783339467e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2373046875, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.03125, "sft_loss": 0.62890625, "step": 1302 }, { "dpo_loss": 0.21484375, "epoch": 0.21, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 9.041201447187934e-07, "loss": 0.2328, "projector_lr": 2.7123604341563804e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.044189453125, "rewards_train/margins": 2.4375, "rewards_train/rejected": -2.46875, "sft_loss": 0.671875, "step": 1303 }, { "dpo_loss": 0.3046875, "epoch": 0.21, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 9.039705907953481e-07, "loss": 0.2931, "projector_lr": 2.7119117723860443e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.458984375, "rewards_train/margins": 2.71875, "rewards_train/rejected": -3.171875, "sft_loss": 0.703125, "step": 1304 }, { "dpo_loss": 0.3203125, "epoch": 0.21, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 9.038209327147133e-07, "loss": 0.2856, "projector_lr": 2.71146279814414e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5, "rewards_train/margins": 3.25, "rewards_train/rejected": -3.75, "sft_loss": 0.73828125, "step": 1305 }, { "dpo_loss": 0.033447265625, "epoch": 0.21, "final_loss": 0.033447265625, "grad_norm": 0.0, "learning_rate": 9.036711705154755e-07, "loss": 0.3018, "projector_lr": 2.711013511546427e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05908203125, "rewards_train/margins": 4.8125, "rewards_train/rejected": -4.875, "sft_loss": 0.59375, "step": 1306 }, { "dpo_loss": 0.04296875, "epoch": 0.21, "final_loss": 0.04296875, "grad_norm": 0.0, "learning_rate": 9.035213042362486e-07, "loss": 0.1391, "projector_lr": 2.7105639127087462e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.068359375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.0, "sft_loss": 0.74609375, "step": 1307 }, { "dpo_loss": 0.5234375, "epoch": 0.21, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 9.033713339156733e-07, "loss": 0.3289, "projector_lr": 2.7101140017470203e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 1.6875, "rewards_train/rejected": -2.296875, "sft_loss": 0.66796875, "step": 1308 }, { "dpo_loss": 0.1455078125, "epoch": 0.21, "final_loss": 0.1455078125, "grad_norm": 0.0, "learning_rate": 9.032212595924169e-07, "loss": 0.2101, "projector_lr": 2.7096637787772507e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.59375, "rewards_train/margins": 4.125, "rewards_train/rejected": -3.53125, "sft_loss": 0.6875, "step": 1309 }, { "dpo_loss": 0.2294921875, "epoch": 0.21, "final_loss": 0.2294921875, "grad_norm": 0.0, "learning_rate": 9.030710813051734e-07, "loss": 0.3558, "projector_lr": 2.7092132439155203e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.314453125, "rewards_train/margins": 2.859375, "rewards_train/rejected": -3.171875, "sft_loss": 0.6015625, "step": 1310 }, { "dpo_loss": 0.1435546875, "epoch": 0.21, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 9.029207990926642e-07, "loss": 0.2504, "projector_lr": 2.7087623972779925e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1435546875, "rewards_train/margins": 3.515625, "rewards_train/rejected": -3.65625, "sft_loss": 0.9375, "step": 1311 }, { "dpo_loss": 0.26953125, "epoch": 0.21, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 9.027704129936367e-07, "loss": 0.2612, "projector_lr": 2.70831123898091e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 2.53125, "rewards_train/rejected": -3.8125, "sft_loss": 0.921875, "step": 1312 }, { "dpo_loss": 0.1884765625, "epoch": 0.21, "final_loss": 0.1884765625, "grad_norm": 0.0, "learning_rate": 9.026199230468656e-07, "loss": 0.1414, "projector_lr": 2.707859769140597e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1875, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.1875, "sft_loss": 0.66796875, "step": 1313 }, { "dpo_loss": 0.1728515625, "epoch": 0.21, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 9.024693292911525e-07, "loss": 0.1004, "projector_lr": 2.707407987873458e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.283203125, "rewards_train/margins": 3.40625, "rewards_train/rejected": -3.109375, "sft_loss": 0.68359375, "step": 1314 }, { "dpo_loss": 0.32421875, "epoch": 0.21, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 9.023186317653255e-07, "loss": 0.1976, "projector_lr": 2.7069558952959765e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1875, "rewards_train/margins": 3.375, "rewards_train/rejected": -3.5625, "sft_loss": 0.66015625, "step": 1315 }, { "dpo_loss": 0.0849609375, "epoch": 0.21, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 9.02167830508239e-07, "loss": 0.1653, "projector_lr": 2.7065034915247173e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0189208984375, "rewards_train/margins": 2.796875, "rewards_train/rejected": -2.8125, "sft_loss": 0.6875, "step": 1316 }, { "dpo_loss": 0.1767578125, "epoch": 0.21, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 9.020169255587752e-07, "loss": 0.3909, "projector_lr": 2.706050776676326e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 2.8125, "rewards_train/rejected": -3.515625, "sft_loss": 0.58984375, "step": 1317 }, { "dpo_loss": 0.361328125, "epoch": 0.21, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.018659169558423e-07, "loss": 0.2292, "projector_lr": 2.705597750867527e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87890625, "rewards_train/margins": 1.8125, "rewards_train/rejected": -2.6875, "sft_loss": 0.8984375, "step": 1318 }, { "dpo_loss": 0.0167236328125, "epoch": 0.21, "final_loss": 0.0167236328125, "grad_norm": 0.0, "learning_rate": 9.017148047383751e-07, "loss": 0.1189, "projector_lr": 2.7051444142151257e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.369140625, "rewards_train/margins": 6.34375, "rewards_train/rejected": -5.96875, "sft_loss": 0.6015625, "step": 1319 }, { "dpo_loss": 0.83984375, "epoch": 0.21, "final_loss": 0.83984375, "grad_norm": 0.0, "learning_rate": 9.015635889453359e-07, "loss": 0.4512, "projector_lr": 2.704690766836008e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.5625, "rewards_train/margins": 1.1328125, "rewards_train/rejected": -1.6953125, "sft_loss": 1.015625, "step": 1320 }, { "dpo_loss": 0.1279296875, "epoch": 0.21, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 9.014122696157127e-07, "loss": 0.1035, "projector_lr": 2.7042368088471384e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.25, "rewards_train/margins": 4.9375, "rewards_train/rejected": -4.6875, "sft_loss": 0.88671875, "step": 1321 }, { "dpo_loss": 0.169921875, "epoch": 0.21, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 9.012608467885212e-07, "loss": 0.163, "projector_lr": 2.7037825403655638e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.357421875, "rewards_train/margins": 3.140625, "rewards_train/rejected": -3.5, "sft_loss": 0.84765625, "step": 1322 }, { "dpo_loss": 0.08935546875, "epoch": 0.21, "final_loss": 0.08935546875, "grad_norm": 0.0, "learning_rate": 9.011093205028029e-07, "loss": 0.0977, "projector_lr": 2.7033279615084088e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.232421875, "rewards_train/margins": 3.21875, "rewards_train/rejected": -3.453125, "sft_loss": 0.70703125, "step": 1323 }, { "dpo_loss": 0.337890625, "epoch": 0.21, "final_loss": 0.337890625, "grad_norm": 0.0, "learning_rate": 9.009576907976267e-07, "loss": 0.2518, "projector_lr": 2.70287307239288e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.166015625, "rewards_train/margins": 1.671875, "rewards_train/rejected": -1.8359375, "sft_loss": 0.6796875, "step": 1324 }, { "dpo_loss": 0.1845703125, "epoch": 0.21, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 9.008059577120875e-07, "loss": 0.2684, "projector_lr": 2.7024178731362627e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.43359375, "rewards_train/margins": 2.453125, "rewards_train/rejected": -2.890625, "sft_loss": 0.87109375, "step": 1325 }, { "dpo_loss": 0.345703125, "epoch": 0.21, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 9.006541212853072e-07, "loss": 0.1804, "projector_lr": 2.701962363855922e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.12353515625, "rewards_train/margins": 2.828125, "rewards_train/rejected": -2.953125, "sft_loss": 0.84375, "step": 1326 }, { "dpo_loss": 0.2021484375, "epoch": 0.21, "final_loss": 0.2021484375, "grad_norm": 0.0, "learning_rate": 9.005021815564347e-07, "loss": 0.4347, "projector_lr": 2.701506544669304e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1337890625, "rewards_train/margins": 3.5, "rewards_train/rejected": -3.640625, "sft_loss": 0.83203125, "step": 1327 }, { "dpo_loss": 0.4140625, "epoch": 0.21, "final_loss": 0.4140625, "grad_norm": 0.0, "learning_rate": 9.003501385646448e-07, "loss": 0.4057, "projector_lr": 2.701050415693935e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.11669921875, "rewards_train/margins": 3.28125, "rewards_train/rejected": -3.171875, "sft_loss": 0.66796875, "step": 1328 }, { "dpo_loss": 0.2236328125, "epoch": 0.21, "final_loss": 0.2236328125, "grad_norm": 0.0, "learning_rate": 9.001979923491394e-07, "loss": 0.1291, "projector_lr": 2.7005939770474183e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.15625, "sft_loss": 0.6171875, "step": 1329 }, { "dpo_loss": 0.08154296875, "epoch": 0.21, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 9.000457429491471e-07, "loss": 0.2081, "projector_lr": 2.7001372288474414e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.25, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.25, "sft_loss": 0.65625, "step": 1330 }, { "dpo_loss": 0.11328125, "epoch": 0.21, "final_loss": 0.11328125, "grad_norm": 0.0, "learning_rate": 8.998933904039226e-07, "loss": 0.4326, "projector_lr": 2.6996801712117683e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.310546875, "rewards_train/margins": 3.65625, "rewards_train/rejected": -3.984375, "sft_loss": 1.015625, "step": 1331 }, { "dpo_loss": 0.201171875, "epoch": 0.21, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 8.997409347527477e-07, "loss": 0.2634, "projector_lr": 2.6992228042582435e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 2.484375, "rewards_train/rejected": -3.421875, "sft_loss": 0.625, "step": 1332 }, { "dpo_loss": 0.0732421875, "epoch": 0.21, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 8.995883760349306e-07, "loss": 0.1525, "projector_lr": 2.698765128104792e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -4.6875, "sft_loss": 0.921875, "step": 1333 }, { "dpo_loss": 0.034912109375, "epoch": 0.21, "final_loss": 0.034912109375, "grad_norm": 0.0, "learning_rate": 8.994357142898059e-07, "loss": 0.2735, "projector_lr": 2.698307142869418e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.146484375, "rewards_train/margins": 3.6875, "rewards_train/rejected": -3.84375, "sft_loss": 0.77734375, "step": 1334 }, { "dpo_loss": 0.17578125, "epoch": 0.21, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 8.992829495567352e-07, "loss": 0.3857, "projector_lr": 2.6978488486702055e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1796875, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.1875, "sft_loss": 0.81640625, "step": 1335 }, { "dpo_loss": 0.32421875, "epoch": 0.21, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 8.99130081875106e-07, "loss": 0.2474, "projector_lr": 2.697390245625318e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 3.15625, "rewards_train/rejected": -3.828125, "sft_loss": 0.8046875, "step": 1336 }, { "dpo_loss": 0.38671875, "epoch": 0.21, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 8.989771112843329e-07, "loss": 0.2104, "projector_lr": 2.696931333852999e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 2.53125, "rewards_train/rejected": -4.09375, "sft_loss": 0.765625, "step": 1337 }, { "dpo_loss": 0.25, "epoch": 0.21, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 8.98824037823857e-07, "loss": 0.2651, "projector_lr": 2.696472113471571e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.546875, "rewards_train/margins": 2.765625, "rewards_train/rejected": -4.3125, "sft_loss": 0.58203125, "step": 1338 }, { "dpo_loss": 0.11962890625, "epoch": 0.21, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 8.986708615331457e-07, "loss": 0.2633, "projector_lr": 2.696012584599437e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.34375, "rewards_train/margins": 3.03125, "rewards_train/rejected": -4.375, "sft_loss": 0.8515625, "step": 1339 }, { "dpo_loss": 0.1103515625, "epoch": 0.21, "final_loss": 0.1103515625, "grad_norm": 0.0, "learning_rate": 8.985175824516928e-07, "loss": 0.0768, "projector_lr": 2.6955527473550786e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 3.4375, "rewards_train/rejected": -4.09375, "sft_loss": 0.7421875, "step": 1340 }, { "dpo_loss": 0.310546875, "epoch": 0.21, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 8.983642006190191e-07, "loss": 0.2546, "projector_lr": 2.6950926018570576e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.71875, "sft_loss": 0.78125, "step": 1341 }, { "dpo_loss": 0.2158203125, "epoch": 0.21, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 8.982107160746715e-07, "loss": 0.1879, "projector_lr": 2.6946321482240145e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.609375, "rewards_train/margins": 2.1875, "rewards_train/rejected": -3.796875, "sft_loss": 0.82421875, "step": 1342 }, { "dpo_loss": 0.36328125, "epoch": 0.21, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 8.980571288582233e-07, "loss": 0.2799, "projector_lr": 2.69417138657467e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 1.2109375, "rewards_train/rejected": -2.328125, "sft_loss": 0.625, "step": 1343 }, { "dpo_loss": 0.41015625, "epoch": 0.22, "final_loss": 0.41015625, "grad_norm": 0.0, "learning_rate": 8.979034390092746e-07, "loss": 0.2762, "projector_lr": 2.693710317027824e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.34375, "rewards_train/margins": 1.109375, "rewards_train/rejected": -2.453125, "sft_loss": 0.81640625, "step": 1344 }, { "dpo_loss": 0.2353515625, "epoch": 0.22, "final_loss": 0.2353515625, "grad_norm": 0.0, "learning_rate": 8.977496465674518e-07, "loss": 0.179, "projector_lr": 2.6932489397023556e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 2.109375, "rewards_train/rejected": -3.0625, "sft_loss": 0.7421875, "step": 1345 }, { "dpo_loss": 0.2890625, "epoch": 0.22, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 8.975957515724077e-07, "loss": 0.5517, "projector_lr": 2.6927872547172235e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.78125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.8125, "sft_loss": 0.87109375, "step": 1346 }, { "dpo_loss": 0.045166015625, "epoch": 0.22, "final_loss": 0.045166015625, "grad_norm": 0.0, "learning_rate": 8.974417540638216e-07, "loss": 0.3085, "projector_lr": 2.692325262191465e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.96875, "sft_loss": 0.90625, "step": 1347 }, { "dpo_loss": 0.37890625, "epoch": 0.22, "final_loss": 0.37890625, "grad_norm": 0.0, "learning_rate": 8.972876540813992e-07, "loss": 0.4885, "projector_lr": 2.6918629622441976e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 2.578125, "rewards_train/rejected": -3.515625, "sft_loss": 0.80078125, "step": 1348 }, { "dpo_loss": 0.07568359375, "epoch": 0.22, "final_loss": 0.07568359375, "grad_norm": 0.0, "learning_rate": 8.971334516648729e-07, "loss": 0.0927, "projector_lr": 2.6914003549946187e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.03125, "sft_loss": 0.57421875, "step": 1349 }, { "dpo_loss": 0.08837890625, "epoch": 0.22, "final_loss": 0.08837890625, "grad_norm": 0.0, "learning_rate": 8.969791468540009e-07, "loss": 0.3522, "projector_lr": 2.6909374405620028e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46875, "rewards_train/margins": 3.5, "rewards_train/rejected": -3.953125, "sft_loss": 0.9140625, "step": 1350 }, { "dpo_loss": 0.1025390625, "epoch": 0.22, "final_loss": 0.1025390625, "grad_norm": 0.0, "learning_rate": 8.968247396885684e-07, "loss": 0.1841, "projector_lr": 2.6904742190657053e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 3.765625, "rewards_train/rejected": -5.0, "sft_loss": 0.64453125, "step": 1351 }, { "dpo_loss": 0.0093994140625, "epoch": 0.22, "final_loss": 0.0093994140625, "grad_norm": 0.0, "learning_rate": 8.966702302083867e-07, "loss": 0.1817, "projector_lr": 2.69001069062516e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.21875, "sft_loss": 0.625, "step": 1352 }, { "dpo_loss": 0.291015625, "epoch": 0.22, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 8.965156184532935e-07, "loss": 0.1565, "projector_lr": 2.6895468553598807e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.0625, "sft_loss": 0.6015625, "step": 1353 }, { "dpo_loss": 0.087890625, "epoch": 0.22, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 8.96360904463153e-07, "loss": 0.1159, "projector_lr": 2.6890827133894595e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.443359375, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.65625, "sft_loss": 0.69921875, "step": 1354 }, { "dpo_loss": 0.0299072265625, "epoch": 0.22, "final_loss": 0.0299072265625, "grad_norm": 0.0, "learning_rate": 8.962060882778554e-07, "loss": 0.1358, "projector_lr": 2.6886182648335666e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.0625, "sft_loss": 0.68359375, "step": 1355 }, { "dpo_loss": 0.189453125, "epoch": 0.22, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 8.96051169937318e-07, "loss": 0.175, "projector_lr": 2.688153509811954e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.50390625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.84375, "sft_loss": 0.61328125, "step": 1356 }, { "dpo_loss": 0.1884765625, "epoch": 0.22, "final_loss": 0.1884765625, "grad_norm": 0.0, "learning_rate": 8.958961494814835e-07, "loss": 0.2334, "projector_lr": 2.6876884484444506e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 2.828125, "rewards_train/rejected": -4.25, "sft_loss": 0.70703125, "step": 1357 }, { "dpo_loss": 0.0751953125, "epoch": 0.22, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 8.957410269503216e-07, "loss": 0.2205, "projector_lr": 2.687223080850965e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.050048828125, "rewards_train/margins": 3.03125, "rewards_train/rejected": -3.078125, "sft_loss": 0.828125, "step": 1358 }, { "dpo_loss": 0.23828125, "epoch": 0.22, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 8.95585802383828e-07, "loss": 0.3891, "projector_lr": 2.6867574071514842e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75390625, "rewards_train/margins": 3.34375, "rewards_train/rejected": -4.09375, "sft_loss": 0.609375, "step": 1359 }, { "dpo_loss": 0.0283203125, "epoch": 0.22, "final_loss": 0.0283203125, "grad_norm": 0.0, "learning_rate": 8.954304758220248e-07, "loss": 0.3249, "projector_lr": 2.6862914274660747e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.46875, "sft_loss": 0.68359375, "step": 1360 }, { "dpo_loss": 0.94140625, "epoch": 0.22, "final_loss": 0.94140625, "grad_norm": 0.0, "learning_rate": 8.952750473049606e-07, "loss": 0.5468, "projector_lr": 2.685825141914882e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 1.671875, "rewards_train/rejected": -3.34375, "sft_loss": 0.828125, "step": 1361 }, { "dpo_loss": 0.13671875, "epoch": 0.22, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 8.951195168727099e-07, "loss": 0.1534, "projector_lr": 2.68535855061813e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.828125, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.96875, "sft_loss": 0.875, "step": 1362 }, { "dpo_loss": 0.044189453125, "epoch": 0.22, "final_loss": 0.044189453125, "grad_norm": 0.0, "learning_rate": 8.949638845653737e-07, "loss": 0.0353, "projector_lr": 2.6848916536961213e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.78125, "sft_loss": 0.546875, "step": 1363 }, { "dpo_loss": 0.1279296875, "epoch": 0.22, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 8.948081504230792e-07, "loss": 0.0937, "projector_lr": 2.684424451269238e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.1875, "sft_loss": 0.86328125, "step": 1364 }, { "dpo_loss": 0.29296875, "epoch": 0.22, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 8.946523144859801e-07, "loss": 0.3345, "projector_lr": 2.6839569434579406e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 2.765625, "rewards_train/rejected": -4.21875, "sft_loss": 0.73046875, "step": 1365 }, { "dpo_loss": 0.36328125, "epoch": 0.22, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 8.944963767942559e-07, "loss": 0.3411, "projector_lr": 2.683489130382768e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.1875, "rewards_train/margins": 2.25, "rewards_train/rejected": -4.4375, "sft_loss": 0.7265625, "step": 1366 }, { "dpo_loss": 0.059814453125, "epoch": 0.22, "final_loss": 0.059814453125, "grad_norm": 0.0, "learning_rate": 8.943403373881127e-07, "loss": 0.2418, "projector_lr": 2.6830210121643383e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.5625, "sft_loss": 0.7109375, "step": 1367 }, { "dpo_loss": 0.296875, "epoch": 0.22, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 8.941841963077826e-07, "loss": 0.2515, "projector_lr": 2.6825525889233476e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.859375, "rewards_train/margins": 2.71875, "rewards_train/rejected": -4.5625, "sft_loss": 0.703125, "step": 1368 }, { "dpo_loss": 0.1171875, "epoch": 0.22, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 8.94027953593524e-07, "loss": 0.1668, "projector_lr": 2.6820838607805722e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.21875, "rewards_train/margins": 3.96875, "rewards_train/rejected": -5.1875, "sft_loss": 1.0859375, "step": 1369 }, { "dpo_loss": 0.5625, "epoch": 0.22, "final_loss": 0.5625, "grad_norm": 0.0, "learning_rate": 8.938716092856218e-07, "loss": 0.4393, "projector_lr": 2.6816148278568657e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 1.390625, "rewards_train/rejected": -3.140625, "sft_loss": 0.890625, "step": 1370 }, { "dpo_loss": 0.146484375, "epoch": 0.22, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 8.937151634243864e-07, "loss": 0.1168, "projector_lr": 2.6811454902731598e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.46875, "sft_loss": 0.7734375, "step": 1371 }, { "dpo_loss": 0.0693359375, "epoch": 0.22, "final_loss": 0.0693359375, "grad_norm": 0.0, "learning_rate": 8.935586160501554e-07, "loss": 0.169, "projector_lr": 2.680675848150466e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5625, "rewards_train/margins": 3.734375, "rewards_train/rejected": -4.28125, "sft_loss": 0.8828125, "step": 1372 }, { "dpo_loss": 0.388671875, "epoch": 0.22, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 8.934019672032913e-07, "loss": 0.2539, "projector_lr": 2.680205901609874e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.125, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.875, "sft_loss": 0.8984375, "step": 1373 }, { "dpo_loss": 0.451171875, "epoch": 0.22, "final_loss": 0.451171875, "grad_norm": 0.0, "learning_rate": 8.932452169241838e-07, "loss": 0.2762, "projector_lr": 2.6797356507725515e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.203125, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.9375, "sft_loss": 0.78515625, "step": 1374 }, { "dpo_loss": 0.2236328125, "epoch": 0.22, "final_loss": 0.2236328125, "grad_norm": 0.0, "learning_rate": 8.930883652532483e-07, "loss": 0.3632, "projector_lr": 2.679265095759745e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0625, "rewards_train/margins": 2.9375, "rewards_train/rejected": -4.0, "sft_loss": 0.78515625, "step": 1375 }, { "dpo_loss": 0.0791015625, "epoch": 0.22, "final_loss": 0.0791015625, "grad_norm": 0.0, "learning_rate": 8.929314122309264e-07, "loss": 0.1056, "projector_lr": 2.6787942366927793e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.4375, "sft_loss": 0.6484375, "step": 1376 }, { "dpo_loss": 0.255859375, "epoch": 0.22, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 8.927743578976859e-07, "loss": 0.1962, "projector_lr": 2.678323073693058e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 2.6875, "rewards_train/rejected": -3.4375, "sft_loss": 0.66015625, "step": 1377 }, { "dpo_loss": 0.154296875, "epoch": 0.22, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 8.926172022940208e-07, "loss": 0.1542, "projector_lr": 2.6778516068820626e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 2.421875, "rewards_train/rejected": -3.0, "sft_loss": 0.85546875, "step": 1378 }, { "dpo_loss": 0.41015625, "epoch": 0.22, "final_loss": 0.41015625, "grad_norm": 0.0, "learning_rate": 8.924599454604509e-07, "loss": 0.538, "projector_lr": 2.677379836381353e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 2.3125, "rewards_train/rejected": -3.203125, "sft_loss": 0.6640625, "step": 1379 }, { "dpo_loss": 0.3203125, "epoch": 0.22, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 8.923025874375226e-07, "loss": 0.2571, "projector_lr": 2.6769077623125676e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.609375, "rewards_train/margins": 2.75, "rewards_train/rejected": -4.34375, "sft_loss": 0.82421875, "step": 1380 }, { "dpo_loss": 0.3671875, "epoch": 0.22, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 8.921451282658075e-07, "loss": 0.2944, "projector_lr": 2.676435384797423e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.171875, "rewards_train/margins": 2.96875, "rewards_train/rejected": -4.125, "sft_loss": 0.87890625, "step": 1381 }, { "dpo_loss": 0.39453125, "epoch": 0.22, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 8.919875679859045e-07, "loss": 0.2815, "projector_lr": 2.675962703957714e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.515625, "rewards_train/margins": 2.296875, "rewards_train/rejected": -3.8125, "sft_loss": 0.84765625, "step": 1382 }, { "dpo_loss": 0.169921875, "epoch": 0.22, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 8.918299066384376e-07, "loss": 0.2565, "projector_lr": 2.675489719915313e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 2.703125, "rewards_train/rejected": -3.34375, "sft_loss": 1.0703125, "step": 1383 }, { "dpo_loss": 0.466796875, "epoch": 0.22, "final_loss": 0.466796875, "grad_norm": 0.0, "learning_rate": 8.916721442640572e-07, "loss": 0.3481, "projector_lr": 2.6750164327921716e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 1.109375, "rewards_train/rejected": -1.6953125, "sft_loss": 0.890625, "step": 1384 }, { "dpo_loss": 0.1767578125, "epoch": 0.22, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 8.915142809034398e-07, "loss": 0.1454, "projector_lr": 2.67454284271032e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1875, "rewards_train/margins": 3.0, "rewards_train/rejected": -4.1875, "sft_loss": 0.80859375, "step": 1385 }, { "dpo_loss": 0.1376953125, "epoch": 0.22, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 8.913563165972879e-07, "loss": 0.2491, "projector_lr": 2.674068949791864e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.65625, "sft_loss": 0.796875, "step": 1386 }, { "dpo_loss": 0.0947265625, "epoch": 0.22, "final_loss": 0.0947265625, "grad_norm": 0.0, "learning_rate": 8.9119825138633e-07, "loss": 0.1176, "projector_lr": 2.67359475415899e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 3.921875, "rewards_train/rejected": -5.125, "sft_loss": 0.88671875, "step": 1387 }, { "dpo_loss": 0.34765625, "epoch": 0.22, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 8.910400853113206e-07, "loss": 0.193, "projector_lr": 2.673120255933962e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.796875, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.75, "sft_loss": 0.7109375, "step": 1388 }, { "dpo_loss": 0.396484375, "epoch": 0.22, "final_loss": 0.396484375, "grad_norm": 0.0, "learning_rate": 8.908818184130402e-07, "loss": 0.3774, "projector_lr": 2.6726454552391207e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 2.390625, "rewards_train/rejected": -3.46875, "sft_loss": 0.765625, "step": 1389 }, { "dpo_loss": 0.60546875, "epoch": 0.22, "final_loss": 0.60546875, "grad_norm": 0.0, "learning_rate": 8.907234507322953e-07, "loss": 0.3452, "projector_lr": 2.672170352196886e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.765625, "rewards_train/margins": 2.46875, "rewards_train/rejected": -4.25, "sft_loss": 0.7890625, "step": 1390 }, { "dpo_loss": 0.03759765625, "epoch": 0.22, "final_loss": 0.03759765625, "grad_norm": 0.0, "learning_rate": 8.905649823099185e-07, "loss": 0.1808, "projector_lr": 2.6716949469297557e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 3.921875, "rewards_train/rejected": -5.40625, "sft_loss": 0.58984375, "step": 1391 }, { "dpo_loss": 0.546875, "epoch": 0.22, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 8.904064131867683e-07, "loss": 0.5846, "projector_lr": 2.671219239560305e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.1875, "sft_loss": 0.78515625, "step": 1392 }, { "dpo_loss": 0.380859375, "epoch": 0.22, "final_loss": 0.380859375, "grad_norm": 0.0, "learning_rate": 8.90247743403729e-07, "loss": 0.6091, "projector_lr": 2.6707432302111874e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.578125, "rewards_train/margins": 3.625, "rewards_train/rejected": -5.21875, "sft_loss": 0.7578125, "step": 1393 }, { "dpo_loss": 0.1806640625, "epoch": 0.22, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 8.900889730017112e-07, "loss": 0.172, "projector_lr": 2.6702669190051335e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.09375, "sft_loss": 0.76953125, "step": 1394 }, { "dpo_loss": 0.205078125, "epoch": 0.22, "final_loss": 0.205078125, "grad_norm": 0.0, "learning_rate": 8.89930102021651e-07, "loss": 0.2907, "projector_lr": 2.669790306064953e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 2.03125, "rewards_train/rejected": -3.625, "sft_loss": 0.6171875, "step": 1395 }, { "dpo_loss": 0.1884765625, "epoch": 0.22, "final_loss": 0.1884765625, "grad_norm": 0.0, "learning_rate": 8.897711305045109e-07, "loss": 0.5326, "projector_lr": 2.6693133915135328e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 2.6875, "rewards_train/rejected": -3.921875, "sft_loss": 0.92578125, "step": 1396 }, { "dpo_loss": 0.271484375, "epoch": 0.22, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 8.896120584912789e-07, "loss": 0.245, "projector_lr": 2.6688361754738372e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.21875, "rewards_train/margins": 2.921875, "rewards_train/rejected": -5.125, "sft_loss": 0.875, "step": 1397 }, { "dpo_loss": 0.134765625, "epoch": 0.22, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 8.894528860229693e-07, "loss": 0.2117, "projector_lr": 2.668358658068908e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.21875, "rewards_train/margins": 3.15625, "rewards_train/rejected": -5.375, "sft_loss": 0.7265625, "step": 1398 }, { "dpo_loss": 0.18359375, "epoch": 0.22, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 8.89293613140622e-07, "loss": 0.3136, "projector_lr": 2.6678808394218664e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 3.578125, "rewards_train/rejected": -4.75, "sft_loss": 0.87890625, "step": 1399 }, { "dpo_loss": 0.10546875, "epoch": 0.22, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 8.891342398853031e-07, "loss": 0.1899, "projector_lr": 2.667402719655909e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 3.4375, "rewards_train/rejected": -5.0, "sft_loss": 1.0703125, "step": 1400 }, { "dpo_loss": 0.48046875, "epoch": 0.22, "final_loss": 0.48046875, "grad_norm": 0.0, "learning_rate": 8.88974766298104e-07, "loss": 0.3019, "projector_lr": 2.6669242988943123e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5390625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.40625, "sft_loss": 0.703125, "step": 1401 }, { "dpo_loss": 0.1484375, "epoch": 0.22, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 8.888151924201424e-07, "loss": 0.2068, "projector_lr": 2.6664455772604275e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9765625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -6.0, "sft_loss": 0.73046875, "step": 1402 }, { "dpo_loss": 0.357421875, "epoch": 0.22, "final_loss": 0.357421875, "grad_norm": 0.0, "learning_rate": 8.886555182925623e-07, "loss": 0.2633, "projector_lr": 2.665966554877687e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 1.953125, "rewards_train/rejected": -3.359375, "sft_loss": 0.64453125, "step": 1403 }, { "dpo_loss": 0.64453125, "epoch": 0.22, "final_loss": 0.64453125, "grad_norm": 0.0, "learning_rate": 8.884957439565325e-07, "loss": 0.3899, "projector_lr": 2.6654872318695976e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.9609375, "rewards_train/margins": 2.78125, "rewards_train/rejected": -4.75, "sft_loss": 0.828125, "step": 1404 }, { "dpo_loss": 0.1416015625, "epoch": 0.22, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 8.883358694532484e-07, "loss": 0.2271, "projector_lr": 2.6650076083597454e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.474609375, "rewards_train/margins": 3.015625, "rewards_train/rejected": -3.484375, "sft_loss": 0.828125, "step": 1405 }, { "dpo_loss": 0.41796875, "epoch": 0.22, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 8.881758948239312e-07, "loss": 0.4205, "projector_lr": 2.6645276844717937e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 2.171875, "rewards_train/rejected": -3.21875, "sft_loss": 0.8203125, "step": 1406 }, { "dpo_loss": 0.20703125, "epoch": 0.23, "final_loss": 0.20703125, "grad_norm": 0.0, "learning_rate": 8.880158201098275e-07, "loss": 0.2108, "projector_lr": 2.6640474603294827e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.859375, "rewards_train/margins": 2.84375, "rewards_train/rejected": -4.71875, "sft_loss": 0.796875, "step": 1407 }, { "dpo_loss": 0.083984375, "epoch": 0.23, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 8.878556453522099e-07, "loss": 0.3199, "projector_lr": 2.6635669360566298e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.28125, "sft_loss": 0.6796875, "step": 1408 }, { "dpo_loss": 0.1416015625, "epoch": 0.23, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 8.876953705923769e-07, "loss": 0.2737, "projector_lr": 2.663086111777131e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.796875, "rewards_train/margins": 2.90625, "rewards_train/rejected": -4.71875, "sft_loss": 0.78515625, "step": 1409 }, { "dpo_loss": 0.1748046875, "epoch": 0.23, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 8.875349958716529e-07, "loss": 0.2113, "projector_lr": 2.662604987614959e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.734375, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.78125, "sft_loss": 0.84375, "step": 1410 }, { "dpo_loss": 0.267578125, "epoch": 0.23, "final_loss": 0.267578125, "grad_norm": 0.0, "learning_rate": 8.873745212313877e-07, "loss": 0.5305, "projector_lr": 2.6621235636941633e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7109375, "rewards_train/margins": 3.265625, "rewards_train/rejected": -4.96875, "sft_loss": 0.8046875, "step": 1411 }, { "dpo_loss": 0.18359375, "epoch": 0.23, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 8.872139467129569e-07, "loss": 0.3083, "projector_lr": 2.661641840138871e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 2.921875, "rewards_train/rejected": -4.34375, "sft_loss": 0.625, "step": 1412 }, { "dpo_loss": 0.2119140625, "epoch": 0.23, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 8.870532723577624e-07, "loss": 0.2828, "projector_lr": 2.6611598170732873e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.59375, "sft_loss": 0.53125, "step": 1413 }, { "dpo_loss": 0.2392578125, "epoch": 0.23, "final_loss": 0.2392578125, "grad_norm": 0.0, "learning_rate": 8.868924982072312e-07, "loss": 0.3738, "projector_lr": 2.660677494621694e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 2.046875, "rewards_train/rejected": -3.015625, "sft_loss": 0.8828125, "step": 1414 }, { "dpo_loss": 0.2490234375, "epoch": 0.23, "final_loss": 0.2490234375, "grad_norm": 0.0, "learning_rate": 8.867316243028164e-07, "loss": 0.1755, "projector_lr": 2.660194872908449e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 1.90625, "rewards_train/rejected": -3.203125, "sft_loss": 0.69921875, "step": 1415 }, { "dpo_loss": 0.1708984375, "epoch": 0.23, "final_loss": 0.1708984375, "grad_norm": 0.0, "learning_rate": 8.865706506859965e-07, "loss": 0.286, "projector_lr": 2.6597119520579894e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96875, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.71875, "sft_loss": 0.55859375, "step": 1416 }, { "dpo_loss": 0.216796875, "epoch": 0.23, "final_loss": 0.216796875, "grad_norm": 0.0, "learning_rate": 8.864095773982761e-07, "loss": 0.1712, "projector_lr": 2.6592287321948285e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.875, "rewards_train/margins": 2.546875, "rewards_train/rejected": -4.40625, "sft_loss": 0.67578125, "step": 1417 }, { "dpo_loss": 0.625, "epoch": 0.23, "final_loss": 0.625, "grad_norm": 0.0, "learning_rate": 8.862484044811851e-07, "loss": 0.5261, "projector_lr": 2.6587452134435556e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.59375, "rewards_train/margins": 0.7109375, "rewards_train/rejected": -2.296875, "sft_loss": 0.9296875, "step": 1418 }, { "dpo_loss": 0.21875, "epoch": 0.23, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 8.860871319762795e-07, "loss": 0.2671, "projector_lr": 2.658261395928839e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.84375, "rewards_train/margins": 3.6875, "rewards_train/rejected": -5.53125, "sft_loss": 0.671875, "step": 1419 }, { "dpo_loss": 0.0380859375, "epoch": 0.23, "final_loss": 0.0380859375, "grad_norm": 0.0, "learning_rate": 8.859257599251406e-07, "loss": 0.2477, "projector_lr": 2.657777279775422e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.72265625, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.625, "sft_loss": 0.63671875, "step": 1420 }, { "dpo_loss": 0.2314453125, "epoch": 0.23, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 8.857642883693759e-07, "loss": 0.2631, "projector_lr": 2.657292865108128e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.75, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.28125, "sft_loss": 0.890625, "step": 1421 }, { "dpo_loss": 0.3984375, "epoch": 0.23, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 8.856027173506177e-07, "loss": 0.2333, "projector_lr": 2.6568081520518533e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.140625, "rewards_train/margins": 1.8125, "rewards_train/rejected": -3.953125, "sft_loss": 0.80859375, "step": 1422 }, { "dpo_loss": 0.099609375, "epoch": 0.23, "final_loss": 0.099609375, "grad_norm": 0.0, "learning_rate": 8.854410469105247e-07, "loss": 0.2104, "projector_lr": 2.6563231407315743e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 4.59375, "rewards_train/rejected": -6.09375, "sft_loss": 0.88671875, "step": 1423 }, { "dpo_loss": 0.365234375, "epoch": 0.23, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 8.852792770907809e-07, "loss": 0.4555, "projector_lr": 2.655837831272343e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 3.28125, "rewards_train/rejected": -4.40625, "sft_loss": 0.84765625, "step": 1424 }, { "dpo_loss": 0.17578125, "epoch": 0.23, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 8.851174079330961e-07, "loss": 0.102, "projector_lr": 2.6553522237992883e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.75, "sft_loss": 0.6640625, "step": 1425 }, { "dpo_loss": 0.1005859375, "epoch": 0.23, "final_loss": 0.1005859375, "grad_norm": 0.0, "learning_rate": 8.849554394792053e-07, "loss": 0.0723, "projector_lr": 2.6548663184376164e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.0625, "sft_loss": 0.71484375, "step": 1426 }, { "dpo_loss": 0.18359375, "epoch": 0.23, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 8.847933717708699e-07, "loss": 0.196, "projector_lr": 2.65438011531261e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.109375, "rewards_train/margins": 2.96875, "rewards_train/rejected": -4.0625, "sft_loss": 0.9765625, "step": 1427 }, { "dpo_loss": 0.236328125, "epoch": 0.23, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 8.846312048498761e-07, "loss": 0.2491, "projector_lr": 2.6538936145496283e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 3.21875, "rewards_train/rejected": -4.4375, "sft_loss": 0.8671875, "step": 1428 }, { "dpo_loss": 0.365234375, "epoch": 0.23, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 8.844689387580359e-07, "loss": 0.2654, "projector_lr": 2.653406816274108e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 3.546875, "rewards_train/rejected": -4.34375, "sft_loss": 0.80859375, "step": 1429 }, { "dpo_loss": 0.119140625, "epoch": 0.23, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 8.843065735371871e-07, "loss": 0.0979, "projector_lr": 2.6529197206115616e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.25, "rewards_train/margins": 3.703125, "rewards_train/rejected": -5.96875, "sft_loss": 0.859375, "step": 1430 }, { "dpo_loss": 0.09765625, "epoch": 0.23, "final_loss": 0.09765625, "grad_norm": 0.0, "learning_rate": 8.841441092291929e-07, "loss": 0.3119, "projector_lr": 2.652432327687579e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.875, "sft_loss": 0.84375, "step": 1431 }, { "dpo_loss": 0.1201171875, "epoch": 0.23, "final_loss": 0.1201171875, "grad_norm": 0.0, "learning_rate": 8.83981545875942e-07, "loss": 0.2566, "projector_lr": 2.6519446376278263e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 3.03125, "rewards_train/rejected": -4.71875, "sft_loss": 1.0390625, "step": 1432 }, { "dpo_loss": 0.275390625, "epoch": 0.23, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 8.838188835193487e-07, "loss": 0.241, "projector_lr": 2.6514566505580465e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 3.390625, "rewards_train/rejected": -4.5, "sft_loss": 0.82421875, "step": 1433 }, { "dpo_loss": 0.1064453125, "epoch": 0.23, "final_loss": 0.1064453125, "grad_norm": 0.0, "learning_rate": 8.836561222013529e-07, "loss": 0.3277, "projector_lr": 2.6509683666040594e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.0625, "rewards_train/rejected": -4.5625, "sft_loss": 0.83984375, "step": 1434 }, { "dpo_loss": 0.146484375, "epoch": 0.23, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 8.834932619639199e-07, "loss": 0.3411, "projector_lr": 2.65047978589176e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 3.5, "rewards_train/rejected": -5.0, "sft_loss": 0.91796875, "step": 1435 }, { "dpo_loss": 0.482421875, "epoch": 0.23, "final_loss": 0.482421875, "grad_norm": 0.0, "learning_rate": 8.833303028490405e-07, "loss": 0.4668, "projector_lr": 2.649990908547122e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.328125, "rewards_train/margins": 2.453125, "rewards_train/rejected": -4.78125, "sft_loss": 0.78515625, "step": 1436 }, { "dpo_loss": 0.298828125, "epoch": 0.23, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 8.83167244898731e-07, "loss": 0.2155, "projector_lr": 2.6495017346961933e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.75, "sft_loss": 0.9375, "step": 1437 }, { "dpo_loss": 0.05029296875, "epoch": 0.23, "final_loss": 0.05029296875, "grad_norm": 0.0, "learning_rate": 8.830040881550334e-07, "loss": 0.1569, "projector_lr": 2.6490122644651003e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.625, "sft_loss": 0.53515625, "step": 1438 }, { "dpo_loss": 0.0125732421875, "epoch": 0.23, "final_loss": 0.0125732421875, "grad_norm": 0.0, "learning_rate": 8.828408326600148e-07, "loss": 0.3111, "projector_lr": 2.6485224979800445e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.625, "sft_loss": 0.85546875, "step": 1439 }, { "dpo_loss": 0.47265625, "epoch": 0.23, "final_loss": 0.47265625, "grad_norm": 0.0, "learning_rate": 8.82677478455768e-07, "loss": 0.2545, "projector_lr": 2.6480324353673043e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.4375, "rewards_train/margins": 2.953125, "rewards_train/rejected": -5.375, "sft_loss": 0.80078125, "step": 1440 }, { "dpo_loss": 0.16796875, "epoch": 0.23, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 8.825140255844112e-07, "loss": 0.1118, "projector_lr": 2.647542076753234e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.125, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.28125, "sft_loss": 0.83984375, "step": 1441 }, { "dpo_loss": 0.1796875, "epoch": 0.23, "final_loss": 0.1796875, "grad_norm": 0.0, "learning_rate": 8.823504740880881e-07, "loss": 0.2347, "projector_lr": 2.6470514222642643e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 3.3125, "rewards_train/rejected": -4.90625, "sft_loss": 0.73828125, "step": 1442 }, { "dpo_loss": 0.384765625, "epoch": 0.23, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 8.821868240089676e-07, "loss": 0.3876, "projector_lr": 2.646560472026903e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8125, "rewards_train/margins": 2.25, "rewards_train/rejected": -4.0625, "sft_loss": 0.63671875, "step": 1443 }, { "dpo_loss": 0.1796875, "epoch": 0.23, "final_loss": 0.1796875, "grad_norm": 0.0, "learning_rate": 8.820230753892445e-07, "loss": 0.2833, "projector_lr": 2.6460692261677336e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.296875, "rewards_train/margins": 2.5625, "rewards_train/rejected": -4.84375, "sft_loss": 0.8125, "step": 1444 }, { "dpo_loss": 0.078125, "epoch": 0.23, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 8.818592282711384e-07, "loss": 0.1217, "projector_lr": 2.6455776848134156e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0625, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.75, "sft_loss": 0.8046875, "step": 1445 }, { "dpo_loss": 0.2021484375, "epoch": 0.23, "final_loss": 0.2021484375, "grad_norm": 0.0, "learning_rate": 8.816952826968948e-07, "loss": 0.2458, "projector_lr": 2.6450858480906844e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5390625, "rewards_train/margins": 3.09375, "rewards_train/rejected": -4.625, "sft_loss": 0.734375, "step": 1446 }, { "dpo_loss": 0.064453125, "epoch": 0.23, "final_loss": 0.064453125, "grad_norm": 0.0, "learning_rate": 8.815312387087841e-07, "loss": 0.121, "projector_lr": 2.6445937161263525e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.78125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -6.6875, "sft_loss": 0.84765625, "step": 1447 }, { "dpo_loss": 0.59765625, "epoch": 0.23, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 8.813670963491026e-07, "loss": 0.3906, "projector_lr": 2.644101289047308e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -3.21875, "rewards_train/margins": 1.671875, "rewards_train/rejected": -4.875, "sft_loss": 0.89453125, "step": 1448 }, { "dpo_loss": 0.09814453125, "epoch": 0.23, "final_loss": 0.09814453125, "grad_norm": 0.0, "learning_rate": 8.812028556601719e-07, "loss": 0.2072, "projector_lr": 2.6436085669805155e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.046875, "rewards_train/margins": 3.75, "rewards_train/rejected": -5.8125, "sft_loss": 0.71484375, "step": 1449 }, { "dpo_loss": 0.443359375, "epoch": 0.23, "final_loss": 0.443359375, "grad_norm": 0.0, "learning_rate": 8.810385166843381e-07, "loss": 0.4277, "projector_lr": 2.6431155500530145e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.734375, "rewards_train/margins": 1.8359375, "rewards_train/rejected": -3.578125, "sft_loss": 0.75390625, "step": 1450 }, { "dpo_loss": 0.263671875, "epoch": 0.23, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 8.808740794639739e-07, "loss": 0.2058, "projector_lr": 2.642622238391922e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.59375, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.25, "sft_loss": 0.7421875, "step": 1451 }, { "dpo_loss": 0.09912109375, "epoch": 0.23, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 8.807095440414767e-07, "loss": 0.1153, "projector_lr": 2.64212863212443e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0625, "rewards_train/margins": 3.453125, "rewards_train/rejected": -5.53125, "sft_loss": 0.5859375, "step": 1452 }, { "dpo_loss": 0.30859375, "epoch": 0.23, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 8.805449104592689e-07, "loss": 0.2006, "projector_lr": 2.641634731377807e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8984375, "rewards_train/margins": 3.015625, "rewards_train/rejected": -4.90625, "sft_loss": 0.87109375, "step": 1453 }, { "dpo_loss": 0.388671875, "epoch": 0.23, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 8.803801787597989e-07, "loss": 0.3092, "projector_lr": 2.641140536279397e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.828125, "rewards_train/margins": 2.671875, "rewards_train/rejected": -4.5, "sft_loss": 1.0234375, "step": 1454 }, { "dpo_loss": 0.1494140625, "epoch": 0.23, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 8.802153489855399e-07, "loss": 0.1686, "projector_lr": 2.64064604695662e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 3.09375, "rewards_train/rejected": -4.53125, "sft_loss": 0.984375, "step": 1455 }, { "dpo_loss": 0.515625, "epoch": 0.23, "final_loss": 0.515625, "grad_norm": 0.0, "learning_rate": 8.800504211789908e-07, "loss": 0.4308, "projector_lr": 2.6401512635369724e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.328125, "rewards_train/margins": 2.921875, "rewards_train/rejected": -5.25, "sft_loss": 0.75390625, "step": 1456 }, { "dpo_loss": 0.275390625, "epoch": 0.23, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 8.798853953826751e-07, "loss": 0.2038, "projector_lr": 2.6396561861480254e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 2.59375, "rewards_train/rejected": -4.375, "sft_loss": 0.6953125, "step": 1457 }, { "dpo_loss": 0.08056640625, "epoch": 0.23, "final_loss": 0.08056640625, "grad_norm": 0.0, "learning_rate": 8.797202716391423e-07, "loss": 0.2379, "projector_lr": 2.639160814917427e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 3.53125, "rewards_train/rejected": -5.03125, "sft_loss": 0.8515625, "step": 1458 }, { "dpo_loss": 0.234375, "epoch": 0.23, "final_loss": 0.234375, "grad_norm": 0.0, "learning_rate": 8.795550499909667e-07, "loss": 0.1792, "projector_lr": 2.6386651499729003e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6875, "rewards_train/margins": 3.8125, "rewards_train/rejected": -4.5, "sft_loss": 0.921875, "step": 1459 }, { "dpo_loss": 0.1142578125, "epoch": 0.23, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 8.793897304807483e-07, "loss": 0.1249, "projector_lr": 2.638169191442245e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3125, "rewards_train/margins": 3.453125, "rewards_train/rejected": -3.765625, "sft_loss": 0.765625, "step": 1460 }, { "dpo_loss": 0.37890625, "epoch": 0.23, "final_loss": 0.37890625, "grad_norm": 0.0, "learning_rate": 8.792243131511117e-07, "loss": 0.3295, "projector_lr": 2.6376729394533354e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 2.75, "rewards_train/rejected": -3.578125, "sft_loss": 0.94921875, "step": 1461 }, { "dpo_loss": 0.0233154296875, "epoch": 0.23, "final_loss": 0.0233154296875, "grad_norm": 0.0, "learning_rate": 8.79058798044707e-07, "loss": 0.1554, "projector_lr": 2.6371763941341214e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0712890625, "rewards_train/margins": 5.0, "rewards_train/rejected": -4.90625, "sft_loss": 0.640625, "step": 1462 }, { "dpo_loss": 0.3828125, "epoch": 0.23, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 8.788931852042098e-07, "loss": 0.3919, "projector_lr": 2.63667955561263e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3359375, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.453125, "sft_loss": 0.71875, "step": 1463 }, { "dpo_loss": 0.1279296875, "epoch": 0.23, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 8.787274746723205e-07, "loss": 0.1652, "projector_lr": 2.636182424016962e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 3.0625, "rewards_train/rejected": -3.890625, "sft_loss": 0.63671875, "step": 1464 }, { "dpo_loss": 0.1298828125, "epoch": 0.23, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 8.78561666491765e-07, "loss": 0.1994, "projector_lr": 2.635684999475295e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.42578125, "rewards_train/margins": 4.59375, "rewards_train/rejected": -4.1875, "sft_loss": 0.70703125, "step": 1465 }, { "dpo_loss": 0.12451171875, "epoch": 0.23, "final_loss": 0.12451171875, "grad_norm": 0.0, "learning_rate": 8.783957607052941e-07, "loss": 0.1537, "projector_lr": 2.6351872821158826e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15625, "rewards_train/margins": 3.703125, "rewards_train/rejected": -3.859375, "sft_loss": 0.75390625, "step": 1466 }, { "dpo_loss": 0.0142822265625, "epoch": 0.23, "final_loss": 0.0142822265625, "grad_norm": 0.0, "learning_rate": 8.782297573556838e-07, "loss": 0.0333, "projector_lr": 2.6346892720670515e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.28125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.0625, "sft_loss": 0.7265625, "step": 1467 }, { "dpo_loss": 0.146484375, "epoch": 0.23, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 8.780636564857356e-07, "loss": 0.1304, "projector_lr": 2.6341909694572067e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.56640625, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.71875, "sft_loss": 0.74609375, "step": 1468 }, { "dpo_loss": 0.140625, "epoch": 0.24, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 8.778974581382756e-07, "loss": 0.3083, "projector_lr": 2.6336923744148267e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0654296875, "rewards_train/margins": 3.265625, "rewards_train/rejected": -3.328125, "sft_loss": 0.56640625, "step": 1469 }, { "dpo_loss": 0.1005859375, "epoch": 0.24, "final_loss": 0.1005859375, "grad_norm": 0.0, "learning_rate": 8.777311623561554e-07, "loss": 0.0575, "projector_lr": 2.6331934870684663e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2275390625, "rewards_train/margins": 2.796875, "rewards_train/rejected": -3.03125, "sft_loss": 0.82421875, "step": 1470 }, { "dpo_loss": 0.8359375, "epoch": 0.24, "final_loss": 0.8359375, "grad_norm": 0.0, "learning_rate": 8.775647691822516e-07, "loss": 0.5419, "projector_lr": 2.632694307546755e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 1.734375, "rewards_train/rejected": -2.328125, "sft_loss": 0.6640625, "step": 1471 }, { "dpo_loss": 0.015380859375, "epoch": 0.24, "final_loss": 0.015380859375, "grad_norm": 0.0, "learning_rate": 8.773982786594662e-07, "loss": 0.1232, "projector_lr": 2.632194835978399e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.125, "sft_loss": 0.61328125, "step": 1472 }, { "dpo_loss": 0.2412109375, "epoch": 0.24, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 8.772316908307258e-07, "loss": 0.2251, "projector_lr": 2.6316950724921774e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.12890625, "rewards_train/margins": 3.296875, "rewards_train/rejected": -3.171875, "sft_loss": 0.53125, "step": 1473 }, { "dpo_loss": 0.055908203125, "epoch": 0.24, "final_loss": 0.055908203125, "grad_norm": 0.0, "learning_rate": 8.770650057389825e-07, "loss": 0.1131, "projector_lr": 2.6311950172169475e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.921875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -4.03125, "sft_loss": 0.5234375, "step": 1474 }, { "dpo_loss": 0.126953125, "epoch": 0.24, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 8.768982234272131e-07, "loss": 0.1097, "projector_lr": 2.6306946702816393e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.302734375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.34375, "sft_loss": 0.45703125, "step": 1475 }, { "dpo_loss": 0.01556396484375, "epoch": 0.24, "final_loss": 0.01556396484375, "grad_norm": 0.0, "learning_rate": 8.767313439384199e-07, "loss": 0.1002, "projector_lr": 2.6301940318152598e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.0, "sft_loss": 0.6796875, "step": 1476 }, { "dpo_loss": 0.26171875, "epoch": 0.24, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 8.7656436731563e-07, "loss": 0.2138, "projector_lr": 2.6296931019468904e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2001953125, "rewards_train/margins": 2.796875, "rewards_train/rejected": -2.59375, "sft_loss": 0.55078125, "step": 1477 }, { "dpo_loss": 0.357421875, "epoch": 0.24, "final_loss": 0.357421875, "grad_norm": 0.0, "learning_rate": 8.763972936018955e-07, "loss": 0.5594, "projector_lr": 2.6291918808056867e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.984375, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.21875, "sft_loss": 0.83203125, "step": 1478 }, { "dpo_loss": 0.050048828125, "epoch": 0.24, "final_loss": 0.050048828125, "grad_norm": 0.0, "learning_rate": 8.762301228402936e-07, "loss": 0.2073, "projector_lr": 2.628690368520881e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.34375, "sft_loss": 0.79296875, "step": 1479 }, { "dpo_loss": 0.74609375, "epoch": 0.24, "final_loss": 0.74609375, "grad_norm": 0.0, "learning_rate": 8.760628550739266e-07, "loss": 0.4058, "projector_lr": 2.62818856522178e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 1.7265625, "rewards_train/rejected": -2.875, "sft_loss": 0.76953125, "step": 1480 }, { "dpo_loss": 0.25, "epoch": 0.24, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 8.758954903459218e-07, "loss": 0.1923, "projector_lr": 2.627686471037766e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.23046875, "rewards_train/margins": 2.328125, "rewards_train/rejected": -2.5625, "sft_loss": 0.7265625, "step": 1481 }, { "dpo_loss": 0.050537109375, "epoch": 0.24, "final_loss": 0.050537109375, "grad_norm": 0.0, "learning_rate": 8.757280286994313e-07, "loss": 0.2117, "projector_lr": 2.627184086098294e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.8125, "rewards_train/margins": 3.9375, "rewards_train/rejected": -3.125, "sft_loss": 0.5546875, "step": 1482 }, { "dpo_loss": 0.059326171875, "epoch": 0.24, "final_loss": 0.059326171875, "grad_norm": 0.0, "learning_rate": 8.755604701776324e-07, "loss": 0.2252, "projector_lr": 2.6266814105328975e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.828125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -3.21875, "sft_loss": 0.62109375, "step": 1483 }, { "dpo_loss": 0.3515625, "epoch": 0.24, "final_loss": 0.3515625, "grad_norm": 0.0, "learning_rate": 8.753928148237275e-07, "loss": 0.3516, "projector_lr": 2.6261784444711825e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.671875, "rewards_train/margins": 1.140625, "rewards_train/rejected": -1.8125, "sft_loss": 0.70703125, "step": 1484 }, { "dpo_loss": 0.17578125, "epoch": 0.24, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 8.752250626809436e-07, "loss": 0.1893, "projector_lr": 2.625675188042831e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2373046875, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.234375, "sft_loss": 0.78515625, "step": 1485 }, { "dpo_loss": 0.419921875, "epoch": 0.24, "final_loss": 0.419921875, "grad_norm": 0.0, "learning_rate": 8.750572137925329e-07, "loss": 0.4689, "projector_lr": 2.625171641377599e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.109375, "rewards_train/margins": 3.171875, "rewards_train/rejected": -4.28125, "sft_loss": 0.76171875, "step": 1486 }, { "dpo_loss": 0.1142578125, "epoch": 0.24, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 8.748892682017724e-07, "loss": 0.1058, "projector_lr": 2.624667804605317e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.47265625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.625, "sft_loss": 0.734375, "step": 1487 }, { "dpo_loss": 0.36328125, "epoch": 0.24, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 8.747212259519641e-07, "loss": 0.3707, "projector_lr": 2.6241636778558926e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 2.4375, "rewards_train/rejected": -3.234375, "sft_loss": 0.8046875, "step": 1488 }, { "dpo_loss": 0.1572265625, "epoch": 0.24, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 8.745530870864351e-07, "loss": 0.1663, "projector_lr": 2.6236592612593055e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.20703125, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.15625, "sft_loss": 0.62109375, "step": 1489 }, { "dpo_loss": 0.5, "epoch": 0.24, "final_loss": 0.5, "grad_norm": 0.0, "learning_rate": 8.743848516485371e-07, "loss": 0.4626, "projector_lr": 2.6231545549456113e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 1.6953125, "rewards_train/rejected": -2.34375, "sft_loss": 0.75390625, "step": 1490 }, { "dpo_loss": 0.302734375, "epoch": 0.24, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 8.742165196816468e-07, "loss": 0.1718, "projector_lr": 2.6226495590449406e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.29296875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -3.734375, "sft_loss": 0.89453125, "step": 1491 }, { "dpo_loss": 0.2275390625, "epoch": 0.24, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 8.740480912291659e-07, "loss": 0.322, "projector_lr": 2.622144273687498e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 3.515625, "rewards_train/rejected": -4.3125, "sft_loss": 0.76953125, "step": 1492 }, { "dpo_loss": 0.298828125, "epoch": 0.24, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 8.73879566334521e-07, "loss": 0.2082, "projector_lr": 2.6216386990035634e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.23828125, "rewards_train/margins": 3.65625, "rewards_train/rejected": -3.890625, "sft_loss": 0.515625, "step": 1493 }, { "dpo_loss": 0.29296875, "epoch": 0.24, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 8.737109450411637e-07, "loss": 0.3152, "projector_lr": 2.6211328351234915e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 3.390625, "rewards_train/rejected": -4.0, "sft_loss": 0.81640625, "step": 1494 }, { "dpo_loss": 0.427734375, "epoch": 0.24, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 8.735422273925697e-07, "loss": 0.263, "projector_lr": 2.620626682177709e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 3.390625, "rewards_train/rejected": -3.96875, "sft_loss": 0.8515625, "step": 1495 }, { "dpo_loss": 0.453125, "epoch": 0.24, "final_loss": 0.453125, "grad_norm": 0.0, "learning_rate": 8.733734134322404e-07, "loss": 0.29, "projector_lr": 2.6201202402967214e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.78515625, "rewards_train/margins": 1.59375, "rewards_train/rejected": -2.375, "sft_loss": 0.83203125, "step": 1496 }, { "dpo_loss": 0.09130859375, "epoch": 0.24, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 8.732045032037017e-07, "loss": 0.1737, "projector_lr": 2.619613509611105e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.203125, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.34375, "sft_loss": 0.66796875, "step": 1497 }, { "dpo_loss": 0.0986328125, "epoch": 0.24, "final_loss": 0.0986328125, "grad_norm": 0.0, "learning_rate": 8.730354967505042e-07, "loss": 0.1266, "projector_lr": 2.6191064902515125e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.32421875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -4.59375, "sft_loss": 0.78515625, "step": 1498 }, { "dpo_loss": 0.375, "epoch": 0.24, "final_loss": 0.375, "grad_norm": 0.0, "learning_rate": 8.728663941162235e-07, "loss": 0.2735, "projector_lr": 2.618599182348671e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 3.296875, "rewards_train/rejected": -4.34375, "sft_loss": 0.6796875, "step": 1499 }, { "dpo_loss": 0.44140625, "epoch": 0.24, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 8.726971953444599e-07, "loss": 0.3949, "projector_lr": 2.61809158603338e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.5625, "sft_loss": 0.625, "step": 1500 }, { "dpo_loss": 0.0301513671875, "epoch": 0.24, "final_loss": 0.0301513671875, "grad_norm": 0.0, "learning_rate": 8.725279004788388e-07, "loss": 0.0902, "projector_lr": 2.6175837014365165e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.0625, "sft_loss": 0.69140625, "step": 1501 }, { "dpo_loss": 0.058837890625, "epoch": 0.24, "final_loss": 0.058837890625, "grad_norm": 0.0, "learning_rate": 8.723585095630097e-07, "loss": 0.0437, "projector_lr": 2.617075528689029e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.0625, "sft_loss": 0.78125, "step": 1502 }, { "dpo_loss": 0.373046875, "epoch": 0.24, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 8.721890226406476e-07, "loss": 0.4341, "projector_lr": 2.616567067921943e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.205078125, "rewards_train/margins": 3.0, "rewards_train/rejected": -2.796875, "sft_loss": 0.79296875, "step": 1503 }, { "dpo_loss": 0.0245361328125, "epoch": 0.24, "final_loss": 0.0245361328125, "grad_norm": 0.0, "learning_rate": 8.720194397554517e-07, "loss": 0.1543, "projector_lr": 2.6160583192663554e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.0625, "sft_loss": 0.7734375, "step": 1504 }, { "dpo_loss": 0.423828125, "epoch": 0.24, "final_loss": 0.423828125, "grad_norm": 0.0, "learning_rate": 8.718497609511464e-07, "loss": 0.2628, "projector_lr": 2.6155492828534395e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.421875, "rewards_train/margins": 2.625, "rewards_train/rejected": -4.03125, "sft_loss": 0.5703125, "step": 1505 }, { "dpo_loss": 0.34765625, "epoch": 0.24, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 8.716799862714803e-07, "loss": 0.2686, "projector_lr": 2.615039958814441e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4375, "rewards_train/margins": 2.328125, "rewards_train/rejected": -3.765625, "sft_loss": 0.85546875, "step": 1506 }, { "dpo_loss": 0.2412109375, "epoch": 0.24, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 8.715101157602271e-07, "loss": 0.1877, "projector_lr": 2.6145303472806815e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.125, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.8125, "sft_loss": 0.87109375, "step": 1507 }, { "dpo_loss": 0.51953125, "epoch": 0.24, "final_loss": 0.51953125, "grad_norm": 0.0, "learning_rate": 8.713401494611854e-07, "loss": 0.4179, "projector_lr": 2.6140204483835564e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5625, "rewards_train/margins": 2.125, "rewards_train/rejected": -3.6875, "sft_loss": 0.796875, "step": 1508 }, { "dpo_loss": 0.244140625, "epoch": 0.24, "final_loss": 0.244140625, "grad_norm": 0.0, "learning_rate": 8.711700874181778e-07, "loss": 0.2222, "projector_lr": 2.6135102622545336e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 3.234375, "rewards_train/rejected": -4.4375, "sft_loss": 0.7421875, "step": 1509 }, { "dpo_loss": 0.1982421875, "epoch": 0.24, "final_loss": 0.1982421875, "grad_norm": 0.0, "learning_rate": 8.709999296750523e-07, "loss": 0.2267, "projector_lr": 2.612999789025157e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.337890625, "rewards_train/margins": 3.328125, "rewards_train/rejected": -3.671875, "sft_loss": 0.6484375, "step": 1510 }, { "dpo_loss": 0.1923828125, "epoch": 0.24, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 8.708296762756811e-07, "loss": 0.2172, "projector_lr": 2.612489028827044e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9296875, "rewards_train/margins": 3.34375, "rewards_train/rejected": -5.28125, "sft_loss": 0.8671875, "step": 1511 }, { "dpo_loss": 0.431640625, "epoch": 0.24, "final_loss": 0.431640625, "grad_norm": 0.0, "learning_rate": 8.706593272639615e-07, "loss": 0.3833, "projector_lr": 2.6119779817918845e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.171875, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.625, "sft_loss": 0.890625, "step": 1512 }, { "dpo_loss": 0.1337890625, "epoch": 0.24, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 8.70488882683815e-07, "loss": 0.3681, "projector_lr": 2.611466648051445e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.65625, "sft_loss": 0.7734375, "step": 1513 }, { "dpo_loss": 0.068359375, "epoch": 0.24, "final_loss": 0.068359375, "grad_norm": 0.0, "learning_rate": 8.703183425791879e-07, "loss": 0.1153, "projector_lr": 2.6109550277375638e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.0, "sft_loss": 0.74609375, "step": 1514 }, { "dpo_loss": 0.2392578125, "epoch": 0.24, "final_loss": 0.2392578125, "grad_norm": 0.0, "learning_rate": 8.701477069940513e-07, "loss": 0.2581, "projector_lr": 2.610443120982154e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 2.953125, "rewards_train/rejected": -3.515625, "sft_loss": 0.578125, "step": 1515 }, { "dpo_loss": 0.6328125, "epoch": 0.24, "final_loss": 0.6328125, "grad_norm": 0.0, "learning_rate": 8.699769759724008e-07, "loss": 0.4134, "projector_lr": 2.6099309279172023e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.69140625, "rewards_train/margins": 1.453125, "rewards_train/rejected": -2.140625, "sft_loss": 0.69140625, "step": 1516 }, { "dpo_loss": 0.263671875, "epoch": 0.24, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 8.698061495582565e-07, "loss": 0.3009, "projector_lr": 2.6094184486747697e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 2.296875, "rewards_train/rejected": -3.375, "sft_loss": 0.8671875, "step": 1517 }, { "dpo_loss": 0.357421875, "epoch": 0.24, "final_loss": 0.357421875, "grad_norm": 0.0, "learning_rate": 8.696352277956631e-07, "loss": 0.2326, "projector_lr": 2.6089056833869895e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 2.65625, "rewards_train/rejected": -3.234375, "sft_loss": 0.9375, "step": 1518 }, { "dpo_loss": 0.384765625, "epoch": 0.24, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 8.694642107286902e-07, "loss": 0.2877, "projector_lr": 2.608392632186071e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 2.359375, "rewards_train/rejected": -2.890625, "sft_loss": 0.99609375, "step": 1519 }, { "dpo_loss": 0.451171875, "epoch": 0.24, "final_loss": 0.451171875, "grad_norm": 0.0, "learning_rate": 8.692930984014316e-07, "loss": 0.5326, "projector_lr": 2.607879295204295e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 1.7109375, "rewards_train/rejected": -2.6875, "sft_loss": 0.98046875, "step": 1520 }, { "dpo_loss": 0.052978515625, "epoch": 0.24, "final_loss": 0.052978515625, "grad_norm": 0.0, "learning_rate": 8.691218908580058e-07, "loss": 0.1053, "projector_lr": 2.6073656725740175e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.75, "sft_loss": 0.9609375, "step": 1521 }, { "dpo_loss": 0.35546875, "epoch": 0.24, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 8.68950588142556e-07, "loss": 0.2147, "projector_lr": 2.6068517644276685e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.53125, "rewards_train/margins": 2.265625, "rewards_train/rejected": -2.796875, "sft_loss": 0.71875, "step": 1522 }, { "dpo_loss": 0.341796875, "epoch": 0.24, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 8.687791902992496e-07, "loss": 0.2595, "projector_lr": 2.606337570897749e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.380859375, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.125, "sft_loss": 0.7890625, "step": 1523 }, { "dpo_loss": 0.21875, "epoch": 0.24, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 8.686076973722789e-07, "loss": 0.2696, "projector_lr": 2.6058230921168368e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53515625, "rewards_train/margins": 2.1875, "rewards_train/rejected": -2.734375, "sft_loss": 0.7578125, "step": 1524 }, { "dpo_loss": 0.142578125, "epoch": 0.24, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 8.684361094058604e-07, "loss": 0.1577, "projector_lr": 2.6053083282175816e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 2.375, "rewards_train/rejected": -4.125, "sft_loss": 0.84765625, "step": 1525 }, { "dpo_loss": 0.38671875, "epoch": 0.24, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 8.682644264442354e-07, "loss": 0.2205, "projector_lr": 2.6047932793327064e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 2.4375, "rewards_train/rejected": -3.6875, "sft_loss": 0.7109375, "step": 1526 }, { "dpo_loss": 0.3125, "epoch": 0.24, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 8.680926485316693e-07, "loss": 0.2654, "projector_lr": 2.604277945595008e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 2.609375, "rewards_train/rejected": -3.28125, "sft_loss": 0.8125, "step": 1527 }, { "dpo_loss": 0.0458984375, "epoch": 0.24, "final_loss": 0.0458984375, "grad_norm": 0.0, "learning_rate": 8.679207757124524e-07, "loss": 0.0551, "projector_lr": 2.603762327137357e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.341796875, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.40625, "sft_loss": 0.6953125, "step": 1528 }, { "dpo_loss": 0.21875, "epoch": 0.24, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 8.677488080308992e-07, "loss": 0.3524, "projector_lr": 2.6032464240926978e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.40625, "sft_loss": 0.609375, "step": 1529 }, { "dpo_loss": 0.345703125, "epoch": 0.24, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 8.675767455313487e-07, "loss": 0.2347, "projector_lr": 2.6027302365940463e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.046875, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.96875, "sft_loss": 0.79296875, "step": 1530 }, { "dpo_loss": 0.4609375, "epoch": 0.24, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 8.674045882581646e-07, "loss": 0.3521, "projector_lr": 2.602213764774494e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 1.484375, "rewards_train/rejected": -2.015625, "sft_loss": 0.91015625, "step": 1531 }, { "dpo_loss": 0.162109375, "epoch": 0.25, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 8.672323362557347e-07, "loss": 0.3192, "projector_lr": 2.6016970087672045e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.009521484375, "rewards_train/margins": 3.03125, "rewards_train/rejected": -3.046875, "sft_loss": 0.83984375, "step": 1532 }, { "dpo_loss": 0.408203125, "epoch": 0.25, "final_loss": 0.408203125, "grad_norm": 0.0, "learning_rate": 8.670599895684714e-07, "loss": 0.2829, "projector_lr": 2.6011799687054145e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.78515625, "rewards_train/margins": 2.640625, "rewards_train/rejected": -3.421875, "sft_loss": 0.60546875, "step": 1533 }, { "dpo_loss": 0.5703125, "epoch": 0.25, "final_loss": 0.5703125, "grad_norm": 0.0, "learning_rate": 8.668875482408113e-07, "loss": 0.3397, "projector_lr": 2.6006626447224344e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.046875, "rewards_train/margins": 2.109375, "rewards_train/rejected": -3.140625, "sft_loss": 0.8125, "step": 1534 }, { "dpo_loss": 0.4765625, "epoch": 0.25, "final_loss": 0.4765625, "grad_norm": 0.0, "learning_rate": 8.667150123172158e-07, "loss": 0.3467, "projector_lr": 2.6001450369516476e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 1.4375, "rewards_train/rejected": -2.75, "sft_loss": 0.8125, "step": 1535 }, { "dpo_loss": 0.2275390625, "epoch": 0.25, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 8.665423818421702e-07, "loss": 0.2755, "projector_lr": 2.5996271455265108e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9375, "rewards_train/margins": 3.015625, "rewards_train/rejected": -3.953125, "sft_loss": 0.9296875, "step": 1536 }, { "dpo_loss": 0.84375, "epoch": 0.25, "final_loss": 0.84375, "grad_norm": 0.0, "learning_rate": 8.663696568601846e-07, "loss": 0.4974, "projector_lr": 2.599108970580554e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.640625, "rewards_train/margins": 1.9296875, "rewards_train/rejected": -4.5625, "sft_loss": 0.828125, "step": 1537 }, { "dpo_loss": 0.58984375, "epoch": 0.25, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 8.661968374157933e-07, "loss": 0.3963, "projector_lr": 2.59859051224738e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.375, "rewards_train/margins": 1.890625, "rewards_train/rejected": -3.265625, "sft_loss": 0.81640625, "step": 1538 }, { "dpo_loss": 0.115234375, "epoch": 0.25, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 8.660239235535551e-07, "loss": 0.3234, "projector_lr": 2.5980717706606654e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.265625, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.84375, "sft_loss": 0.953125, "step": 1539 }, { "dpo_loss": 0.208984375, "epoch": 0.25, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 8.658509153180527e-07, "loss": 0.2521, "projector_lr": 2.597552745954158e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 3.625, "rewards_train/rejected": -5.0, "sft_loss": 0.76953125, "step": 1540 }, { "dpo_loss": 0.2890625, "epoch": 0.25, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 8.656778127538936e-07, "loss": 0.3375, "projector_lr": 2.597033438261681e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.296875, "rewards_train/margins": 3.171875, "rewards_train/rejected": -5.46875, "sft_loss": 0.79296875, "step": 1541 }, { "dpo_loss": 0.083984375, "epoch": 0.25, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 8.655046159057095e-07, "loss": 0.1452, "projector_lr": 2.5965138477171286e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.640625, "rewards_train/margins": 3.59375, "rewards_train/rejected": -6.25, "sft_loss": 0.890625, "step": 1542 }, { "dpo_loss": 0.3671875, "epoch": 0.25, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 8.653313248181561e-07, "loss": 0.2691, "projector_lr": 2.595993974454469e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.5625, "rewards_train/margins": 3.40625, "rewards_train/rejected": -5.96875, "sft_loss": 0.74609375, "step": 1543 }, { "dpo_loss": 0.4765625, "epoch": 0.25, "final_loss": 0.4765625, "grad_norm": 0.0, "learning_rate": 8.651579395359141e-07, "loss": 0.267, "projector_lr": 2.5954738186077424e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.15625, "rewards_train/margins": 2.046875, "rewards_train/rejected": -4.1875, "sft_loss": 0.91796875, "step": 1544 }, { "dpo_loss": 0.26953125, "epoch": 0.25, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 8.649844601036878e-07, "loss": 0.2055, "projector_lr": 2.5949533803110635e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.484375, "rewards_train/margins": 3.5, "rewards_train/rejected": -6.0, "sft_loss": 0.86328125, "step": 1545 }, { "dpo_loss": 0.232421875, "epoch": 0.25, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 8.648108865662059e-07, "loss": 0.1718, "projector_lr": 2.594432659698618e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 2.0625, "rewards_train/rejected": -3.828125, "sft_loss": 1.0625, "step": 1546 }, { "dpo_loss": 0.1357421875, "epoch": 0.25, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 8.646372189682217e-07, "loss": 0.1607, "projector_lr": 2.5939116569046653e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.46875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.0, "sft_loss": 0.6875, "step": 1547 }, { "dpo_loss": 0.12451171875, "epoch": 0.25, "final_loss": 0.12451171875, "grad_norm": 0.0, "learning_rate": 8.644634573545126e-07, "loss": 0.335, "projector_lr": 2.593390372063538e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.28125, "rewards_train/margins": 3.203125, "rewards_train/rejected": -5.5, "sft_loss": 0.9375, "step": 1548 }, { "dpo_loss": 0.859375, "epoch": 0.25, "final_loss": 0.859375, "grad_norm": 0.0, "learning_rate": 8.642896017698799e-07, "loss": 0.5299, "projector_lr": 2.5928688053096397e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -2.03125, "rewards_train/margins": 2.0, "rewards_train/rejected": -4.03125, "sft_loss": 0.921875, "step": 1549 }, { "dpo_loss": 0.21875, "epoch": 0.25, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 8.641156522591496e-07, "loss": 0.2977, "projector_lr": 2.5923469567774486e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.25, "rewards_train/margins": 2.3125, "rewards_train/rejected": -4.5625, "sft_loss": 0.91015625, "step": 1550 }, { "dpo_loss": 0.0179443359375, "epoch": 0.25, "final_loss": 0.0179443359375, "grad_norm": 0.0, "learning_rate": 8.639416088671716e-07, "loss": 0.1117, "projector_lr": 2.591824826601515e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.875, "sft_loss": 0.72265625, "step": 1551 }, { "dpo_loss": 0.39453125, "epoch": 0.25, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 8.637674716388203e-07, "loss": 0.2939, "projector_lr": 2.5913024149164614e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7109375, "rewards_train/margins": 2.15625, "rewards_train/rejected": -3.859375, "sft_loss": 1.125, "step": 1552 }, { "dpo_loss": 0.298828125, "epoch": 0.25, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 8.635932406189941e-07, "loss": 0.2095, "projector_lr": 2.5907797218569825e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.015625, "rewards_train/margins": 2.609375, "rewards_train/rejected": -3.625, "sft_loss": 0.828125, "step": 1553 }, { "dpo_loss": 0.34375, "epoch": 0.25, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 8.634189158526155e-07, "loss": 0.2323, "projector_lr": 2.5902567475578465e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.40625, "rewards_train/margins": 2.671875, "rewards_train/rejected": -5.0625, "sft_loss": 0.5859375, "step": 1554 }, { "dpo_loss": 0.1826171875, "epoch": 0.25, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 8.632444973846314e-07, "loss": 0.1805, "projector_lr": 2.5897334921538943e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5078125, "rewards_train/margins": 3.734375, "rewards_train/rejected": -5.25, "sft_loss": 0.62109375, "step": 1555 }, { "dpo_loss": 0.04443359375, "epoch": 0.25, "final_loss": 0.04443359375, "grad_norm": 0.0, "learning_rate": 8.630699852600128e-07, "loss": 0.1212, "projector_lr": 2.5892099557800385e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.734375, "rewards_train/margins": 3.953125, "rewards_train/rejected": -6.6875, "sft_loss": 0.75390625, "step": 1556 }, { "dpo_loss": 0.1494140625, "epoch": 0.25, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 8.628953795237545e-07, "loss": 0.2855, "projector_lr": 2.5886861385712634e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 3.171875, "rewards_train/rejected": -4.0, "sft_loss": 0.94921875, "step": 1557 }, { "dpo_loss": 0.1953125, "epoch": 0.25, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 8.62720680220876e-07, "loss": 0.2893, "projector_lr": 2.588162040662628e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.03125, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.6875, "sft_loss": 0.7734375, "step": 1558 }, { "dpo_loss": 0.5234375, "epoch": 0.25, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 8.625458873964204e-07, "loss": 0.3014, "projector_lr": 2.5876376621892615e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7421875, "rewards_train/margins": 2.1875, "rewards_train/rejected": -3.921875, "sft_loss": 0.9375, "step": 1559 }, { "dpo_loss": 0.470703125, "epoch": 0.25, "final_loss": 0.470703125, "grad_norm": 0.0, "learning_rate": 8.623710010954555e-07, "loss": 0.2626, "projector_lr": 2.5871130032863666e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.9140625, "rewards_train/margins": 2.734375, "rewards_train/rejected": -4.65625, "sft_loss": 0.953125, "step": 1560 }, { "dpo_loss": 0.328125, "epoch": 0.25, "final_loss": 0.328125, "grad_norm": 0.0, "learning_rate": 8.621960213630726e-07, "loss": 0.264, "projector_lr": 2.586588064089218e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.734375, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.40625, "sft_loss": 0.84375, "step": 1561 }, { "dpo_loss": 0.1669921875, "epoch": 0.25, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 8.620209482443874e-07, "loss": 0.4035, "projector_lr": 2.586062844733162e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.71875, "sft_loss": 0.71875, "step": 1562 }, { "dpo_loss": 0.416015625, "epoch": 0.25, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 8.618457817845395e-07, "loss": 0.2511, "projector_lr": 2.585537345353619e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7421875, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.8125, "sft_loss": 0.8671875, "step": 1563 }, { "dpo_loss": 0.10595703125, "epoch": 0.25, "final_loss": 0.10595703125, "grad_norm": 0.0, "learning_rate": 8.616705220286931e-07, "loss": 0.0811, "projector_lr": 2.5850115660860794e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.96875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -6.0625, "sft_loss": 0.640625, "step": 1564 }, { "dpo_loss": 0.1103515625, "epoch": 0.25, "final_loss": 0.1103515625, "grad_norm": 0.0, "learning_rate": 8.614951690220358e-07, "loss": 0.0742, "projector_lr": 2.5844855070661076e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.0, "rewards_train/margins": 3.90625, "rewards_train/rejected": -5.90625, "sft_loss": 0.796875, "step": 1565 }, { "dpo_loss": 0.2421875, "epoch": 0.25, "final_loss": 0.2421875, "grad_norm": 0.0, "learning_rate": 8.613197228097794e-07, "loss": 0.2867, "projector_lr": 2.5839591684293384e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.296875, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.65625, "sft_loss": 0.6796875, "step": 1566 }, { "dpo_loss": 0.251953125, "epoch": 0.25, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 8.6114418343716e-07, "loss": 0.2289, "projector_lr": 2.58343255031148e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.515625, "rewards_train/margins": 1.8359375, "rewards_train/rejected": -3.359375, "sft_loss": 0.80859375, "step": 1567 }, { "dpo_loss": 0.328125, "epoch": 0.25, "final_loss": 0.328125, "grad_norm": 0.0, "learning_rate": 8.609685509494375e-07, "loss": 0.377, "projector_lr": 2.5829056528483124e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.3125, "rewards_train/margins": 3.4375, "rewards_train/rejected": -5.75, "sft_loss": 0.6015625, "step": 1568 }, { "dpo_loss": 0.26953125, "epoch": 0.25, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 8.607928253918957e-07, "loss": 0.2005, "projector_lr": 2.5823784761756872e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.5625, "sft_loss": 0.8203125, "step": 1569 }, { "dpo_loss": 0.291015625, "epoch": 0.25, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 8.606170068098428e-07, "loss": 0.3005, "projector_lr": 2.5818510204295287e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.296875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.4375, "sft_loss": 0.48046875, "step": 1570 }, { "dpo_loss": 0.1767578125, "epoch": 0.25, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 8.604410952486106e-07, "loss": 0.1162, "projector_lr": 2.581323285745832e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.75, "rewards_train/margins": 3.3125, "rewards_train/rejected": -5.0625, "sft_loss": 0.91015625, "step": 1571 }, { "dpo_loss": 0.255859375, "epoch": 0.25, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 8.602650907535551e-07, "loss": 0.1909, "projector_lr": 2.580795272260665e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.466796875, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.203125, "sft_loss": 0.7734375, "step": 1572 }, { "dpo_loss": 0.396484375, "epoch": 0.25, "final_loss": 0.396484375, "grad_norm": 0.0, "learning_rate": 8.600889933700559e-07, "loss": 0.3338, "projector_lr": 2.580266980110168e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 3.046875, "rewards_train/rejected": -3.875, "sft_loss": 0.62890625, "step": 1573 }, { "dpo_loss": 0.314453125, "epoch": 0.25, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 8.599128031435172e-07, "loss": 0.2169, "projector_lr": 2.579738409430552e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 2.390625, "rewards_train/rejected": -4.0625, "sft_loss": 0.78515625, "step": 1574 }, { "dpo_loss": 0.0238037109375, "epoch": 0.25, "final_loss": 0.0238037109375, "grad_norm": 0.0, "learning_rate": 8.597365201193667e-07, "loss": 0.2537, "projector_lr": 2.5792095603581003e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.9375, "sft_loss": 0.765625, "step": 1575 }, { "dpo_loss": 0.26171875, "epoch": 0.25, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 8.595601443430558e-07, "loss": 0.1985, "projector_lr": 2.5786804330291676e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 2.234375, "rewards_train/rejected": -3.296875, "sft_loss": 0.86328125, "step": 1576 }, { "dpo_loss": 0.12060546875, "epoch": 0.25, "final_loss": 0.12060546875, "grad_norm": 0.0, "learning_rate": 8.593836758600604e-07, "loss": 0.5048, "projector_lr": 2.578151027580181e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53125, "rewards_train/margins": 3.5625, "rewards_train/rejected": -4.09375, "sft_loss": 0.703125, "step": 1577 }, { "dpo_loss": 0.34375, "epoch": 0.25, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 8.592071147158797e-07, "loss": 0.2476, "projector_lr": 2.577621344147639e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.5, "sft_loss": 0.69140625, "step": 1578 }, { "dpo_loss": 0.1240234375, "epoch": 0.25, "final_loss": 0.1240234375, "grad_norm": 0.0, "learning_rate": 8.590304609560375e-07, "loss": 0.5419, "projector_lr": 2.5770913828681128e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.062255859375, "rewards_train/margins": 3.765625, "rewards_train/rejected": -3.828125, "sft_loss": 0.50390625, "step": 1579 }, { "dpo_loss": 0.1240234375, "epoch": 0.25, "final_loss": 0.1240234375, "grad_norm": 0.0, "learning_rate": 8.588537146260807e-07, "loss": 0.2496, "projector_lr": 2.576561143878242e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.8125, "sft_loss": 0.73828125, "step": 1580 }, { "dpo_loss": 0.2158203125, "epoch": 0.25, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 8.586768757715805e-07, "loss": 0.1447, "projector_lr": 2.576030627314742e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 3.234375, "rewards_train/rejected": -3.890625, "sft_loss": 1.0, "step": 1581 }, { "dpo_loss": 0.05029296875, "epoch": 0.25, "final_loss": 0.05029296875, "grad_norm": 0.0, "learning_rate": 8.584999444381323e-07, "loss": 0.1253, "projector_lr": 2.575499833314397e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.90625, "sft_loss": 0.84375, "step": 1582 }, { "dpo_loss": 0.5703125, "epoch": 0.25, "final_loss": 0.5703125, "grad_norm": 0.0, "learning_rate": 8.583229206713542e-07, "loss": 0.5149, "projector_lr": 2.5749687620140627e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 3.546875, "rewards_train/rejected": -4.1875, "sft_loss": 0.73828125, "step": 1583 }, { "dpo_loss": 0.0693359375, "epoch": 0.25, "final_loss": 0.0693359375, "grad_norm": 0.0, "learning_rate": 8.581458045168897e-07, "loss": 0.1147, "projector_lr": 2.574437413550669e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1435546875, "rewards_train/margins": 3.84375, "rewards_train/rejected": -3.6875, "sft_loss": 0.78515625, "step": 1584 }, { "dpo_loss": 0.036376953125, "epoch": 0.25, "final_loss": 0.036376953125, "grad_norm": 0.0, "learning_rate": 8.579685960204046e-07, "loss": 0.2552, "projector_lr": 2.573905788061214e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4375, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.6875, "sft_loss": 0.70703125, "step": 1585 }, { "dpo_loss": 0.04052734375, "epoch": 0.25, "final_loss": 0.04052734375, "grad_norm": 0.0, "learning_rate": 8.577912952275898e-07, "loss": 0.1678, "projector_lr": 2.5733738856827694e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.5625, "sft_loss": 0.84765625, "step": 1586 }, { "dpo_loss": 0.267578125, "epoch": 0.25, "final_loss": 0.267578125, "grad_norm": 0.0, "learning_rate": 8.576139021841588e-07, "loss": 0.2884, "projector_lr": 2.572841706552477e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.453125, "rewards_train/margins": 2.59375, "rewards_train/rejected": -3.046875, "sft_loss": 0.70703125, "step": 1587 }, { "dpo_loss": 0.2294921875, "epoch": 0.25, "final_loss": 0.2294921875, "grad_norm": 0.0, "learning_rate": 8.5743641693585e-07, "loss": 0.1562, "projector_lr": 2.57230925080755e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.72265625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.0625, "sft_loss": 0.64453125, "step": 1588 }, { "dpo_loss": 0.3203125, "epoch": 0.25, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 8.572588395284248e-07, "loss": 0.2357, "projector_lr": 2.5717765185852746e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 3.5625, "rewards_train/rejected": -4.21875, "sft_loss": 0.76171875, "step": 1589 }, { "dpo_loss": 0.71484375, "epoch": 0.25, "final_loss": 0.71484375, "grad_norm": 0.0, "learning_rate": 8.570811700076685e-07, "loss": 0.5541, "projector_lr": 2.571243510023006e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.91015625, "rewards_train/margins": 2.625, "rewards_train/rejected": -3.546875, "sft_loss": 0.671875, "step": 1590 }, { "dpo_loss": 0.177734375, "epoch": 0.25, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 8.569034084193907e-07, "loss": 0.1572, "projector_lr": 2.570710225258172e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 2.75, "rewards_train/rejected": -3.734375, "sft_loss": 0.71875, "step": 1591 }, { "dpo_loss": 0.49609375, "epoch": 0.25, "final_loss": 0.49609375, "grad_norm": 0.0, "learning_rate": 8.567255548094239e-07, "loss": 0.3467, "projector_lr": 2.5701766644282717e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 2.625, "rewards_train/rejected": -3.484375, "sft_loss": 0.80078125, "step": 1592 }, { "dpo_loss": 0.28515625, "epoch": 0.25, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 8.565476092236249e-07, "loss": 0.468, "projector_lr": 2.569642827670875e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.375, "rewards_train/margins": 2.484375, "rewards_train/rejected": -2.859375, "sft_loss": 0.734375, "step": 1593 }, { "dpo_loss": 0.5859375, "epoch": 0.26, "final_loss": 0.5859375, "grad_norm": 0.0, "learning_rate": 8.56369571707874e-07, "loss": 0.6266, "projector_lr": 2.5691087151236224e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 1.7109375, "rewards_train/rejected": -2.765625, "sft_loss": 0.921875, "step": 1594 }, { "dpo_loss": 0.455078125, "epoch": 0.26, "final_loss": 0.455078125, "grad_norm": 0.0, "learning_rate": 8.561914423080753e-07, "loss": 0.2717, "projector_lr": 2.568574326924226e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 2.171875, "rewards_train/rejected": -3.125, "sft_loss": 0.69140625, "step": 1595 }, { "dpo_loss": 0.251953125, "epoch": 0.26, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 8.560132210701567e-07, "loss": 0.3836, "projector_lr": 2.5680396632104705e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2470703125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.3125, "sft_loss": 1.0234375, "step": 1596 }, { "dpo_loss": 0.26171875, "epoch": 0.26, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 8.558349080400693e-07, "loss": 0.3073, "projector_lr": 2.567504724120208e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 2.0, "rewards_train/rejected": -2.9375, "sft_loss": 0.609375, "step": 1597 }, { "dpo_loss": 0.259765625, "epoch": 0.26, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 8.556565032637885e-07, "loss": 0.2484, "projector_lr": 2.5669695097913655e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.078125, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.0, "sft_loss": 0.68359375, "step": 1598 }, { "dpo_loss": 0.3828125, "epoch": 0.26, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 8.554780067873126e-07, "loss": 0.273, "projector_lr": 2.566434020361938e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.32421875, "rewards_train/margins": 2.546875, "rewards_train/rejected": -2.875, "sft_loss": 0.7265625, "step": 1599 }, { "dpo_loss": 0.142578125, "epoch": 0.26, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 8.552994186566645e-07, "loss": 0.1772, "projector_lr": 2.5658982559699937e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.232421875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.25, "sft_loss": 0.76171875, "step": 1600 }, { "dpo_loss": 0.189453125, "epoch": 0.26, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 8.5512073891789e-07, "loss": 0.2457, "projector_lr": 2.56536221675367e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.375, "sft_loss": 0.69140625, "step": 1601 }, { "dpo_loss": 0.2431640625, "epoch": 0.26, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 8.549419676170586e-07, "loss": 0.2486, "projector_lr": 2.564825902851176e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 2.40625, "rewards_train/rejected": -3.4375, "sft_loss": 0.67578125, "step": 1602 }, { "dpo_loss": 0.30078125, "epoch": 0.26, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 8.547631048002639e-07, "loss": 0.2282, "projector_lr": 2.5642893144007916e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 3.296875, "rewards_train/rejected": -4.28125, "sft_loss": 0.66015625, "step": 1603 }, { "dpo_loss": 0.31640625, "epoch": 0.26, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 8.545841505136223e-07, "loss": 0.3647, "projector_lr": 2.5637524515408673e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.68359375, "rewards_train/margins": 3.265625, "rewards_train/rejected": -3.9375, "sft_loss": 0.734375, "step": 1604 }, { "dpo_loss": 0.0517578125, "epoch": 0.26, "final_loss": 0.0517578125, "grad_norm": 0.0, "learning_rate": 8.544051048032745e-07, "loss": 0.0833, "projector_lr": 2.563215314409824e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.140625, "rewards_train/margins": 4.0625, "rewards_train/rejected": -3.921875, "sft_loss": 0.5625, "step": 1605 }, { "dpo_loss": 0.197265625, "epoch": 0.26, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 8.542259677153846e-07, "loss": 0.3691, "projector_lr": 2.562677903146154e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6171875, "rewards_train/margins": 2.796875, "rewards_train/rejected": -3.40625, "sft_loss": 0.83203125, "step": 1606 }, { "dpo_loss": 0.2109375, "epoch": 0.26, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 8.540467392961399e-07, "loss": 0.1574, "projector_lr": 2.56214021788842e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 2.421875, "rewards_train/rejected": -2.953125, "sft_loss": 0.55078125, "step": 1607 }, { "dpo_loss": 0.7890625, "epoch": 0.26, "final_loss": 0.7890625, "grad_norm": 0.0, "learning_rate": 8.538674195917516e-07, "loss": 0.4796, "projector_lr": 2.561602258775255e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 2.203125, "rewards_train/rejected": -3.671875, "sft_loss": 0.86328125, "step": 1608 }, { "dpo_loss": 0.1142578125, "epoch": 0.26, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 8.536880086484543e-07, "loss": 0.0677, "projector_lr": 2.561064025945363e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.6875, "rewards_train/rejected": -4.6875, "sft_loss": 0.67578125, "step": 1609 }, { "dpo_loss": 0.361328125, "epoch": 0.26, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 8.535085065125064e-07, "loss": 0.3006, "projector_lr": 2.5605255195375193e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 3.21875, "rewards_train/rejected": -3.90625, "sft_loss": 0.6875, "step": 1610 }, { "dpo_loss": 0.1337890625, "epoch": 0.26, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 8.533289132301893e-07, "loss": 0.1946, "projector_lr": 2.559986739690568e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.193359375, "rewards_train/margins": 3.6875, "rewards_train/rejected": -3.875, "sft_loss": 0.8046875, "step": 1611 }, { "dpo_loss": 0.66796875, "epoch": 0.26, "final_loss": 0.66796875, "grad_norm": 0.0, "learning_rate": 8.531492288478084e-07, "loss": 0.4838, "projector_lr": 2.5594476865434254e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 1.3984375, "rewards_train/rejected": -1.90625, "sft_loss": 0.77734375, "step": 1612 }, { "dpo_loss": 0.30859375, "epoch": 0.26, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 8.52969453411692e-07, "loss": 0.3551, "projector_lr": 2.5589083602350762e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.419921875, "rewards_train/margins": 3.3125, "rewards_train/rejected": -3.71875, "sft_loss": 0.65234375, "step": 1613 }, { "dpo_loss": 0.45703125, "epoch": 0.26, "final_loss": 0.45703125, "grad_norm": 0.0, "learning_rate": 8.527895869681928e-07, "loss": 0.2696, "projector_lr": 2.5583687609045788e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 2.71875, "rewards_train/rejected": -3.40625, "sft_loss": 0.8203125, "step": 1614 }, { "dpo_loss": 0.14453125, "epoch": 0.26, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 8.52609629563686e-07, "loss": 0.1689, "projector_lr": 2.5578288886910583e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3046875, "rewards_train/margins": 3.453125, "rewards_train/rejected": -3.765625, "sft_loss": 0.7734375, "step": 1615 }, { "dpo_loss": 0.51953125, "epoch": 0.26, "final_loss": 0.51953125, "grad_norm": 0.0, "learning_rate": 8.524295812445708e-07, "loss": 0.3645, "projector_lr": 2.5572887437337126e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0, "rewards_train/margins": 1.6171875, "rewards_train/rejected": -2.625, "sft_loss": 0.62109375, "step": 1616 }, { "dpo_loss": 0.150390625, "epoch": 0.26, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 8.522494420572697e-07, "loss": 0.1106, "projector_lr": 2.5567483261718094e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 3.625, "rewards_train/rejected": -4.625, "sft_loss": 0.8046875, "step": 1617 }, { "dpo_loss": 0.28515625, "epoch": 0.26, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 8.520692120482287e-07, "loss": 0.2013, "projector_lr": 2.5562076361446863e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 3.90625, "rewards_train/rejected": -4.90625, "sft_loss": 0.63671875, "step": 1618 }, { "dpo_loss": 0.1630859375, "epoch": 0.26, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 8.51888891263917e-07, "loss": 0.1603, "projector_lr": 2.5556666737917514e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 2.75, "rewards_train/rejected": -4.21875, "sft_loss": 0.8125, "step": 1619 }, { "dpo_loss": 0.388671875, "epoch": 0.26, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 8.517084797508275e-07, "loss": 0.2355, "projector_lr": 2.5551254392524827e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.265625, "rewards_train/margins": 3.640625, "rewards_train/rejected": -4.90625, "sft_loss": 0.86328125, "step": 1620 }, { "dpo_loss": 0.306640625, "epoch": 0.26, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 8.515279775554762e-07, "loss": 0.2426, "projector_lr": 2.554583932666429e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 3.171875, "rewards_train/rejected": -3.75, "sft_loss": 0.61328125, "step": 1621 }, { "dpo_loss": 0.33984375, "epoch": 0.26, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 8.513473847244028e-07, "loss": 0.2555, "projector_lr": 2.5540421541732088e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 1.8046875, "rewards_train/rejected": -2.53125, "sft_loss": 0.7734375, "step": 1622 }, { "dpo_loss": 0.125, "epoch": 0.26, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 8.511667013041701e-07, "loss": 0.1356, "projector_lr": 2.5535001039125105e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.515625, "sft_loss": 0.671875, "step": 1623 }, { "dpo_loss": 0.318359375, "epoch": 0.26, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 8.509859273413642e-07, "loss": 0.2757, "projector_lr": 2.552957782024093e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 2.171875, "rewards_train/rejected": -3.40625, "sft_loss": 0.9296875, "step": 1624 }, { "dpo_loss": 0.3046875, "epoch": 0.26, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 8.508050628825951e-07, "loss": 0.56, "projector_lr": 2.5524151886477858e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.65625, "sft_loss": 0.6875, "step": 1625 }, { "dpo_loss": 0.220703125, "epoch": 0.26, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 8.506241079744954e-07, "loss": 0.2452, "projector_lr": 2.5518723239234863e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.640625, "sft_loss": 0.76953125, "step": 1626 }, { "dpo_loss": 0.29296875, "epoch": 0.26, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 8.504430626637214e-07, "loss": 0.189, "projector_lr": 2.5513291879911646e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 2.28125, "rewards_train/rejected": -3.125, "sft_loss": 0.859375, "step": 1627 }, { "dpo_loss": 0.1259765625, "epoch": 0.26, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 8.502619269969529e-07, "loss": 0.1377, "projector_lr": 2.5507857809908586e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.078125, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.34375, "sft_loss": 0.625, "step": 1628 }, { "dpo_loss": 0.55859375, "epoch": 0.26, "final_loss": 0.55859375, "grad_norm": 0.0, "learning_rate": 8.500807010208924e-07, "loss": 0.5951, "projector_lr": 2.550242103062677e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 2.625, "rewards_train/rejected": -3.4375, "sft_loss": 0.85546875, "step": 1629 }, { "dpo_loss": 0.1015625, "epoch": 0.26, "final_loss": 0.1015625, "grad_norm": 0.0, "learning_rate": 8.498993847822662e-07, "loss": 0.136, "projector_lr": 2.549698154346799e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21875, "rewards_train/margins": 3.15625, "rewards_train/rejected": -3.375, "sft_loss": 0.55078125, "step": 1630 }, { "dpo_loss": 0.1513671875, "epoch": 0.26, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 8.497179783278239e-07, "loss": 0.1572, "projector_lr": 2.549153934983472e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1943359375, "rewards_train/margins": 3.609375, "rewards_train/rejected": -3.40625, "sft_loss": 0.83203125, "step": 1631 }, { "dpo_loss": 0.46875, "epoch": 0.26, "final_loss": 0.46875, "grad_norm": 0.0, "learning_rate": 8.49536481704338e-07, "loss": 0.3014, "projector_lr": 2.548609445113014e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.5625, "sft_loss": 0.6796875, "step": 1632 }, { "dpo_loss": 0.41796875, "epoch": 0.26, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 8.493548949586044e-07, "loss": 0.3372, "projector_lr": 2.5480646848758134e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 2.640625, "rewards_train/rejected": -3.65625, "sft_loss": 0.74609375, "step": 1633 }, { "dpo_loss": 0.361328125, "epoch": 0.26, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 8.491732181374422e-07, "loss": 0.268, "projector_lr": 2.547519654412327e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.625, "rewards_train/margins": 2.75, "rewards_train/rejected": -4.375, "sft_loss": 0.7265625, "step": 1634 }, { "dpo_loss": 0.609375, "epoch": 0.26, "final_loss": 0.609375, "grad_norm": 0.0, "learning_rate": 8.489914512876942e-07, "loss": 0.3908, "projector_lr": 2.5469743538630825e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.15625, "sft_loss": 0.49609375, "step": 1635 }, { "dpo_loss": 0.1572265625, "epoch": 0.26, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 8.488095944562255e-07, "loss": 0.2561, "projector_lr": 2.5464287833686764e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.04052734375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -4.53125, "sft_loss": 0.58984375, "step": 1636 }, { "dpo_loss": 0.044921875, "epoch": 0.26, "final_loss": 0.044921875, "grad_norm": 0.0, "learning_rate": 8.48627647689925e-07, "loss": 0.1191, "projector_lr": 2.5458829430697753e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.412109375, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.625, "sft_loss": 0.78125, "step": 1637 }, { "dpo_loss": 0.50390625, "epoch": 0.26, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 8.48445611035705e-07, "loss": 0.5263, "projector_lr": 2.545336833107115e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 3.40625, "rewards_train/rejected": -4.40625, "sft_loss": 0.9375, "step": 1638 }, { "dpo_loss": 0.28125, "epoch": 0.26, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 8.482634845405004e-07, "loss": 0.3495, "projector_lr": 2.5447904536215013e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.75, "sft_loss": 0.9140625, "step": 1639 }, { "dpo_loss": 0.12255859375, "epoch": 0.26, "final_loss": 0.12255859375, "grad_norm": 0.0, "learning_rate": 8.480812682512695e-07, "loss": 0.1069, "projector_lr": 2.5442438047538087e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.40625, "sft_loss": 0.7734375, "step": 1640 }, { "dpo_loss": 0.11572265625, "epoch": 0.26, "final_loss": 0.11572265625, "grad_norm": 0.0, "learning_rate": 8.478989622149939e-07, "loss": 0.1317, "projector_lr": 2.543696886644982e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.625, "sft_loss": 0.73828125, "step": 1641 }, { "dpo_loss": 0.06689453125, "epoch": 0.26, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 8.477165664786781e-07, "loss": 0.1204, "projector_lr": 2.5431496994360346e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40625, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.625, "sft_loss": 0.66015625, "step": 1642 }, { "dpo_loss": 0.55078125, "epoch": 0.26, "final_loss": 0.55078125, "grad_norm": 0.0, "learning_rate": 8.475340810893501e-07, "loss": 0.299, "projector_lr": 2.5426022432680506e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 1.40625, "rewards_train/rejected": -2.359375, "sft_loss": 0.80078125, "step": 1643 }, { "dpo_loss": 0.3984375, "epoch": 0.26, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 8.473515060940606e-07, "loss": 0.2374, "projector_lr": 2.542054518282182e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 2.265625, "rewards_train/rejected": -3.1875, "sft_loss": 0.55078125, "step": 1644 }, { "dpo_loss": 0.11328125, "epoch": 0.26, "final_loss": 0.11328125, "grad_norm": 0.0, "learning_rate": 8.471688415398834e-07, "loss": 0.1966, "projector_lr": 2.5415065246196504e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.609375, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.375, "sft_loss": 0.83984375, "step": 1645 }, { "dpo_loss": 0.421875, "epoch": 0.26, "final_loss": 0.421875, "grad_norm": 0.0, "learning_rate": 8.469860874739158e-07, "loss": 0.2497, "projector_lr": 2.5409582624217478e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.34375, "rewards_train/margins": 2.71875, "rewards_train/rejected": -3.0625, "sft_loss": 0.84765625, "step": 1646 }, { "dpo_loss": 0.228515625, "epoch": 0.26, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 8.468032439432779e-07, "loss": 0.2123, "projector_lr": 2.540409731829834e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1689453125, "rewards_train/margins": 3.34375, "rewards_train/rejected": -3.515625, "sft_loss": 0.5859375, "step": 1647 }, { "dpo_loss": 0.283203125, "epoch": 0.26, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 8.466203109951129e-07, "loss": 0.2169, "projector_lr": 2.539860932985339e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.287109375, "rewards_train/margins": 2.453125, "rewards_train/rejected": -2.75, "sft_loss": 0.62890625, "step": 1648 }, { "dpo_loss": 0.12353515625, "epoch": 0.26, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 8.46437288676587e-07, "loss": 0.2858, "projector_lr": 2.539311866029761e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 3.859375, "rewards_train/rejected": -4.46875, "sft_loss": 0.765625, "step": 1649 }, { "dpo_loss": 0.1728515625, "epoch": 0.26, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 8.462541770348895e-07, "loss": 0.1411, "projector_lr": 2.5387625311046686e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.62109375, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.734375, "sft_loss": 0.76953125, "step": 1650 }, { "dpo_loss": 0.0771484375, "epoch": 0.26, "final_loss": 0.0771484375, "grad_norm": 0.0, "learning_rate": 8.460709761172327e-07, "loss": 0.0906, "projector_lr": 2.5382129283516987e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.162109375, "rewards_train/margins": 5.625, "rewards_train/rejected": -5.46875, "sft_loss": 0.734375, "step": 1651 }, { "dpo_loss": 0.125, "epoch": 0.26, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 8.458876859708522e-07, "loss": 0.1566, "projector_lr": 2.5376630579125565e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.15625, "sft_loss": 0.6015625, "step": 1652 }, { "dpo_loss": 0.2421875, "epoch": 0.26, "final_loss": 0.2421875, "grad_norm": 0.0, "learning_rate": 8.45704306643006e-07, "loss": 0.1387, "projector_lr": 2.537112919929018e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.27734375, "rewards_train/margins": 3.6875, "rewards_train/rejected": -3.953125, "sft_loss": 0.6953125, "step": 1653 }, { "dpo_loss": 0.10009765625, "epoch": 0.26, "final_loss": 0.10009765625, "grad_norm": 0.0, "learning_rate": 8.455208381809758e-07, "loss": 0.1193, "projector_lr": 2.5365625145429275e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.71875, "sft_loss": 0.578125, "step": 1654 }, { "dpo_loss": 0.06640625, "epoch": 0.26, "final_loss": 0.06640625, "grad_norm": 0.0, "learning_rate": 8.453372806320655e-07, "loss": 0.1471, "projector_lr": 2.536011841896197e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.162109375, "rewards_train/margins": 4.5, "rewards_train/rejected": -4.65625, "sft_loss": 0.87890625, "step": 1655 }, { "dpo_loss": 0.5859375, "epoch": 0.26, "final_loss": 0.5859375, "grad_norm": 0.0, "learning_rate": 8.451536340436028e-07, "loss": 0.304, "projector_lr": 2.5354609021308085e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 2.46875, "rewards_train/rejected": -3.78125, "sft_loss": 0.9609375, "step": 1656 }, { "dpo_loss": 0.04638671875, "epoch": 0.27, "final_loss": 0.04638671875, "grad_norm": 0.0, "learning_rate": 8.449698984629378e-07, "loss": 0.2006, "projector_lr": 2.5349096953888137e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0625, "rewards_train/margins": 3.90625, "rewards_train/rejected": -3.96875, "sft_loss": 0.7890625, "step": 1657 }, { "dpo_loss": 0.06201171875, "epoch": 0.27, "final_loss": 0.06201171875, "grad_norm": 0.0, "learning_rate": 8.447860739374437e-07, "loss": 0.1395, "projector_lr": 2.5343582218123315e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3125, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.15625, "sft_loss": 0.466796875, "step": 1658 }, { "dpo_loss": 0.09765625, "epoch": 0.27, "final_loss": 0.09765625, "grad_norm": 0.0, "learning_rate": 8.446021605145167e-07, "loss": 0.0729, "projector_lr": 2.5338064815435502e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.466796875, "rewards_train/margins": 4.46875, "rewards_train/rejected": -4.9375, "sft_loss": 0.73828125, "step": 1659 }, { "dpo_loss": 0.2314453125, "epoch": 0.27, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 8.444181582415758e-07, "loss": 0.2019, "projector_lr": 2.5332544747247275e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.375, "sft_loss": 0.83984375, "step": 1660 }, { "dpo_loss": 0.89453125, "epoch": 0.27, "final_loss": 0.89453125, "grad_norm": 0.0, "learning_rate": 8.44234067166063e-07, "loss": 0.5614, "projector_lr": 2.5327022014981894e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 2.671875, "rewards_train/rejected": -3.296875, "sft_loss": 0.6953125, "step": 1661 }, { "dpo_loss": 0.26953125, "epoch": 0.27, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 8.440498873354432e-07, "loss": 0.2217, "projector_lr": 2.5321496620063297e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.498046875, "rewards_train/margins": 3.859375, "rewards_train/rejected": -4.34375, "sft_loss": 0.6875, "step": 1662 }, { "dpo_loss": 0.1318359375, "epoch": 0.27, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 8.43865618797204e-07, "loss": 0.2236, "projector_lr": 2.531596856391612e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1142578125, "rewards_train/margins": 4.4375, "rewards_train/rejected": -4.5625, "sft_loss": 0.7265625, "step": 1663 }, { "dpo_loss": 0.27734375, "epoch": 0.27, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 8.436812615988562e-07, "loss": 0.2082, "projector_lr": 2.531043784796569e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 2.234375, "rewards_train/rejected": -2.953125, "sft_loss": 0.6328125, "step": 1664 }, { "dpo_loss": 0.408203125, "epoch": 0.27, "final_loss": 0.408203125, "grad_norm": 0.0, "learning_rate": 8.434968157879332e-07, "loss": 0.3299, "projector_lr": 2.5304904473637997e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5625, "rewards_train/margins": 1.734375, "rewards_train/rejected": -2.296875, "sft_loss": 0.703125, "step": 1665 }, { "dpo_loss": 0.1474609375, "epoch": 0.27, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 8.433122814119913e-07, "loss": 0.2318, "projector_lr": 2.529936844235974e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.140625, "rewards_train/margins": 3.65625, "rewards_train/rejected": -3.515625, "sft_loss": 0.59765625, "step": 1666 }, { "dpo_loss": 0.1689453125, "epoch": 0.27, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 8.431276585186096e-07, "loss": 0.1155, "projector_lr": 2.529382975555829e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1220703125, "rewards_train/margins": 3.0625, "rewards_train/rejected": -2.9375, "sft_loss": 0.8671875, "step": 1667 }, { "dpo_loss": 0.70703125, "epoch": 0.27, "final_loss": 0.70703125, "grad_norm": 0.0, "learning_rate": 8.429429471553902e-07, "loss": 0.4331, "projector_lr": 2.5288288414661707e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.796875, "rewards_train/margins": 1.765625, "rewards_train/rejected": -2.5625, "sft_loss": 0.462890625, "step": 1668 }, { "dpo_loss": 0.023681640625, "epoch": 0.27, "final_loss": 0.023681640625, "grad_norm": 0.0, "learning_rate": 8.427581473699579e-07, "loss": 0.2017, "projector_lr": 2.528274442109874e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.28125, "rewards_train/margins": 6.34375, "rewards_train/rejected": -6.0625, "sft_loss": 0.7265625, "step": 1669 }, { "dpo_loss": 0.515625, "epoch": 0.27, "final_loss": 0.515625, "grad_norm": 0.0, "learning_rate": 8.425732592099603e-07, "loss": 0.3874, "projector_lr": 2.527719777629881e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.259765625, "rewards_train/margins": 3.34375, "rewards_train/rejected": -3.609375, "sft_loss": 0.71484375, "step": 1670 }, { "dpo_loss": 0.05810546875, "epoch": 0.27, "final_loss": 0.05810546875, "grad_norm": 0.0, "learning_rate": 8.423882827230677e-07, "loss": 0.1741, "projector_lr": 2.5271648481692034e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.224609375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -4.78125, "sft_loss": 0.66015625, "step": 1671 }, { "dpo_loss": 0.306640625, "epoch": 0.27, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 8.422032179569734e-07, "loss": 0.1762, "projector_lr": 2.5266096538709204e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.41796875, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.421875, "sft_loss": 0.6875, "step": 1672 }, { "dpo_loss": 0.166015625, "epoch": 0.27, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 8.420180649593929e-07, "loss": 0.1197, "projector_lr": 2.5260541948781787e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.40625, "sft_loss": 0.796875, "step": 1673 }, { "dpo_loss": 0.361328125, "epoch": 0.27, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 8.418328237780654e-07, "loss": 0.321, "projector_lr": 2.5254984713341963e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.1044921875, "rewards_train/margins": 2.0625, "rewards_train/rejected": -2.15625, "sft_loss": 0.85546875, "step": 1674 }, { "dpo_loss": 0.310546875, "epoch": 0.27, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 8.416474944607521e-07, "loss": 0.2102, "projector_lr": 2.5249424833822565e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 2.234375, "rewards_train/rejected": -3.109375, "sft_loss": 0.69140625, "step": 1675 }, { "dpo_loss": 0.240234375, "epoch": 0.27, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 8.41462077055237e-07, "loss": 0.1662, "projector_lr": 2.524386231165711e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 3.21875, "rewards_train/rejected": -3.734375, "sft_loss": 0.6015625, "step": 1676 }, { "dpo_loss": 0.11962890625, "epoch": 0.27, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 8.41276571609327e-07, "loss": 0.2375, "projector_lr": 2.5238297148279814e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19140625, "rewards_train/margins": 3.78125, "rewards_train/rejected": -3.984375, "sft_loss": 0.7890625, "step": 1677 }, { "dpo_loss": 0.125, "epoch": 0.27, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 8.410909781708517e-07, "loss": 0.1072, "projector_lr": 2.5232729345125552e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.212890625, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.4375, "sft_loss": 0.78125, "step": 1678 }, { "dpo_loss": 0.1630859375, "epoch": 0.27, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 8.409052967876634e-07, "loss": 0.0913, "projector_lr": 2.5227158903629903e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.044677734375, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.03125, "sft_loss": 0.67578125, "step": 1679 }, { "dpo_loss": 0.09912109375, "epoch": 0.27, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 8.407195275076366e-07, "loss": 0.227, "projector_lr": 2.52215858252291e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.609375, "sft_loss": 0.640625, "step": 1680 }, { "dpo_loss": 0.2353515625, "epoch": 0.27, "final_loss": 0.2353515625, "grad_norm": 0.0, "learning_rate": 8.405336703786694e-07, "loss": 0.134, "projector_lr": 2.521601011136008e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 3.046875, "rewards_train/rejected": -3.75, "sft_loss": 0.8984375, "step": 1681 }, { "dpo_loss": 0.1171875, "epoch": 0.27, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 8.403477254486818e-07, "loss": 0.1176, "projector_lr": 2.521043176346045e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 3.90625, "rewards_train/rejected": -4.84375, "sft_loss": 0.76171875, "step": 1682 }, { "dpo_loss": 0.224609375, "epoch": 0.27, "final_loss": 0.224609375, "grad_norm": 0.0, "learning_rate": 8.401616927656164e-07, "loss": 0.2419, "projector_lr": 2.5204850782968496e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4375, "rewards_train/margins": 3.578125, "rewards_train/rejected": -5.0, "sft_loss": 0.73828125, "step": 1683 }, { "dpo_loss": 0.23046875, "epoch": 0.27, "final_loss": 0.23046875, "grad_norm": 0.0, "learning_rate": 8.39975572377439e-07, "loss": 0.1424, "projector_lr": 2.519926717132317e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.90625, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.90625, "sft_loss": 0.9765625, "step": 1684 }, { "dpo_loss": 0.1435546875, "epoch": 0.27, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 8.397893643321374e-07, "loss": 0.1865, "projector_lr": 2.5193680929964126e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5, "rewards_train/margins": 3.1875, "rewards_train/rejected": -3.703125, "sft_loss": 0.59375, "step": 1685 }, { "dpo_loss": 0.09814453125, "epoch": 0.27, "final_loss": 0.09814453125, "grad_norm": 0.0, "learning_rate": 8.396030686777227e-07, "loss": 0.0867, "projector_lr": 2.518809206033168e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3125, "rewards_train/margins": 3.5625, "rewards_train/rejected": -3.875, "sft_loss": 0.53125, "step": 1686 }, { "dpo_loss": 0.251953125, "epoch": 0.27, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 8.394166854622279e-07, "loss": 0.4709, "projector_lr": 2.518250056386684e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.474609375, "rewards_train/margins": 3.390625, "rewards_train/rejected": -3.875, "sft_loss": 0.640625, "step": 1687 }, { "dpo_loss": 0.474609375, "epoch": 0.27, "final_loss": 0.474609375, "grad_norm": 0.0, "learning_rate": 8.392302147337088e-07, "loss": 0.2984, "projector_lr": 2.5176906442011267e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.703125, "rewards_train/margins": 3.0625, "rewards_train/rejected": -3.765625, "sft_loss": 0.734375, "step": 1688 }, { "dpo_loss": 0.166015625, "epoch": 0.27, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 8.390436565402439e-07, "loss": 0.1105, "projector_lr": 2.517130969620732e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.171875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.5, "sft_loss": 0.80859375, "step": 1689 }, { "dpo_loss": 0.33984375, "epoch": 0.27, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 8.388570109299343e-07, "loss": 0.2588, "projector_lr": 2.5165710327898033e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.59765625, "rewards_train/margins": 3.4375, "rewards_train/rejected": -4.03125, "sft_loss": 0.85546875, "step": 1690 }, { "dpo_loss": 0.255859375, "epoch": 0.27, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 8.386702779509033e-07, "loss": 0.294, "projector_lr": 2.51601083385271e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2255859375, "rewards_train/margins": 3.671875, "rewards_train/rejected": -3.890625, "sft_loss": 0.78125, "step": 1691 }, { "dpo_loss": 0.6640625, "epoch": 0.27, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 8.384834576512971e-07, "loss": 0.4681, "projector_lr": 2.5154503729538915e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.28125, "rewards_train/margins": 2.625, "rewards_train/rejected": -3.90625, "sft_loss": 0.875, "step": 1692 }, { "dpo_loss": 0.248046875, "epoch": 0.27, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 8.38296550079284e-07, "loss": 0.3065, "projector_lr": 2.514889650237852e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 2.828125, "rewards_train/rejected": -3.59375, "sft_loss": 0.73046875, "step": 1693 }, { "dpo_loss": 0.119140625, "epoch": 0.27, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 8.381095552830554e-07, "loss": 0.1359, "projector_lr": 2.5143286658491665e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.9375, "sft_loss": 0.83203125, "step": 1694 }, { "dpo_loss": 0.384765625, "epoch": 0.27, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 8.379224733108247e-07, "loss": 0.4312, "projector_lr": 2.5137674199324745e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 2.265625, "rewards_train/rejected": -3.21875, "sft_loss": 0.78515625, "step": 1695 }, { "dpo_loss": 0.2158203125, "epoch": 0.27, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 8.377353042108278e-07, "loss": 0.1911, "projector_lr": 2.5132059126324835e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.875, "sft_loss": 0.8125, "step": 1696 }, { "dpo_loss": 0.34375, "epoch": 0.27, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 8.375480480313233e-07, "loss": 0.37, "projector_lr": 2.51264414409397e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.375, "rewards_train/margins": 3.65625, "rewards_train/rejected": -5.03125, "sft_loss": 0.74609375, "step": 1697 }, { "dpo_loss": 0.00506591796875, "epoch": 0.27, "final_loss": 0.00506591796875, "grad_norm": 0.0, "learning_rate": 8.37360704820592e-07, "loss": 0.0901, "projector_lr": 2.512082114461776e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.28125, "rewards_train/margins": 6.125, "rewards_train/rejected": -7.40625, "sft_loss": 0.91796875, "step": 1698 }, { "dpo_loss": 0.11279296875, "epoch": 0.27, "final_loss": 0.11279296875, "grad_norm": 0.0, "learning_rate": 8.371732746269375e-07, "loss": 0.1444, "projector_lr": 2.5115198238808126e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.90625, "sft_loss": 0.82421875, "step": 1699 }, { "dpo_loss": 0.09716796875, "epoch": 0.27, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 8.369857574986853e-07, "loss": 0.0725, "projector_lr": 2.510957272496056e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.8125, "sft_loss": 0.703125, "step": 1700 }, { "dpo_loss": 0.263671875, "epoch": 0.27, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 8.367981534841841e-07, "loss": 0.2182, "projector_lr": 2.5103944604525527e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.28125, "rewards_train/margins": 2.5625, "rewards_train/rejected": -3.84375, "sft_loss": 0.625, "step": 1701 }, { "dpo_loss": 0.1650390625, "epoch": 0.27, "final_loss": 0.1650390625, "grad_norm": 0.0, "learning_rate": 8.36610462631804e-07, "loss": 0.1977, "projector_lr": 2.509831387895412e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.453125, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.5625, "sft_loss": 0.890625, "step": 1702 }, { "dpo_loss": 0.0208740234375, "epoch": 0.27, "final_loss": 0.0208740234375, "grad_norm": 0.0, "learning_rate": 8.364226849899382e-07, "loss": 0.3104, "projector_lr": 2.509268054969815e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.53125, "sft_loss": 0.65234375, "step": 1703 }, { "dpo_loss": 0.283203125, "epoch": 0.27, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 8.362348206070023e-07, "loss": 0.1924, "projector_lr": 2.508704461821007e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.8125, "sft_loss": 0.8125, "step": 1704 }, { "dpo_loss": 0.076171875, "epoch": 0.27, "final_loss": 0.076171875, "grad_norm": 0.0, "learning_rate": 8.360468695314335e-07, "loss": 0.1112, "projector_lr": 2.508140608594301e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.65625, "sft_loss": 0.6171875, "step": 1705 }, { "dpo_loss": 0.21875, "epoch": 0.27, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 8.358588318116925e-07, "loss": 0.2054, "projector_lr": 2.5075764954350775e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.6875, "sft_loss": 0.7578125, "step": 1706 }, { "dpo_loss": 0.10546875, "epoch": 0.27, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 8.356707074962614e-07, "loss": 0.1076, "projector_lr": 2.5070121224887846e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 3.328125, "rewards_train/rejected": -5.09375, "sft_loss": 0.5703125, "step": 1707 }, { "dpo_loss": 0.189453125, "epoch": 0.27, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 8.354824966336451e-07, "loss": 0.1073, "projector_lr": 2.5064474899009356e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 3.984375, "rewards_train/rejected": -5.3125, "sft_loss": 0.97265625, "step": 1708 }, { "dpo_loss": 0.09228515625, "epoch": 0.27, "final_loss": 0.09228515625, "grad_norm": 0.0, "learning_rate": 8.352941992723705e-07, "loss": 0.1091, "projector_lr": 2.505882597817112e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8125, "rewards_train/margins": 3.015625, "rewards_train/rejected": -4.84375, "sft_loss": 0.72265625, "step": 1709 }, { "dpo_loss": 0.103515625, "epoch": 0.27, "final_loss": 0.103515625, "grad_norm": 0.0, "learning_rate": 8.351058154609874e-07, "loss": 0.0742, "projector_lr": 2.505317446382962e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.21875, "sft_loss": 0.6328125, "step": 1710 }, { "dpo_loss": 0.408203125, "epoch": 0.27, "final_loss": 0.408203125, "grad_norm": 0.0, "learning_rate": 8.34917345248067e-07, "loss": 0.4165, "projector_lr": 2.5047520357442013e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 1.84375, "rewards_train/rejected": -3.125, "sft_loss": 0.7578125, "step": 1711 }, { "dpo_loss": 0.1728515625, "epoch": 0.27, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 8.347287886822036e-07, "loss": 0.3188, "projector_lr": 2.504186366046611e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.6875, "sft_loss": 0.671875, "step": 1712 }, { "dpo_loss": 0.10693359375, "epoch": 0.27, "final_loss": 0.10693359375, "grad_norm": 0.0, "learning_rate": 8.345401458120132e-07, "loss": 0.0684, "projector_lr": 2.50362043743604e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.0625, "sft_loss": 0.87109375, "step": 1713 }, { "dpo_loss": 0.032470703125, "epoch": 0.27, "final_loss": 0.032470703125, "grad_norm": 0.0, "learning_rate": 8.343514166861343e-07, "loss": 0.0999, "projector_lr": 2.503054250058403e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.375, "sft_loss": 0.82421875, "step": 1714 }, { "dpo_loss": 0.1982421875, "epoch": 0.27, "final_loss": 0.1982421875, "grad_norm": 0.0, "learning_rate": 8.341626013532277e-07, "loss": 0.1162, "projector_lr": 2.502487804059683e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 3.234375, "rewards_train/rejected": -3.984375, "sft_loss": 1.0078125, "step": 1715 }, { "dpo_loss": 0.53125, "epoch": 0.27, "final_loss": 0.53125, "grad_norm": 0.0, "learning_rate": 8.339736998619762e-07, "loss": 0.4343, "projector_lr": 2.501921099585929e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.5625, "rewards_train/margins": 1.53125, "rewards_train/rejected": -4.09375, "sft_loss": 0.8046875, "step": 1716 }, { "dpo_loss": 0.189453125, "epoch": 0.27, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 8.337847122610853e-07, "loss": 0.2403, "projector_lr": 2.501354136783256e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.390625, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.0625, "sft_loss": 0.90234375, "step": 1717 }, { "dpo_loss": 0.083984375, "epoch": 0.27, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 8.335956385992817e-07, "loss": 0.0763, "projector_lr": 2.5007869157978452e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.34375, "sft_loss": 0.703125, "step": 1718 }, { "dpo_loss": 0.134765625, "epoch": 0.28, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 8.334064789253157e-07, "loss": 0.1449, "projector_lr": 2.5002194367759474e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.203125, "rewards_train/margins": 5.0, "rewards_train/rejected": -7.1875, "sft_loss": 0.71484375, "step": 1719 }, { "dpo_loss": 0.671875, "epoch": 0.28, "final_loss": 0.671875, "grad_norm": 0.0, "learning_rate": 8.332172332879584e-07, "loss": 0.4304, "projector_lr": 2.4996516998638756e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.46875, "rewards_train/margins": 2.5, "rewards_train/rejected": -3.953125, "sft_loss": 0.64453125, "step": 1720 }, { "dpo_loss": 0.169921875, "epoch": 0.28, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 8.33027901736004e-07, "loss": 0.1147, "projector_lr": 2.4990837052080122e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7265625, "rewards_train/margins": 3.640625, "rewards_train/rejected": -4.375, "sft_loss": 0.81640625, "step": 1721 }, { "dpo_loss": 0.3984375, "epoch": 0.28, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 8.328384843182685e-07, "loss": 0.499, "projector_lr": 2.4985154529548056e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.78125, "rewards_train/margins": 2.46875, "rewards_train/rejected": -4.25, "sft_loss": 0.8515625, "step": 1722 }, { "dpo_loss": 0.1162109375, "epoch": 0.28, "final_loss": 0.1162109375, "grad_norm": 0.0, "learning_rate": 8.326489810835902e-07, "loss": 0.197, "projector_lr": 2.497946943250771e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.671875, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.6875, "sft_loss": 0.7265625, "step": 1723 }, { "dpo_loss": 0.1748046875, "epoch": 0.28, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 8.32459392080829e-07, "loss": 0.2893, "projector_lr": 2.4973781762424874e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.40625, "rewards_train/margins": 3.71875, "rewards_train/rejected": -5.125, "sft_loss": 0.91796875, "step": 1724 }, { "dpo_loss": 0.275390625, "epoch": 0.28, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 8.322697173588676e-07, "loss": 0.1473, "projector_lr": 2.4968091520766032e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.34375, "sft_loss": 0.84765625, "step": 1725 }, { "dpo_loss": 0.29296875, "epoch": 0.28, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 8.320799569666106e-07, "loss": 0.2013, "projector_lr": 2.496239870899832e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1875, "rewards_train/margins": 2.859375, "rewards_train/rejected": -4.0625, "sft_loss": 0.60546875, "step": 1726 }, { "dpo_loss": 0.056884765625, "epoch": 0.28, "final_loss": 0.056884765625, "grad_norm": 0.0, "learning_rate": 8.318901109529843e-07, "loss": 0.1701, "projector_lr": 2.4956703328589533e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8125, "rewards_train/margins": 3.21875, "rewards_train/rejected": -5.03125, "sft_loss": 1.015625, "step": 1727 }, { "dpo_loss": 0.625, "epoch": 0.28, "final_loss": 0.625, "grad_norm": 0.0, "learning_rate": 8.317001793669378e-07, "loss": 0.4854, "projector_lr": 2.4951005381008135e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.15625, "rewards_train/margins": 1.6796875, "rewards_train/rejected": -3.84375, "sft_loss": 0.859375, "step": 1728 }, { "dpo_loss": 0.1279296875, "epoch": 0.28, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 8.315101622574414e-07, "loss": 0.1313, "projector_lr": 2.4945304867723245e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 3.796875, "rewards_train/rejected": -5.21875, "sft_loss": 0.6015625, "step": 1729 }, { "dpo_loss": 0.08544921875, "epoch": 0.28, "final_loss": 0.08544921875, "grad_norm": 0.0, "learning_rate": 8.313200596734882e-07, "loss": 0.2364, "projector_lr": 2.4939601790204646e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.46875, "sft_loss": 0.93359375, "step": 1730 }, { "dpo_loss": 0.1552734375, "epoch": 0.28, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 8.311298716640929e-07, "loss": 0.4042, "projector_lr": 2.4933896149922787e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.953125, "sft_loss": 0.59765625, "step": 1731 }, { "dpo_loss": 0.0294189453125, "epoch": 0.28, "final_loss": 0.0294189453125, "grad_norm": 0.0, "learning_rate": 8.309395982782923e-07, "loss": 0.0381, "projector_lr": 2.492818794834877e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.28125, "sft_loss": 0.5390625, "step": 1732 }, { "dpo_loss": 0.11865234375, "epoch": 0.28, "final_loss": 0.11865234375, "grad_norm": 0.0, "learning_rate": 8.307492395651454e-07, "loss": 0.3964, "projector_lr": 2.4922477186954366e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -6.3125, "sft_loss": 0.95703125, "step": 1733 }, { "dpo_loss": 0.150390625, "epoch": 0.28, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 8.30558795573733e-07, "loss": 0.1746, "projector_lr": 2.4916763867211993e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 3.5625, "rewards_train/rejected": -4.53125, "sft_loss": 0.89453125, "step": 1734 }, { "dpo_loss": 0.625, "epoch": 0.28, "final_loss": 0.625, "grad_norm": 0.0, "learning_rate": 8.303682663531581e-07, "loss": 0.3689, "projector_lr": 2.4911047990594747e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.3125, "rewards_train/margins": 2.484375, "rewards_train/rejected": -4.78125, "sft_loss": 0.94140625, "step": 1735 }, { "dpo_loss": 0.1220703125, "epoch": 0.28, "final_loss": 0.1220703125, "grad_norm": 0.0, "learning_rate": 8.301776519525454e-07, "loss": 0.4684, "projector_lr": 2.4905329558576365e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.0625, "sft_loss": 0.90234375, "step": 1736 }, { "dpo_loss": 0.12109375, "epoch": 0.28, "final_loss": 0.12109375, "grad_norm": 0.0, "learning_rate": 8.299869524210416e-07, "loss": 0.3698, "projector_lr": 2.4899608572631254e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.375, "sft_loss": 0.640625, "step": 1737 }, { "dpo_loss": 0.146484375, "epoch": 0.28, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 8.297961678078158e-07, "loss": 0.1692, "projector_lr": 2.4893885034234476e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 2.625, "rewards_train/rejected": -4.40625, "sft_loss": 0.953125, "step": 1738 }, { "dpo_loss": 0.16796875, "epoch": 0.28, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 8.296052981620583e-07, "loss": 0.2386, "projector_lr": 2.488815894486175e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 3.890625, "rewards_train/rejected": -5.125, "sft_loss": 0.8671875, "step": 1739 }, { "dpo_loss": 0.369140625, "epoch": 0.28, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 8.294143435329818e-07, "loss": 0.2624, "projector_lr": 2.4882430305989457e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 2.640625, "rewards_train/rejected": -4.21875, "sft_loss": 1.0, "step": 1740 }, { "dpo_loss": 0.35546875, "epoch": 0.28, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 8.292233039698208e-07, "loss": 0.2999, "projector_lr": 2.487669911909463e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 2.375, "rewards_train/rejected": -3.171875, "sft_loss": 0.86328125, "step": 1741 }, { "dpo_loss": 0.34765625, "epoch": 0.28, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 8.29032179521832e-07, "loss": 0.3294, "projector_lr": 2.4870965385654964e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.375, "rewards_train/margins": 3.34375, "rewards_train/rejected": -4.71875, "sft_loss": 0.921875, "step": 1742 }, { "dpo_loss": 0.08349609375, "epoch": 0.28, "final_loss": 0.08349609375, "grad_norm": 0.0, "learning_rate": 8.288409702382934e-07, "loss": 0.2385, "projector_lr": 2.4865229107148807e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.984375, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.65625, "sft_loss": 0.6953125, "step": 1743 }, { "dpo_loss": 0.2373046875, "epoch": 0.28, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 8.286496761685053e-07, "loss": 0.4389, "projector_lr": 2.4859490285055162e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.734375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.375, "sft_loss": 0.9453125, "step": 1744 }, { "dpo_loss": 0.75, "epoch": 0.28, "final_loss": 0.75, "grad_norm": 0.0, "learning_rate": 8.284582973617897e-07, "loss": 0.4294, "projector_lr": 2.4853748920853694e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.390625, "rewards_train/margins": 1.0703125, "rewards_train/rejected": -3.46875, "sft_loss": 0.96484375, "step": 1745 }, { "dpo_loss": 0.078125, "epoch": 0.28, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 8.282668338674905e-07, "loss": 0.225, "projector_lr": 2.4848005016024716e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7890625, "rewards_train/margins": 5.75, "rewards_train/rejected": -7.53125, "sft_loss": 0.9453125, "step": 1746 }, { "dpo_loss": 0.1103515625, "epoch": 0.28, "final_loss": 0.1103515625, "grad_norm": 0.0, "learning_rate": 8.280752857349733e-07, "loss": 0.3042, "projector_lr": 2.48422585720492e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.78125, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.875, "sft_loss": 0.63671875, "step": 1747 }, { "dpo_loss": 0.373046875, "epoch": 0.28, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 8.278836530136258e-07, "loss": 0.2877, "projector_lr": 2.4836509590408775e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.015625, "rewards_train/margins": 2.734375, "rewards_train/rejected": -4.75, "sft_loss": 0.7578125, "step": 1748 }, { "dpo_loss": 0.2158203125, "epoch": 0.28, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 8.276919357528574e-07, "loss": 0.3202, "projector_lr": 2.483075807258572e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.796875, "rewards_train/margins": 3.34375, "rewards_train/rejected": -6.15625, "sft_loss": 0.7265625, "step": 1749 }, { "dpo_loss": 0.236328125, "epoch": 0.28, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 8.275001340020991e-07, "loss": 0.2005, "projector_lr": 2.4825004020062976e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 2.078125, "rewards_train/rejected": -3.34375, "sft_loss": 0.8046875, "step": 1750 }, { "dpo_loss": 0.115234375, "epoch": 0.28, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 8.27308247810804e-07, "loss": 0.2403, "projector_lr": 2.481924743432412e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.796875, "rewards_train/margins": 7.59375, "rewards_train/rejected": -9.375, "sft_loss": 0.6953125, "step": 1751 }, { "dpo_loss": 0.142578125, "epoch": 0.28, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 8.271162772284465e-07, "loss": 0.2104, "projector_lr": 2.48134883168534e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.0, "rewards_train/margins": 4.09375, "rewards_train/rejected": -6.09375, "sft_loss": 0.90234375, "step": 1752 }, { "dpo_loss": 0.341796875, "epoch": 0.28, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 8.269242223045235e-07, "loss": 0.1882, "projector_lr": 2.4807726669135705e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 3.34375, "rewards_train/rejected": -4.9375, "sft_loss": 1.09375, "step": 1753 }, { "dpo_loss": 0.4375, "epoch": 0.28, "final_loss": 0.4375, "grad_norm": 0.0, "learning_rate": 8.267320830885529e-07, "loss": 0.2458, "projector_lr": 2.480196249265659e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.203125, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.15625, "sft_loss": 0.8125, "step": 1754 }, { "dpo_loss": 0.0145263671875, "epoch": 0.28, "final_loss": 0.0145263671875, "grad_norm": 0.0, "learning_rate": 8.265398596300747e-07, "loss": 0.0957, "projector_lr": 2.479619578890224e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.546875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.84375, "sft_loss": 0.92578125, "step": 1755 }, { "dpo_loss": 0.25390625, "epoch": 0.28, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 8.263475519786505e-07, "loss": 0.1758, "projector_lr": 2.4790426559359517e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.03125, "rewards_train/margins": 3.53125, "rewards_train/rejected": -5.5625, "sft_loss": 0.921875, "step": 1756 }, { "dpo_loss": 0.0703125, "epoch": 0.28, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 8.261551601838639e-07, "loss": 0.049, "projector_lr": 2.478465480551592e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.75, "sft_loss": 0.76953125, "step": 1757 }, { "dpo_loss": 0.055908203125, "epoch": 0.28, "final_loss": 0.055908203125, "grad_norm": 0.0, "learning_rate": 8.259626842953198e-07, "loss": 0.1122, "projector_lr": 2.4778880528859595e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 5.6875, "rewards_train/rejected": -7.15625, "sft_loss": 0.6640625, "step": 1758 }, { "dpo_loss": 0.1435546875, "epoch": 0.28, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 8.257701243626449e-07, "loss": 0.091, "projector_lr": 2.477310373087935e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.265625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -7.3125, "sft_loss": 0.67578125, "step": 1759 }, { "dpo_loss": 0.12255859375, "epoch": 0.28, "final_loss": 0.12255859375, "grad_norm": 0.0, "learning_rate": 8.255774804354878e-07, "loss": 0.107, "projector_lr": 2.4767324413064635e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8828125, "rewards_train/margins": 3.5, "rewards_train/rejected": -5.375, "sft_loss": 0.64453125, "step": 1760 }, { "dpo_loss": 0.51953125, "epoch": 0.28, "final_loss": 0.51953125, "grad_norm": 0.0, "learning_rate": 8.253847525635184e-07, "loss": 0.3087, "projector_lr": 2.4761542576905556e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8125, "rewards_train/margins": 1.015625, "rewards_train/rejected": -2.828125, "sft_loss": 0.8984375, "step": 1761 }, { "dpo_loss": 0.421875, "epoch": 0.28, "final_loss": 0.421875, "grad_norm": 0.0, "learning_rate": 8.251919407964286e-07, "loss": 0.3906, "projector_lr": 2.475575822389286e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.28125, "rewards_train/margins": 2.171875, "rewards_train/rejected": -3.453125, "sft_loss": 0.86328125, "step": 1762 }, { "dpo_loss": 0.1484375, "epoch": 0.28, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 8.249990451839315e-07, "loss": 0.1473, "projector_lr": 2.4749971355517944e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.21875, "rewards_train/margins": 3.203125, "rewards_train/rejected": -4.4375, "sft_loss": 0.93359375, "step": 1763 }, { "dpo_loss": 0.341796875, "epoch": 0.28, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 8.248060657757624e-07, "loss": 0.2248, "projector_lr": 2.4744181973272874e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6875, "rewards_train/margins": 2.984375, "rewards_train/rejected": -4.65625, "sft_loss": 0.91015625, "step": 1764 }, { "dpo_loss": 0.1708984375, "epoch": 0.28, "final_loss": 0.1708984375, "grad_norm": 0.0, "learning_rate": 8.246130026216776e-07, "loss": 0.1873, "projector_lr": 2.473839007865033e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.625, "rewards_train/margins": 2.59375, "rewards_train/rejected": -4.21875, "sft_loss": 0.64453125, "step": 1765 }, { "dpo_loss": 0.08251953125, "epoch": 0.28, "final_loss": 0.08251953125, "grad_norm": 0.0, "learning_rate": 8.244198557714552e-07, "loss": 0.366, "projector_lr": 2.4732595673143658e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.0625, "sft_loss": 0.8515625, "step": 1766 }, { "dpo_loss": 0.09716796875, "epoch": 0.28, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 8.242266252748955e-07, "loss": 0.1997, "projector_lr": 2.4726798758246865e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.859375, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.65625, "sft_loss": 0.83984375, "step": 1767 }, { "dpo_loss": 0.5234375, "epoch": 0.28, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 8.240333111818191e-07, "loss": 0.2928, "projector_lr": 2.4720999335454575e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.546875, "rewards_train/margins": 2.265625, "rewards_train/rejected": -4.8125, "sft_loss": 0.96875, "step": 1768 }, { "dpo_loss": 0.412109375, "epoch": 0.28, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 8.238399135420693e-07, "loss": 0.2343, "projector_lr": 2.471519740626208e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.921875, "rewards_train/margins": 2.53125, "rewards_train/rejected": -5.4375, "sft_loss": 0.9453125, "step": 1769 }, { "dpo_loss": 0.10888671875, "epoch": 0.28, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 8.236464324055104e-07, "loss": 0.1618, "projector_lr": 2.4709392972165313e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.609375, "rewards_train/margins": 2.828125, "rewards_train/rejected": -5.4375, "sft_loss": 0.8515625, "step": 1770 }, { "dpo_loss": 0.21484375, "epoch": 0.28, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 8.234528678220283e-07, "loss": 0.187, "projector_lr": 2.470358603466085e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.53125, "rewards_train/margins": 3.390625, "rewards_train/rejected": -3.921875, "sft_loss": 0.71875, "step": 1771 }, { "dpo_loss": 0.0498046875, "epoch": 0.28, "final_loss": 0.0498046875, "grad_norm": 0.0, "learning_rate": 8.232592198415304e-07, "loss": 0.092, "projector_lr": 2.4697776595245917e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.59375, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.125, "sft_loss": 1.09375, "step": 1772 }, { "dpo_loss": 0.265625, "epoch": 0.28, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 8.230654885139459e-07, "loss": 0.1859, "projector_lr": 2.469196465541838e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 2.484375, "rewards_train/rejected": -3.6875, "sft_loss": 0.6953125, "step": 1773 }, { "dpo_loss": 0.2578125, "epoch": 0.28, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 8.22871673889225e-07, "loss": 0.1792, "projector_lr": 2.468615021667675e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.140625, "rewards_train/margins": 2.984375, "rewards_train/rejected": -5.125, "sft_loss": 0.8515625, "step": 1774 }, { "dpo_loss": 0.1455078125, "epoch": 0.28, "final_loss": 0.1455078125, "grad_norm": 0.0, "learning_rate": 8.226777760173396e-07, "loss": 0.2331, "projector_lr": 2.4680333280520194e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.03125, "sft_loss": 0.796875, "step": 1775 }, { "dpo_loss": 0.310546875, "epoch": 0.28, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 8.224837949482835e-07, "loss": 0.2084, "projector_lr": 2.4674513848448505e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 1.890625, "rewards_train/rejected": -3.21875, "sft_loss": 0.609375, "step": 1776 }, { "dpo_loss": 0.220703125, "epoch": 0.28, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 8.222897307320708e-07, "loss": 0.285, "projector_lr": 2.4668691921962127e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.078125, "rewards_train/margins": 2.34375, "rewards_train/rejected": -4.40625, "sft_loss": 0.9921875, "step": 1777 }, { "dpo_loss": 0.083984375, "epoch": 0.28, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 8.220955834187387e-07, "loss": 0.3272, "projector_lr": 2.466286750256216e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.828125, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.9375, "sft_loss": 0.77734375, "step": 1778 }, { "dpo_loss": 0.484375, "epoch": 0.28, "final_loss": 0.484375, "grad_norm": 0.0, "learning_rate": 8.219013530583441e-07, "loss": 0.4011, "projector_lr": 2.4657040591750325e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.140625, "rewards_train/margins": 3.359375, "rewards_train/rejected": -5.5, "sft_loss": 0.69921875, "step": 1779 }, { "dpo_loss": 0.060302734375, "epoch": 0.28, "final_loss": 0.060302734375, "grad_norm": 0.0, "learning_rate": 8.217070397009665e-07, "loss": 0.1648, "projector_lr": 2.4651211191028997e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.015625, "rewards_train/margins": 3.65625, "rewards_train/rejected": -5.6875, "sft_loss": 1.0703125, "step": 1780 }, { "dpo_loss": 0.2041015625, "epoch": 0.28, "final_loss": 0.2041015625, "grad_norm": 0.0, "learning_rate": 8.215126433967062e-07, "loss": 0.1377, "projector_lr": 2.4645379301901187e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 4.59375, "rewards_train/rejected": -6.03125, "sft_loss": 0.6171875, "step": 1781 }, { "dpo_loss": 0.035400390625, "epoch": 0.29, "final_loss": 0.035400390625, "grad_norm": 0.0, "learning_rate": 8.213181641956855e-07, "loss": 0.2345, "projector_lr": 2.4639544925870566e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8984375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -7.25, "sft_loss": 0.81640625, "step": 1782 }, { "dpo_loss": 0.05517578125, "epoch": 0.29, "final_loss": 0.05517578125, "grad_norm": 0.0, "learning_rate": 8.21123602148047e-07, "loss": 0.2099, "projector_lr": 2.463370806444141e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.59375, "sft_loss": 0.7578125, "step": 1783 }, { "dpo_loss": 0.67578125, "epoch": 0.29, "final_loss": 0.67578125, "grad_norm": 0.0, "learning_rate": 8.209289573039559e-07, "loss": 0.421, "projector_lr": 2.462786871911868e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.328125, "rewards_train/margins": 2.09375, "rewards_train/rejected": -4.4375, "sft_loss": 0.87109375, "step": 1784 }, { "dpo_loss": 0.024658203125, "epoch": 0.29, "final_loss": 0.024658203125, "grad_norm": 0.0, "learning_rate": 8.207342297135982e-07, "loss": 0.1333, "projector_lr": 2.462202689140795e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.1875, "sft_loss": 1.203125, "step": 1785 }, { "dpo_loss": 0.0732421875, "epoch": 0.29, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 8.205394194271808e-07, "loss": 0.165, "projector_lr": 2.4616182582815426e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.625, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.625, "sft_loss": 0.6875, "step": 1786 }, { "dpo_loss": 0.53515625, "epoch": 0.29, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 8.203445264949327e-07, "loss": 0.4387, "projector_lr": 2.4610335794847982e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.828125, "rewards_train/margins": 2.546875, "rewards_train/rejected": -4.375, "sft_loss": 0.8359375, "step": 1787 }, { "dpo_loss": 0.234375, "epoch": 0.29, "final_loss": 0.234375, "grad_norm": 0.0, "learning_rate": 8.201495509671036e-07, "loss": 0.4, "projector_lr": 2.460448652901311e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1875, "rewards_train/margins": 3.390625, "rewards_train/rejected": -4.59375, "sft_loss": 0.69140625, "step": 1788 }, { "dpo_loss": 0.63671875, "epoch": 0.29, "final_loss": 0.63671875, "grad_norm": 0.0, "learning_rate": 8.199544928939649e-07, "loss": 0.543, "projector_lr": 2.459863478681895e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 1.1171875, "rewards_train/rejected": -2.734375, "sft_loss": 0.87890625, "step": 1789 }, { "dpo_loss": 0.047119140625, "epoch": 0.29, "final_loss": 0.047119140625, "grad_norm": 0.0, "learning_rate": 8.197593523258091e-07, "loss": 0.0744, "projector_lr": 2.4592780569774272e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.375, "sft_loss": 0.7734375, "step": 1790 }, { "dpo_loss": 0.259765625, "epoch": 0.29, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 8.195641293129498e-07, "loss": 0.1941, "projector_lr": 2.4586923879388495e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.484375, "rewards_train/margins": 4.0, "rewards_train/rejected": -6.5, "sft_loss": 0.8046875, "step": 1791 }, { "dpo_loss": 0.58203125, "epoch": 0.29, "final_loss": 0.58203125, "grad_norm": 0.0, "learning_rate": 8.193688239057224e-07, "loss": 0.5511, "projector_lr": 2.4581064717171672e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.609375, "rewards_train/margins": 3.21875, "rewards_train/rejected": -4.8125, "sft_loss": 0.8515625, "step": 1792 }, { "dpo_loss": 0.49609375, "epoch": 0.29, "final_loss": 0.49609375, "grad_norm": 0.0, "learning_rate": 8.191734361544828e-07, "loss": 0.4918, "projector_lr": 2.4575203084634487e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.171875, "rewards_train/margins": 3.421875, "rewards_train/rejected": -4.59375, "sft_loss": 0.734375, "step": 1793 }, { "dpo_loss": 0.59765625, "epoch": 0.29, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 8.189779661096091e-07, "loss": 0.3945, "projector_lr": 2.456933898328827e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 2.21875, "rewards_train/rejected": -3.015625, "sft_loss": 0.70703125, "step": 1794 }, { "dpo_loss": 0.1845703125, "epoch": 0.29, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 8.187824138214994e-07, "loss": 0.439, "projector_lr": 2.4563472414644983e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.296875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.625, "sft_loss": 0.5859375, "step": 1795 }, { "dpo_loss": 0.0264892578125, "epoch": 0.29, "final_loss": 0.0264892578125, "grad_norm": 0.0, "learning_rate": 8.18586779340574e-07, "loss": 0.1716, "projector_lr": 2.4557603380217223e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 5.625, "rewards_train/rejected": -7.09375, "sft_loss": 0.7109375, "step": 1796 }, { "dpo_loss": 0.42578125, "epoch": 0.29, "final_loss": 0.42578125, "grad_norm": 0.0, "learning_rate": 8.183910627172739e-07, "loss": 0.2819, "projector_lr": 2.4551731881518217e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.375, "sft_loss": 0.7890625, "step": 1797 }, { "dpo_loss": 0.024658203125, "epoch": 0.29, "final_loss": 0.024658203125, "grad_norm": 0.0, "learning_rate": 8.181952640020616e-07, "loss": 0.542, "projector_lr": 2.454585792006185e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.796875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -6.09375, "sft_loss": 0.63671875, "step": 1798 }, { "dpo_loss": 0.1171875, "epoch": 0.29, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 8.179993832454204e-07, "loss": 0.1728, "projector_lr": 2.4539981497362615e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.46875, "sft_loss": 0.55078125, "step": 1799 }, { "dpo_loss": 0.2158203125, "epoch": 0.29, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 8.178034204978552e-07, "loss": 0.1315, "projector_lr": 2.453410261493566e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 3.671875, "rewards_train/rejected": -4.6875, "sft_loss": 0.74609375, "step": 1800 }, { "dpo_loss": 0.384765625, "epoch": 0.29, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 8.176073758098914e-07, "loss": 0.2917, "projector_lr": 2.4528221274296744e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.28125, "sft_loss": 0.55859375, "step": 1801 }, { "dpo_loss": 0.12255859375, "epoch": 0.29, "final_loss": 0.12255859375, "grad_norm": 0.0, "learning_rate": 8.174112492320762e-07, "loss": 0.1724, "projector_lr": 2.4522337476962286e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.390625, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.28125, "sft_loss": 0.8046875, "step": 1802 }, { "dpo_loss": 0.203125, "epoch": 0.29, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 8.172150408149774e-07, "loss": 0.3206, "projector_lr": 2.4516451224449324e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.375, "sft_loss": 0.984375, "step": 1803 }, { "dpo_loss": 0.11962890625, "epoch": 0.29, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 8.170187506091843e-07, "loss": 0.7036, "projector_lr": 2.4510562518275532e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.59375, "sft_loss": 0.64453125, "step": 1804 }, { "dpo_loss": 0.09765625, "epoch": 0.29, "final_loss": 0.09765625, "grad_norm": 0.0, "learning_rate": 8.168223786653072e-07, "loss": 0.4111, "projector_lr": 2.450467135995922e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.78125, "sft_loss": 0.546875, "step": 1805 }, { "dpo_loss": 0.55859375, "epoch": 0.29, "final_loss": 0.55859375, "grad_norm": 0.0, "learning_rate": 8.166259250339771e-07, "loss": 0.4942, "projector_lr": 2.4498777751019315e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.671875, "rewards_train/margins": 3.921875, "rewards_train/rejected": -5.59375, "sft_loss": 0.8671875, "step": 1806 }, { "dpo_loss": 0.080078125, "epoch": 0.29, "final_loss": 0.080078125, "grad_norm": 0.0, "learning_rate": 8.164293897658466e-07, "loss": 0.0562, "projector_lr": 2.4492881692975396e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 5.4375, "rewards_train/rejected": -5.875, "sft_loss": 0.69921875, "step": 1807 }, { "dpo_loss": 0.138671875, "epoch": 0.29, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 8.162327729115889e-07, "loss": 0.2723, "projector_lr": 2.4486983187347668e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7421875, "rewards_train/margins": 3.84375, "rewards_train/rejected": -5.59375, "sft_loss": 0.8125, "step": 1808 }, { "dpo_loss": 0.30078125, "epoch": 0.29, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 8.160360745218984e-07, "loss": 0.2491, "projector_lr": 2.4481082235656954e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 3.078125, "rewards_train/rejected": -3.671875, "sft_loss": 1.0, "step": 1809 }, { "dpo_loss": 0.146484375, "epoch": 0.29, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 8.158392946474908e-07, "loss": 0.3366, "projector_lr": 2.4475178839424724e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.83984375, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.578125, "sft_loss": 0.78515625, "step": 1810 }, { "dpo_loss": 0.193359375, "epoch": 0.29, "final_loss": 0.193359375, "grad_norm": 0.0, "learning_rate": 8.156424333391024e-07, "loss": 0.184, "projector_lr": 2.4469273000173076e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96875, "rewards_train/margins": 2.203125, "rewards_train/rejected": -3.171875, "sft_loss": 0.71875, "step": 1811 }, { "dpo_loss": 0.18359375, "epoch": 0.29, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 8.154454906474908e-07, "loss": 0.4348, "projector_lr": 2.4463364719424724e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.71875, "sft_loss": 0.80078125, "step": 1812 }, { "dpo_loss": 0.50390625, "epoch": 0.29, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 8.152484666234341e-07, "loss": 0.6158, "projector_lr": 2.445745399870303e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 2.5, "rewards_train/rejected": -3.09375, "sft_loss": 0.72265625, "step": 1813 }, { "dpo_loss": 0.263671875, "epoch": 0.29, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 8.150513613177321e-07, "loss": 0.3717, "projector_lr": 2.4451540839531966e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 2.515625, "rewards_train/rejected": -3.5, "sft_loss": 0.66015625, "step": 1814 }, { "dpo_loss": 0.427734375, "epoch": 0.29, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 8.14854174781205e-07, "loss": 0.4892, "projector_lr": 2.4445625243436153e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 1.875, "rewards_train/rejected": -2.53125, "sft_loss": 0.91015625, "step": 1815 }, { "dpo_loss": 0.23828125, "epoch": 0.29, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 8.146569070646942e-07, "loss": 0.2102, "projector_lr": 2.443970721194083e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.21875, "sft_loss": 0.890625, "step": 1816 }, { "dpo_loss": 0.1728515625, "epoch": 0.29, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 8.144595582190617e-07, "loss": 0.1732, "projector_lr": 2.4433786746571855e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.828125, "rewards_train/margins": 2.9375, "rewards_train/rejected": -3.75, "sft_loss": 0.61328125, "step": 1817 }, { "dpo_loss": 0.28125, "epoch": 0.29, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 8.142621282951909e-07, "loss": 0.2306, "projector_lr": 2.442786384885573e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7421875, "rewards_train/margins": 3.515625, "rewards_train/rejected": -5.25, "sft_loss": 0.6484375, "step": 1818 }, { "dpo_loss": 0.05078125, "epoch": 0.29, "final_loss": 0.05078125, "grad_norm": 0.0, "learning_rate": 8.140646173439858e-07, "loss": 0.0775, "projector_lr": 2.4421938520319575e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.5625, "sft_loss": 0.63671875, "step": 1819 }, { "dpo_loss": 0.3359375, "epoch": 0.29, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 8.138670254163714e-07, "loss": 0.2355, "projector_lr": 2.4416010762491142e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.875, "rewards_train/margins": 2.171875, "rewards_train/rejected": -3.03125, "sft_loss": 0.84765625, "step": 1820 }, { "dpo_loss": 0.2275390625, "epoch": 0.29, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 8.136693525632935e-07, "loss": 0.1553, "projector_lr": 2.4410080576898807e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 3.171875, "rewards_train/rejected": -4.28125, "sft_loss": 0.70703125, "step": 1821 }, { "dpo_loss": 0.0281982421875, "epoch": 0.29, "final_loss": 0.0281982421875, "grad_norm": 0.0, "learning_rate": 8.134715988357188e-07, "loss": 0.0687, "projector_lr": 2.4404147965071566e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.78125, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.125, "sft_loss": 0.67578125, "step": 1822 }, { "dpo_loss": 0.10302734375, "epoch": 0.29, "final_loss": 0.10302734375, "grad_norm": 0.0, "learning_rate": 8.132737642846348e-07, "loss": 0.2318, "projector_lr": 2.4398212928539046e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.83984375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.53125, "sft_loss": 0.8828125, "step": 1823 }, { "dpo_loss": 0.373046875, "epoch": 0.29, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 8.130758489610499e-07, "loss": 0.3099, "projector_lr": 2.4392275468831496e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.96875, "sft_loss": 0.71875, "step": 1824 }, { "dpo_loss": 0.1923828125, "epoch": 0.29, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 8.128778529159934e-07, "loss": 0.2261, "projector_lr": 2.4386335587479803e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.74609375, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.875, "sft_loss": 0.8046875, "step": 1825 }, { "dpo_loss": 0.23828125, "epoch": 0.29, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 8.126797762005153e-07, "loss": 0.1606, "projector_lr": 2.438039328601546e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 3.796875, "rewards_train/rejected": -5.1875, "sft_loss": 0.8046875, "step": 1826 }, { "dpo_loss": 0.2236328125, "epoch": 0.29, "final_loss": 0.2236328125, "grad_norm": 0.0, "learning_rate": 8.124816188656865e-07, "loss": 0.1853, "projector_lr": 2.4374448565970594e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 3.0625, "rewards_train/rejected": -4.5, "sft_loss": 0.84375, "step": 1827 }, { "dpo_loss": 0.0439453125, "epoch": 0.29, "final_loss": 0.0439453125, "grad_norm": 0.0, "learning_rate": 8.122833809625984e-07, "loss": 0.1288, "projector_lr": 2.4368501428877952e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 6.5625, "rewards_train/rejected": -7.125, "sft_loss": 0.63671875, "step": 1828 }, { "dpo_loss": 0.007232666015625, "epoch": 0.29, "final_loss": 0.007232666015625, "grad_norm": 0.0, "learning_rate": 8.120850625423636e-07, "loss": 0.0649, "projector_lr": 2.436255187627091e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 8.125, "rewards_train/rejected": -8.9375, "sft_loss": 0.640625, "step": 1829 }, { "dpo_loss": 0.251953125, "epoch": 0.29, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 8.118866636561152e-07, "loss": 0.2299, "projector_lr": 2.435659990968346e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9296875, "rewards_train/margins": 3.96875, "rewards_train/rejected": -5.90625, "sft_loss": 0.796875, "step": 1830 }, { "dpo_loss": 0.259765625, "epoch": 0.29, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 8.116881843550072e-07, "loss": 0.268, "projector_lr": 2.435064553065022e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 2.78125, "rewards_train/rejected": -3.796875, "sft_loss": 0.734375, "step": 1831 }, { "dpo_loss": 0.404296875, "epoch": 0.29, "final_loss": 0.404296875, "grad_norm": 0.0, "learning_rate": 8.11489624690214e-07, "loss": 0.309, "projector_lr": 2.4344688740706422e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 2.234375, "rewards_train/rejected": -3.03125, "sft_loss": 0.90625, "step": 1832 }, { "dpo_loss": 0.04931640625, "epoch": 0.29, "final_loss": 0.04931640625, "grad_norm": 0.0, "learning_rate": 8.11290984712931e-07, "loss": 0.1348, "projector_lr": 2.433872954138793e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.59375, "sft_loss": 0.71484375, "step": 1833 }, { "dpo_loss": 0.55078125, "epoch": 0.29, "final_loss": 0.55078125, "grad_norm": 0.0, "learning_rate": 8.110922644743745e-07, "loss": 0.4057, "projector_lr": 2.4332767934231235e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 2.078125, "rewards_train/rejected": -3.71875, "sft_loss": 0.640625, "step": 1834 }, { "dpo_loss": 0.373046875, "epoch": 0.29, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 8.10893464025781e-07, "loss": 0.3384, "projector_lr": 2.432680392077343e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.1875, "sft_loss": 0.79296875, "step": 1835 }, { "dpo_loss": 0.2275390625, "epoch": 0.29, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 8.10694583418408e-07, "loss": 0.1724, "projector_lr": 2.4320837502552243e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.625, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.3125, "sft_loss": 0.640625, "step": 1836 }, { "dpo_loss": 0.2314453125, "epoch": 0.29, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 8.104956227035336e-07, "loss": 0.1534, "projector_lr": 2.431486868110601e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.75, "sft_loss": 0.6484375, "step": 1837 }, { "dpo_loss": 0.6953125, "epoch": 0.29, "final_loss": 0.6953125, "grad_norm": 0.0, "learning_rate": 8.102965819324566e-07, "loss": 0.4259, "projector_lr": 2.43088974579737e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.859375, "rewards_train/margins": 2.6875, "rewards_train/rejected": -3.546875, "sft_loss": 0.7578125, "step": 1838 }, { "dpo_loss": 0.298828125, "epoch": 0.29, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 8.100974611564964e-07, "loss": 0.3931, "projector_lr": 2.4302923834694895e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 2.703125, "rewards_train/rejected": -3.640625, "sft_loss": 0.83203125, "step": 1839 }, { "dpo_loss": 0.1591796875, "epoch": 0.29, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 8.09898260426993e-07, "loss": 0.2767, "projector_lr": 2.4296947812809793e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.0, "sft_loss": 0.82421875, "step": 1840 }, { "dpo_loss": 0.07177734375, "epoch": 0.29, "final_loss": 0.07177734375, "grad_norm": 0.0, "learning_rate": 8.096989797953071e-07, "loss": 0.1092, "projector_lr": 2.4290969393859215e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.75, "sft_loss": 0.66796875, "step": 1841 }, { "dpo_loss": 0.1337890625, "epoch": 0.29, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 8.094996193128198e-07, "loss": 0.1299, "projector_lr": 2.4284988579384593e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 3.1875, "rewards_train/rejected": -3.890625, "sft_loss": 0.5078125, "step": 1842 }, { "dpo_loss": 0.4765625, "epoch": 0.29, "final_loss": 0.4765625, "grad_norm": 0.0, "learning_rate": 8.09300179030933e-07, "loss": 0.3514, "projector_lr": 2.4279005370927995e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.765625, "rewards_train/margins": 2.671875, "rewards_train/rejected": -3.4375, "sft_loss": 0.78515625, "step": 1843 }, { "dpo_loss": 0.189453125, "epoch": 0.3, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 8.091006590010691e-07, "loss": 0.1221, "projector_lr": 2.4273019770032076e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.875, "sft_loss": 0.8984375, "step": 1844 }, { "dpo_loss": 0.02001953125, "epoch": 0.3, "final_loss": 0.02001953125, "grad_norm": 0.0, "learning_rate": 8.08901059274671e-07, "loss": 0.1326, "projector_lr": 2.426703177824013e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.953125, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.5625, "sft_loss": 0.81640625, "step": 1845 }, { "dpo_loss": 0.1630859375, "epoch": 0.3, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 8.087013799032026e-07, "loss": 0.1976, "projector_lr": 2.4261041397096076e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.125, "sft_loss": 0.76953125, "step": 1846 }, { "dpo_loss": 0.359375, "epoch": 0.3, "final_loss": 0.359375, "grad_norm": 0.0, "learning_rate": 8.085016209381473e-07, "loss": 0.3855, "projector_lr": 2.425504862814442e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 3.375, "rewards_train/rejected": -5.1875, "sft_loss": 0.91015625, "step": 1847 }, { "dpo_loss": 0.4765625, "epoch": 0.3, "final_loss": 0.4765625, "grad_norm": 0.0, "learning_rate": 8.083017824310102e-07, "loss": 0.3587, "projector_lr": 2.424905347293031e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5078125, "rewards_train/margins": 1.859375, "rewards_train/rejected": -3.359375, "sft_loss": 0.765625, "step": 1848 }, { "dpo_loss": 0.2099609375, "epoch": 0.3, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 8.081018644333161e-07, "loss": 0.1419, "projector_lr": 2.424305593299949e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 3.234375, "rewards_train/rejected": -3.75, "sft_loss": 0.484375, "step": 1849 }, { "dpo_loss": 0.0771484375, "epoch": 0.3, "final_loss": 0.0771484375, "grad_norm": 0.0, "learning_rate": 8.079018669966109e-07, "loss": 0.331, "projector_lr": 2.4237056009898327e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.0625, "sft_loss": 0.83984375, "step": 1850 }, { "dpo_loss": 0.09521484375, "epoch": 0.3, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 8.077017901724602e-07, "loss": 0.1632, "projector_lr": 2.4231053705173808e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.078125, "rewards_train/margins": 4.0, "rewards_train/rejected": -6.09375, "sft_loss": 0.52734375, "step": 1851 }, { "dpo_loss": 0.0201416015625, "epoch": 0.3, "final_loss": 0.0201416015625, "grad_norm": 0.0, "learning_rate": 8.07501634012451e-07, "loss": 0.2253, "projector_lr": 2.422504902037353e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.765625, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.53125, "sft_loss": 0.5078125, "step": 1852 }, { "dpo_loss": 0.10546875, "epoch": 0.3, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 8.073013985681898e-07, "loss": 0.0845, "projector_lr": 2.4219041957045698e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.8125, "sft_loss": 0.84375, "step": 1853 }, { "dpo_loss": 0.06591796875, "epoch": 0.3, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 8.071010838913043e-07, "loss": 0.0615, "projector_lr": 2.421303251673913e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 5.65625, "rewards_train/rejected": -7.0625, "sft_loss": 0.7734375, "step": 1854 }, { "dpo_loss": 0.40625, "epoch": 0.3, "final_loss": 0.40625, "grad_norm": 0.0, "learning_rate": 8.069006900334424e-07, "loss": 0.3578, "projector_lr": 2.4207020701003273e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.25, "sft_loss": 0.5625, "step": 1855 }, { "dpo_loss": 0.466796875, "epoch": 0.3, "final_loss": 0.466796875, "grad_norm": 0.0, "learning_rate": 8.067002170462722e-07, "loss": 0.5225, "projector_lr": 2.4201006511388167e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 2.5625, "rewards_train/rejected": -4.03125, "sft_loss": 0.6796875, "step": 1856 }, { "dpo_loss": 0.2265625, "epoch": 0.3, "final_loss": 0.2265625, "grad_norm": 0.0, "learning_rate": 8.064996649814826e-07, "loss": 0.1861, "projector_lr": 2.419498994944448e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.40625, "sft_loss": 0.859375, "step": 1857 }, { "dpo_loss": 0.494140625, "epoch": 0.3, "final_loss": 0.494140625, "grad_norm": 0.0, "learning_rate": 8.062990338907825e-07, "loss": 0.3302, "projector_lr": 2.4188971016723474e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.578125, "rewards_train/margins": 2.84375, "rewards_train/rejected": -4.4375, "sft_loss": 0.671875, "step": 1858 }, { "dpo_loss": 0.203125, "epoch": 0.3, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 8.060983238259013e-07, "loss": 0.225, "projector_lr": 2.4182949714777044e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 3.8125, "rewards_train/rejected": -4.625, "sft_loss": 0.625, "step": 1859 }, { "dpo_loss": 0.1416015625, "epoch": 0.3, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 8.058975348385888e-07, "loss": 0.1583, "projector_lr": 2.4176926045157664e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.90625, "sft_loss": 0.7734375, "step": 1860 }, { "dpo_loss": 0.115234375, "epoch": 0.3, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 8.056966669806154e-07, "loss": 0.1227, "projector_lr": 2.4170900009418463e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.40625, "sft_loss": 0.76953125, "step": 1861 }, { "dpo_loss": 0.16796875, "epoch": 0.3, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 8.054957203037711e-07, "loss": 0.1048, "projector_lr": 2.4164871609113135e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.375, "sft_loss": 0.62109375, "step": 1862 }, { "dpo_loss": 0.10986328125, "epoch": 0.3, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 8.052946948598672e-07, "loss": 0.1932, "projector_lr": 2.4158840845796017e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.83984375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.53125, "sft_loss": 0.63671875, "step": 1863 }, { "dpo_loss": 0.2138671875, "epoch": 0.3, "final_loss": 0.2138671875, "grad_norm": 0.0, "learning_rate": 8.050935907007345e-07, "loss": 0.192, "projector_lr": 2.415280772102204e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.46875, "sft_loss": 0.6171875, "step": 1864 }, { "dpo_loss": 0.021484375, "epoch": 0.3, "final_loss": 0.021484375, "grad_norm": 0.0, "learning_rate": 8.048924078782244e-07, "loss": 0.0588, "projector_lr": 2.4146772236346733e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.375, "sft_loss": 0.68359375, "step": 1865 }, { "dpo_loss": 0.039794921875, "epoch": 0.3, "final_loss": 0.039794921875, "grad_norm": 0.0, "learning_rate": 8.04691146444209e-07, "loss": 0.1917, "projector_lr": 2.414073439332627e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.42578125, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.5, "sft_loss": 0.431640625, "step": 1866 }, { "dpo_loss": 0.0179443359375, "epoch": 0.3, "final_loss": 0.0179443359375, "grad_norm": 0.0, "learning_rate": 8.044898064505798e-07, "loss": 0.1774, "projector_lr": 2.4134694193517393e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.8125, "sft_loss": 0.6640625, "step": 1867 }, { "dpo_loss": 0.2578125, "epoch": 0.3, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 8.042883879492491e-07, "loss": 0.2007, "projector_lr": 2.412865163847747e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.00115966796875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.34375, "sft_loss": 0.75, "step": 1868 }, { "dpo_loss": 0.06494140625, "epoch": 0.3, "final_loss": 0.06494140625, "grad_norm": 0.0, "learning_rate": 8.040868909921494e-07, "loss": 0.0682, "projector_lr": 2.4122606729764484e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.8125, "sft_loss": 0.7890625, "step": 1869 }, { "dpo_loss": 0.251953125, "epoch": 0.3, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 8.038853156312335e-07, "loss": 0.2752, "projector_lr": 2.411655946893701e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.09375, "sft_loss": 0.6328125, "step": 1870 }, { "dpo_loss": 0.5078125, "epoch": 0.3, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 8.036836619184743e-07, "loss": 0.3327, "projector_lr": 2.411050985755423e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 1.890625, "rewards_train/rejected": -3.265625, "sft_loss": 1.0546875, "step": 1871 }, { "dpo_loss": 0.142578125, "epoch": 0.3, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 8.034819299058646e-07, "loss": 0.1763, "projector_lr": 2.410445789717594e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.140625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -4.625, "sft_loss": 0.7578125, "step": 1872 }, { "dpo_loss": 0.234375, "epoch": 0.3, "final_loss": 0.234375, "grad_norm": 0.0, "learning_rate": 8.032801196454182e-07, "loss": 0.2378, "projector_lr": 2.4098403589362546e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.078125, "rewards_train/margins": 3.03125, "rewards_train/rejected": -4.125, "sft_loss": 0.81640625, "step": 1873 }, { "dpo_loss": 0.486328125, "epoch": 0.3, "final_loss": 0.486328125, "grad_norm": 0.0, "learning_rate": 8.03078231189168e-07, "loss": 0.3397, "projector_lr": 2.409234693567504e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.765625, "rewards_train/margins": 2.859375, "rewards_train/rejected": -4.625, "sft_loss": 0.85546875, "step": 1874 }, { "dpo_loss": 0.09814453125, "epoch": 0.3, "final_loss": 0.09814453125, "grad_norm": 0.0, "learning_rate": 8.02876264589168e-07, "loss": 0.1033, "projector_lr": 2.408628793767504e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.46875, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.78125, "sft_loss": 0.7578125, "step": 1875 }, { "dpo_loss": 0.13671875, "epoch": 0.3, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 8.026742198974919e-07, "loss": 0.1573, "projector_lr": 2.408022659692476e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.21875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.75, "sft_loss": 0.66015625, "step": 1876 }, { "dpo_loss": 0.1708984375, "epoch": 0.3, "final_loss": 0.1708984375, "grad_norm": 0.0, "learning_rate": 8.024720971662335e-07, "loss": 0.1258, "projector_lr": 2.4074162914987007e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.423828125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -3.90625, "sft_loss": 0.55078125, "step": 1877 }, { "dpo_loss": 0.11572265625, "epoch": 0.3, "final_loss": 0.11572265625, "grad_norm": 0.0, "learning_rate": 8.02269896447507e-07, "loss": 0.1314, "projector_lr": 2.406809689342521e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.25, "sft_loss": 0.89453125, "step": 1878 }, { "dpo_loss": 0.11865234375, "epoch": 0.3, "final_loss": 0.11865234375, "grad_norm": 0.0, "learning_rate": 8.020676177934464e-07, "loss": 0.2256, "projector_lr": 2.4062028533803394e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.34375, "sft_loss": 0.58203125, "step": 1879 }, { "dpo_loss": 0.126953125, "epoch": 0.3, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 8.018652612562061e-07, "loss": 0.1135, "projector_lr": 2.4055957837686184e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.248046875, "rewards_train/margins": 5.46875, "rewards_train/rejected": -5.71875, "sft_loss": 0.49609375, "step": 1880 }, { "dpo_loss": 0.03759765625, "epoch": 0.3, "final_loss": 0.03759765625, "grad_norm": 0.0, "learning_rate": 8.016628268879599e-07, "loss": 0.0357, "projector_lr": 2.40498848066388e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.15625, "sft_loss": 0.59375, "step": 1881 }, { "dpo_loss": 0.0849609375, "epoch": 0.3, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 8.014603147409029e-07, "loss": 0.2582, "projector_lr": 2.404380944222709e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.1875, "sft_loss": 1.0234375, "step": 1882 }, { "dpo_loss": 0.314453125, "epoch": 0.3, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 8.012577248672492e-07, "loss": 0.1744, "projector_lr": 2.4037731746017475e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.53125, "sft_loss": 0.828125, "step": 1883 }, { "dpo_loss": 0.15234375, "epoch": 0.3, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 8.010550573192332e-07, "loss": 0.2139, "projector_lr": 2.4031651719577e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.90625, "sft_loss": 0.89453125, "step": 1884 }, { "dpo_loss": 0.1875, "epoch": 0.3, "final_loss": 0.1875, "grad_norm": 0.0, "learning_rate": 8.008523121491095e-07, "loss": 0.1913, "projector_lr": 2.402556936447329e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.546875, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.9375, "sft_loss": 1.0078125, "step": 1885 }, { "dpo_loss": 0.396484375, "epoch": 0.3, "final_loss": 0.396484375, "grad_norm": 0.0, "learning_rate": 8.006494894091527e-07, "loss": 0.248, "projector_lr": 2.4019484682274583e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.6875, "rewards_train/margins": 2.53125, "rewards_train/rejected": -5.21875, "sft_loss": 1.1328125, "step": 1886 }, { "dpo_loss": 0.01239013671875, "epoch": 0.3, "final_loss": 0.01239013671875, "grad_norm": 0.0, "learning_rate": 8.004465891516572e-07, "loss": 0.1516, "projector_lr": 2.4013397674549716e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 6.9375, "rewards_train/rejected": -8.25, "sft_loss": 0.62890625, "step": 1887 }, { "dpo_loss": 0.1416015625, "epoch": 0.3, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 8.002436114289375e-07, "loss": 0.1222, "projector_lr": 2.4007308342868125e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.125, "sft_loss": 0.703125, "step": 1888 }, { "dpo_loss": 0.4140625, "epoch": 0.3, "final_loss": 0.4140625, "grad_norm": 0.0, "learning_rate": 8.000405562933281e-07, "loss": 0.4439, "projector_lr": 2.400121668879984e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.78125, "sft_loss": 0.859375, "step": 1889 }, { "dpo_loss": 0.65234375, "epoch": 0.3, "final_loss": 0.65234375, "grad_norm": 0.0, "learning_rate": 7.998374237971832e-07, "loss": 0.3394, "projector_lr": 2.3995122713915497e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.28125, "sft_loss": 0.70703125, "step": 1890 }, { "dpo_loss": 0.1982421875, "epoch": 0.3, "final_loss": 0.1982421875, "grad_norm": 0.0, "learning_rate": 7.996342139928778e-07, "loss": 0.1592, "projector_lr": 2.3989026419786337e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.9375, "sft_loss": 0.82421875, "step": 1891 }, { "dpo_loss": 0.61328125, "epoch": 0.3, "final_loss": 0.61328125, "grad_norm": 0.0, "learning_rate": 7.994309269328056e-07, "loss": 0.3502, "projector_lr": 2.3982927807984167e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.328125, "rewards_train/margins": 1.96875, "rewards_train/rejected": -4.3125, "sft_loss": 0.6875, "step": 1892 }, { "dpo_loss": 0.423828125, "epoch": 0.3, "final_loss": 0.423828125, "grad_norm": 0.0, "learning_rate": 7.992275626693811e-07, "loss": 0.3801, "projector_lr": 2.397682688008143e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 1.6171875, "rewards_train/rejected": -2.921875, "sft_loss": 0.91796875, "step": 1893 }, { "dpo_loss": 0.052978515625, "epoch": 0.3, "final_loss": 0.052978515625, "grad_norm": 0.0, "learning_rate": 7.990241212550384e-07, "loss": 0.0494, "projector_lr": 2.3970723637651152e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.28125, "sft_loss": 0.8984375, "step": 1894 }, { "dpo_loss": 0.03857421875, "epoch": 0.3, "final_loss": 0.03857421875, "grad_norm": 0.0, "learning_rate": 7.988206027422315e-07, "loss": 0.3702, "projector_lr": 2.3964618082266946e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.125, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.28125, "sft_loss": 0.5859375, "step": 1895 }, { "dpo_loss": 0.1904296875, "epoch": 0.3, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 7.986170071834343e-07, "loss": 0.1931, "projector_lr": 2.3958510215503032e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.078125, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.34375, "sft_loss": 0.96484375, "step": 1896 }, { "dpo_loss": 0.81640625, "epoch": 0.3, "final_loss": 0.81640625, "grad_norm": 0.0, "learning_rate": 7.984133346311409e-07, "loss": 0.8303, "projector_lr": 2.3952400038934227e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.34375, "rewards_train/margins": 2.53125, "rewards_train/rejected": -4.875, "sft_loss": 0.80859375, "step": 1897 }, { "dpo_loss": 0.07421875, "epoch": 0.3, "final_loss": 0.07421875, "grad_norm": 0.0, "learning_rate": 7.982095851378644e-07, "loss": 0.1864, "projector_lr": 2.394628755413593e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.1875, "sft_loss": 0.828125, "step": 1898 }, { "dpo_loss": 0.4296875, "epoch": 0.3, "final_loss": 0.4296875, "grad_norm": 0.0, "learning_rate": 7.980057587561385e-07, "loss": 0.3477, "projector_lr": 2.3940172762684157e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.5625, "rewards_train/margins": 2.703125, "rewards_train/rejected": -5.25, "sft_loss": 0.96484375, "step": 1899 }, { "dpo_loss": 0.828125, "epoch": 0.3, "final_loss": 0.828125, "grad_norm": 0.0, "learning_rate": 7.978018555385166e-07, "loss": 0.4653, "projector_lr": 2.39340556661555e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.546875, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.71875, "sft_loss": 0.859375, "step": 1900 }, { "dpo_loss": 0.1591796875, "epoch": 0.3, "final_loss": NaN, "grad_norm": 0.0, "learning_rate": 7.975978755375716e-07, "loss": 0.0811, "projector_lr": 2.392793626612715e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.609375, "rewards_train/margins": 4.625, "rewards_train/rejected": -6.21875, "sft_loss": NaN, "step": 1901 }, { "dpo_loss": 0.1298828125, "epoch": 0.3, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 7.973938188058966e-07, "loss": 0.3199, "projector_lr": 2.39218145641769e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.59375, "sft_loss": 0.62109375, "step": 1902 }, { "dpo_loss": 0.451171875, "epoch": 0.3, "final_loss": 0.451171875, "grad_norm": 0.0, "learning_rate": 7.971896853961042e-07, "loss": 0.4454, "projector_lr": 2.391569056188313e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.296875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -6.40625, "sft_loss": 0.94140625, "step": 1903 }, { "dpo_loss": 0.025390625, "epoch": 0.3, "final_loss": 0.025390625, "grad_norm": 0.0, "learning_rate": 7.969854753608267e-07, "loss": 0.0812, "projector_lr": 2.3909564260824804e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 5.8125, "rewards_train/rejected": -7.375, "sft_loss": 0.70703125, "step": 1904 }, { "dpo_loss": 0.1357421875, "epoch": 0.3, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 7.967811887527165e-07, "loss": 0.1732, "projector_lr": 2.3903435662581495e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 2.484375, "rewards_train/rejected": -3.96875, "sft_loss": 0.75, "step": 1905 }, { "dpo_loss": 0.154296875, "epoch": 0.3, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 7.965768256244456e-07, "loss": 0.2388, "projector_lr": 2.389730476873337e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 2.40625, "rewards_train/rejected": -3.421875, "sft_loss": 0.921875, "step": 1906 }, { "dpo_loss": 0.1591796875, "epoch": 0.31, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 7.963723860287054e-07, "loss": 0.1664, "projector_lr": 2.3891171580861165e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87890625, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.75, "sft_loss": 0.76171875, "step": 1907 }, { "dpo_loss": 0.0634765625, "epoch": 0.31, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 7.961678700182075e-07, "loss": 0.196, "projector_lr": 2.388503610054623e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5078125, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.4375, "sft_loss": 0.62890625, "step": 1908 }, { "dpo_loss": 0.06689453125, "epoch": 0.31, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 7.959632776456829e-07, "loss": 0.0737, "projector_lr": 2.387889832937049e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.53125, "sft_loss": 0.6875, "step": 1909 }, { "dpo_loss": 0.0634765625, "epoch": 0.31, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 7.957586089638826e-07, "loss": 0.0612, "projector_lr": 2.387275826891648e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 3.515625, "rewards_train/rejected": -4.90625, "sft_loss": 0.73828125, "step": 1910 }, { "dpo_loss": 0.294921875, "epoch": 0.31, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 7.955538640255768e-07, "loss": 0.2538, "projector_lr": 2.3866615920767306e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.390625, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.875, "sft_loss": 0.83203125, "step": 1911 }, { "dpo_loss": 0.1689453125, "epoch": 0.31, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 7.953490428835557e-07, "loss": 0.1611, "projector_lr": 2.386047128650667e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4375, "rewards_train/margins": 3.953125, "rewards_train/rejected": -5.40625, "sft_loss": 0.77734375, "step": 1912 }, { "dpo_loss": 0.1328125, "epoch": 0.31, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 7.951441455906292e-07, "loss": 0.1377, "projector_lr": 2.3854324367718876e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.74609375, "rewards_train/margins": 2.96875, "rewards_train/rejected": -3.71875, "sft_loss": 0.70703125, "step": 1913 }, { "dpo_loss": 0.53515625, "epoch": 0.31, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 7.949391721996265e-07, "loss": 0.3058, "projector_lr": 2.3848175165988794e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.90625, "rewards_train/margins": 1.9296875, "rewards_train/rejected": -2.828125, "sft_loss": 0.8203125, "step": 1914 }, { "dpo_loss": 0.61328125, "epoch": 0.31, "final_loss": 0.61328125, "grad_norm": 0.0, "learning_rate": 7.947341227633968e-07, "loss": 0.4065, "projector_lr": 2.3842023682901906e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.390625, "rewards_train/margins": 2.484375, "rewards_train/rejected": -3.875, "sft_loss": 0.62109375, "step": 1915 }, { "dpo_loss": 0.2431640625, "epoch": 0.31, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 7.945289973348086e-07, "loss": 0.2258, "projector_lr": 2.383586992004426e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 3.953125, "rewards_train/rejected": -4.65625, "sft_loss": 0.6875, "step": 1916 }, { "dpo_loss": 0.15234375, "epoch": 0.31, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 7.943237959667502e-07, "loss": 0.1339, "projector_lr": 2.382971387900251e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -6.03125, "sft_loss": 0.81640625, "step": 1917 }, { "dpo_loss": 0.60546875, "epoch": 0.31, "final_loss": 0.60546875, "grad_norm": 0.0, "learning_rate": 7.941185187121295e-07, "loss": 0.3496, "projector_lr": 2.382355556136389e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.03125, "rewards_train/margins": 3.296875, "rewards_train/rejected": -4.3125, "sft_loss": 0.83984375, "step": 1918 }, { "dpo_loss": 0.357421875, "epoch": 0.31, "final_loss": 0.357421875, "grad_norm": 0.0, "learning_rate": 7.939131656238738e-07, "loss": 0.2482, "projector_lr": 2.3817394968716216e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.75, "rewards_train/margins": 2.15625, "rewards_train/rejected": -3.90625, "sft_loss": 1.140625, "step": 1919 }, { "dpo_loss": 0.392578125, "epoch": 0.31, "final_loss": 0.392578125, "grad_norm": 0.0, "learning_rate": 7.937077367549299e-07, "loss": 0.2318, "projector_lr": 2.3811232102647898e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 2.515625, "rewards_train/rejected": -3.953125, "sft_loss": 0.69921875, "step": 1920 }, { "dpo_loss": 0.462890625, "epoch": 0.31, "final_loss": 0.462890625, "grad_norm": 0.0, "learning_rate": 7.935022321582642e-07, "loss": 0.5015, "projector_lr": 2.380506696474793e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 2.46875, "rewards_train/rejected": -3.703125, "sft_loss": 0.84375, "step": 1921 }, { "dpo_loss": 0.462890625, "epoch": 0.31, "final_loss": 0.462890625, "grad_norm": 0.0, "learning_rate": 7.932966518868632e-07, "loss": 0.2798, "projector_lr": 2.37988995566059e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.3125, "sft_loss": 0.6328125, "step": 1922 }, { "dpo_loss": 0.09130859375, "epoch": 0.31, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 7.930909959937319e-07, "loss": 0.1624, "projector_lr": 2.379272987981196e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.5, "sft_loss": 0.671875, "step": 1923 }, { "dpo_loss": 0.138671875, "epoch": 0.31, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 7.928852645318955e-07, "loss": 0.1603, "projector_lr": 2.3786557935956868e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.4375, "sft_loss": 0.8359375, "step": 1924 }, { "dpo_loss": 0.0732421875, "epoch": 0.31, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 7.926794575543983e-07, "loss": 0.2267, "projector_lr": 2.378038372663195e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71484375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.0625, "sft_loss": 0.734375, "step": 1925 }, { "dpo_loss": 0.2412109375, "epoch": 0.31, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 7.924735751143043e-07, "loss": 0.1863, "projector_lr": 2.3774207253429132e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 3.578125, "rewards_train/rejected": -4.53125, "sft_loss": 0.73828125, "step": 1926 }, { "dpo_loss": 0.09619140625, "epoch": 0.31, "final_loss": 0.09619140625, "grad_norm": 0.0, "learning_rate": 7.922676172646973e-07, "loss": 0.191, "projector_lr": 2.3768028517940922e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 3.734375, "rewards_train/rejected": -4.5, "sft_loss": 0.609375, "step": 1927 }, { "dpo_loss": 0.06103515625, "epoch": 0.31, "final_loss": 0.06103515625, "grad_norm": 0.0, "learning_rate": 7.920615840586798e-07, "loss": 0.1583, "projector_lr": 2.3761847521760392e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.53125, "sft_loss": 0.86328125, "step": 1928 }, { "dpo_loss": 0.0908203125, "epoch": 0.31, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 7.918554755493739e-07, "loss": 0.1444, "projector_lr": 2.375566426648122e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.125, "sft_loss": 0.81640625, "step": 1929 }, { "dpo_loss": 0.244140625, "epoch": 0.31, "final_loss": 0.244140625, "grad_norm": 0.0, "learning_rate": 7.916492917899216e-07, "loss": 0.1723, "projector_lr": 2.374947875369765e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 3.703125, "rewards_train/rejected": -5.40625, "sft_loss": 0.6796875, "step": 1930 }, { "dpo_loss": 0.032958984375, "epoch": 0.31, "final_loss": 0.032958984375, "grad_norm": 0.0, "learning_rate": 7.91443032833484e-07, "loss": 0.071, "projector_lr": 2.3743290985004522e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.625, "sft_loss": 0.87109375, "step": 1931 }, { "dpo_loss": 0.31640625, "epoch": 0.31, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 7.912366987332415e-07, "loss": 0.2006, "projector_lr": 2.373710096199725e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.921875, "rewards_train/margins": 2.40625, "rewards_train/rejected": -5.34375, "sft_loss": 0.51171875, "step": 1932 }, { "dpo_loss": 0.1044921875, "epoch": 0.31, "final_loss": 0.1044921875, "grad_norm": 0.0, "learning_rate": 7.910302895423941e-07, "loss": 0.1474, "projector_lr": 2.3730908686271823e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 3.734375, "rewards_train/rejected": -4.53125, "sft_loss": 0.90625, "step": 1933 }, { "dpo_loss": 0.259765625, "epoch": 0.31, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 7.908238053141607e-07, "loss": 0.1452, "projector_lr": 2.3724714159424825e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6875, "rewards_train/margins": 1.90625, "rewards_train/rejected": -3.59375, "sft_loss": 0.85546875, "step": 1934 }, { "dpo_loss": 0.0849609375, "epoch": 0.31, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 7.9061724610178e-07, "loss": 0.0772, "projector_lr": 2.3718517383053403e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.71875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.9375, "sft_loss": 0.76171875, "step": 1935 }, { "dpo_loss": 0.375, "epoch": 0.31, "final_loss": 0.375, "grad_norm": 0.0, "learning_rate": 7.904106119585103e-07, "loss": 0.3013, "projector_lr": 2.3712318358755308e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 3.78125, "rewards_train/rejected": -5.21875, "sft_loss": 0.5234375, "step": 1936 }, { "dpo_loss": 0.078125, "epoch": 0.31, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 7.902039029376281e-07, "loss": 0.2183, "projector_lr": 2.3706117088128847e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 3.96875, "rewards_train/rejected": -5.375, "sft_loss": 0.68359375, "step": 1937 }, { "dpo_loss": 0.1689453125, "epoch": 0.31, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 7.899971190924308e-07, "loss": 0.2425, "projector_lr": 2.3699913572772924e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.375, "sft_loss": 0.6875, "step": 1938 }, { "dpo_loss": 0.1943359375, "epoch": 0.31, "final_loss": 0.1943359375, "grad_norm": 0.0, "learning_rate": 7.897902604762334e-07, "loss": 0.2249, "projector_lr": 2.3693707814287007e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.373046875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.59375, "sft_loss": 0.75390625, "step": 1939 }, { "dpo_loss": 0.451171875, "epoch": 0.31, "final_loss": 0.451171875, "grad_norm": 0.0, "learning_rate": 7.895833271423715e-07, "loss": 0.4071, "projector_lr": 2.368749981427115e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 1.7890625, "rewards_train/rejected": -3.4375, "sft_loss": 0.84375, "step": 1940 }, { "dpo_loss": 0.04150390625, "epoch": 0.31, "final_loss": 0.04150390625, "grad_norm": 0.0, "learning_rate": 7.893763191441993e-07, "loss": 0.0935, "projector_lr": 2.368128957432598e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.71875, "sft_loss": 0.7578125, "step": 1941 }, { "dpo_loss": 0.2431640625, "epoch": 0.31, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 7.891692365350904e-07, "loss": 0.2368, "projector_lr": 2.3675077096052712e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.390625, "rewards_train/margins": 3.796875, "rewards_train/rejected": -5.1875, "sft_loss": 0.87890625, "step": 1942 }, { "dpo_loss": 0.126953125, "epoch": 0.31, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 7.889620793684377e-07, "loss": 0.1093, "projector_lr": 2.3668862381053133e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 5.65625, "rewards_train/rejected": -7.0625, "sft_loss": 0.73046875, "step": 1943 }, { "dpo_loss": 0.310546875, "epoch": 0.31, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 7.887548476976533e-07, "loss": 0.1796, "projector_lr": 2.3662645430929597e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.859375, "rewards_train/margins": 4.125, "rewards_train/rejected": -6.0, "sft_loss": 0.671875, "step": 1944 }, { "dpo_loss": 0.201171875, "epoch": 0.31, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 7.885475415761685e-07, "loss": 0.2911, "projector_lr": 2.3656426247285054e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 3.578125, "rewards_train/rejected": -5.1875, "sft_loss": 0.95703125, "step": 1945 }, { "dpo_loss": 0.1728515625, "epoch": 0.31, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 7.883401610574336e-07, "loss": 0.1713, "projector_lr": 2.365020483172301e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 2.53125, "rewards_train/rejected": -4.34375, "sft_loss": 1.1015625, "step": 1946 }, { "dpo_loss": 0.328125, "epoch": 0.31, "final_loss": 0.328125, "grad_norm": 0.0, "learning_rate": 7.881327061949185e-07, "loss": 0.2326, "projector_lr": 2.3643981185847555e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.015625, "rewards_train/margins": 2.828125, "rewards_train/rejected": -4.84375, "sft_loss": 0.81640625, "step": 1947 }, { "dpo_loss": 0.38671875, "epoch": 0.31, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 7.879251770421118e-07, "loss": 0.4268, "projector_lr": 2.363775531126336e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.0625, "rewards_train/margins": 3.921875, "rewards_train/rejected": -5.96875, "sft_loss": 0.89453125, "step": 1948 }, { "dpo_loss": 0.04296875, "epoch": 0.31, "final_loss": 0.04296875, "grad_norm": 0.0, "learning_rate": 7.877175736525217e-07, "loss": 0.2645, "projector_lr": 2.3631527209575653e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 6.125, "rewards_train/rejected": -7.4375, "sft_loss": 0.65625, "step": 1949 }, { "dpo_loss": 0.140625, "epoch": 0.31, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 7.875098960796753e-07, "loss": 0.1446, "projector_lr": 2.3625296882390263e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.9375, "sft_loss": 0.83984375, "step": 1950 }, { "dpo_loss": 0.1982421875, "epoch": 0.31, "final_loss": 0.1982421875, "grad_norm": 0.0, "learning_rate": 7.873021443771189e-07, "loss": 0.2239, "projector_lr": 2.3619064331313567e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.625, "sft_loss": 0.7265625, "step": 1951 }, { "dpo_loss": 0.130859375, "epoch": 0.31, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 7.870943185984174e-07, "loss": 0.1637, "projector_lr": 2.3612829557952524e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9375, "rewards_train/margins": 3.40625, "rewards_train/rejected": -5.34375, "sft_loss": 0.8359375, "step": 1952 }, { "dpo_loss": 0.2412109375, "epoch": 0.31, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 7.86886418797156e-07, "loss": 0.321, "projector_lr": 2.3606592563914683e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.046875, "rewards_train/margins": 3.109375, "rewards_train/rejected": -4.15625, "sft_loss": 0.75, "step": 1953 }, { "dpo_loss": 0.609375, "epoch": 0.31, "final_loss": 0.609375, "grad_norm": 0.0, "learning_rate": 7.866784450269377e-07, "loss": 0.487, "projector_lr": 2.360035335080813e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.453125, "rewards_train/margins": 1.6796875, "rewards_train/rejected": -4.125, "sft_loss": 1.140625, "step": 1954 }, { "dpo_loss": 0.7109375, "epoch": 0.31, "final_loss": 0.7109375, "grad_norm": 0.0, "learning_rate": 7.864703973413854e-07, "loss": 0.3995, "projector_lr": 2.3594111920241563e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.984375, "rewards_train/margins": 2.5625, "rewards_train/rejected": -5.53125, "sft_loss": 1.0078125, "step": 1955 }, { "dpo_loss": 0.73828125, "epoch": 0.31, "final_loss": 0.73828125, "grad_norm": 0.0, "learning_rate": 7.862622757941409e-07, "loss": 0.4939, "projector_lr": 2.3587868273824226e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.8125, "rewards_train/margins": 2.390625, "rewards_train/rejected": -4.1875, "sft_loss": 0.875, "step": 1956 }, { "dpo_loss": 0.34765625, "epoch": 0.31, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 7.860540804388643e-07, "loss": 0.2763, "projector_lr": 2.3581622413165932e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.15625, "sft_loss": 0.82421875, "step": 1957 }, { "dpo_loss": 0.03662109375, "epoch": 0.31, "final_loss": 0.03662109375, "grad_norm": 0.0, "learning_rate": 7.858458113292361e-07, "loss": 0.1375, "projector_lr": 2.3575374339877086e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -7.28125, "sft_loss": 0.90625, "step": 1958 }, { "dpo_loss": 0.279296875, "epoch": 0.31, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 7.856374685189547e-07, "loss": 0.1958, "projector_lr": 2.356912405556864e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -5.1875, "sft_loss": 1.140625, "step": 1959 }, { "dpo_loss": 0.15234375, "epoch": 0.31, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 7.85429052061738e-07, "loss": 0.0937, "projector_lr": 2.356287156185214e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.78125, "sft_loss": 0.51953125, "step": 1960 }, { "dpo_loss": 0.25390625, "epoch": 0.31, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 7.852205620113224e-07, "loss": 0.1908, "projector_lr": 2.355661686033967e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.09375, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.6875, "sft_loss": 0.828125, "step": 1961 }, { "dpo_loss": 0.01434326171875, "epoch": 0.31, "final_loss": 0.01434326171875, "grad_norm": 0.0, "learning_rate": 7.850119984214641e-07, "loss": 0.1987, "projector_lr": 2.3550359952643926e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.53125, "rewards_train/margins": 7.5, "rewards_train/rejected": -9.0, "sft_loss": 0.6328125, "step": 1962 }, { "dpo_loss": 0.34765625, "epoch": 0.31, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 7.848033613459377e-07, "loss": 0.2477, "projector_lr": 2.3544100840378133e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7890625, "rewards_train/margins": 3.75, "rewards_train/rejected": -5.53125, "sft_loss": 1.1484375, "step": 1963 }, { "dpo_loss": 0.2734375, "epoch": 0.31, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 7.845946508385368e-07, "loss": 0.2176, "projector_lr": 2.3537839525156106e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.84375, "sft_loss": 0.66015625, "step": 1964 }, { "dpo_loss": 0.0400390625, "epoch": 0.31, "final_loss": 0.0400390625, "grad_norm": 0.0, "learning_rate": 7.84385866953074e-07, "loss": 0.0367, "projector_lr": 2.353157600859222e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.859375, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.4375, "sft_loss": 0.90625, "step": 1965 }, { "dpo_loss": 0.291015625, "epoch": 0.31, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 7.841770097433807e-07, "loss": 0.3594, "projector_lr": 2.352531029230142e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -5.1875, "sft_loss": 0.59375, "step": 1966 }, { "dpo_loss": 0.1240234375, "epoch": 0.31, "final_loss": 0.1240234375, "grad_norm": 0.0, "learning_rate": 7.839680792633073e-07, "loss": 0.2523, "projector_lr": 2.3519042377899216e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.796875, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.625, "sft_loss": 1.453125, "step": 1967 }, { "dpo_loss": 0.27734375, "epoch": 0.31, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 7.837590755667232e-07, "loss": 0.1764, "projector_lr": 2.3512772267001697e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 8.6875, "rewards_train/rejected": -9.75, "sft_loss": 0.703125, "step": 1968 }, { "dpo_loss": 0.1630859375, "epoch": 0.32, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 7.835499987075164e-07, "loss": 0.2731, "projector_lr": 2.3506499961225497e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.9375, "sft_loss": 0.51171875, "step": 1969 }, { "dpo_loss": 0.1044921875, "epoch": 0.32, "final_loss": 0.1044921875, "grad_norm": 0.0, "learning_rate": 7.833408487395943e-07, "loss": 0.1308, "projector_lr": 2.350022546218783e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 3.40625, "rewards_train/rejected": -4.3125, "sft_loss": 0.46484375, "step": 1970 }, { "dpo_loss": 0.25, "epoch": 0.32, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 7.831316257168825e-07, "loss": 0.2277, "projector_lr": 2.3493948771506476e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.640625, "rewards_train/margins": 5.71875, "rewards_train/rejected": -7.375, "sft_loss": 0.72265625, "step": 1971 }, { "dpo_loss": 0.064453125, "epoch": 0.32, "final_loss": 0.064453125, "grad_norm": 0.0, "learning_rate": 7.829223296933259e-07, "loss": 0.1152, "projector_lr": 2.3487669890799776e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.1875, "sft_loss": 0.58984375, "step": 1972 }, { "dpo_loss": 0.1044921875, "epoch": 0.32, "final_loss": 0.1044921875, "grad_norm": 0.0, "learning_rate": 7.827129607228877e-07, "loss": 0.1608, "projector_lr": 2.3481388821686636e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.125, "rewards_train/margins": 2.984375, "rewards_train/rejected": -4.09375, "sft_loss": 0.875, "step": 1973 }, { "dpo_loss": 0.040771484375, "epoch": 0.32, "final_loss": 0.040771484375, "grad_norm": 0.0, "learning_rate": 7.825035188595508e-07, "loss": 0.0727, "projector_lr": 2.3475105565786524e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.125, "sft_loss": 0.76953125, "step": 1974 }, { "dpo_loss": 0.2275390625, "epoch": 0.32, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 7.822940041573158e-07, "loss": 0.1957, "projector_lr": 2.3468820124719476e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 3.859375, "rewards_train/rejected": -5.5625, "sft_loss": 0.9453125, "step": 1975 }, { "dpo_loss": 0.4375, "epoch": 0.32, "final_loss": 0.4375, "grad_norm": 0.0, "learning_rate": 7.82084416670203e-07, "loss": 0.3091, "projector_lr": 2.346253250010609e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 2.53125, "rewards_train/rejected": -4.3125, "sft_loss": 0.765625, "step": 1976 }, { "dpo_loss": 0.146484375, "epoch": 0.32, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 7.818747564522509e-07, "loss": 0.174, "projector_lr": 2.345624269356753e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.90625, "sft_loss": 0.7734375, "step": 1977 }, { "dpo_loss": 0.016845703125, "epoch": 0.32, "final_loss": 0.016845703125, "grad_norm": 0.0, "learning_rate": 7.81665023557517e-07, "loss": 0.028, "projector_lr": 2.3449950706725515e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.3125, "sft_loss": 0.6640625, "step": 1978 }, { "dpo_loss": 0.06591796875, "epoch": 0.32, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 7.814552180400775e-07, "loss": 0.1704, "projector_lr": 2.3443656541202326e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -6.1875, "sft_loss": 0.80078125, "step": 1979 }, { "dpo_loss": 0.29296875, "epoch": 0.32, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 7.812453399540273e-07, "loss": 0.1814, "projector_lr": 2.343736019862082e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5625, "rewards_train/margins": 2.96875, "rewards_train/rejected": -4.53125, "sft_loss": 1.0625, "step": 1980 }, { "dpo_loss": 0.036376953125, "epoch": 0.32, "final_loss": 0.036376953125, "grad_norm": 0.0, "learning_rate": 7.810353893534802e-07, "loss": 0.0273, "projector_lr": 2.3431061680604405e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.71875, "sft_loss": 0.5703125, "step": 1981 }, { "dpo_loss": 0.6171875, "epoch": 0.32, "final_loss": 0.6171875, "grad_norm": 0.0, "learning_rate": 7.808253662925682e-07, "loss": 0.3126, "projector_lr": 2.3424760988777044e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.9375, "rewards_train/margins": 3.75, "rewards_train/rejected": -5.6875, "sft_loss": 0.734375, "step": 1982 }, { "dpo_loss": 0.26171875, "epoch": 0.32, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 7.806152708254423e-07, "loss": 0.2136, "projector_lr": 2.341845812476327e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 2.703125, "rewards_train/rejected": -3.796875, "sft_loss": 0.8515625, "step": 1983 }, { "dpo_loss": 0.30859375, "epoch": 0.32, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 7.804051030062726e-07, "loss": 0.2121, "projector_lr": 2.3412153090188178e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.1875, "rewards_train/margins": 3.546875, "rewards_train/rejected": -5.71875, "sft_loss": 0.73828125, "step": 1984 }, { "dpo_loss": 0.00994873046875, "epoch": 0.32, "final_loss": 0.00994873046875, "grad_norm": 0.0, "learning_rate": 7.80194862889247e-07, "loss": 0.07, "projector_lr": 2.340584588667741e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.046875, "rewards_train/margins": 5.3125, "rewards_train/rejected": -7.34375, "sft_loss": 0.98828125, "step": 1985 }, { "dpo_loss": 0.416015625, "epoch": 0.32, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 7.799845505285726e-07, "loss": 0.2621, "projector_lr": 2.339953651585718e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.53125, "rewards_train/margins": 3.78125, "rewards_train/rejected": -5.3125, "sft_loss": 0.6875, "step": 1986 }, { "dpo_loss": 0.037109375, "epoch": 0.32, "final_loss": 0.037109375, "grad_norm": 0.0, "learning_rate": 7.79774165978475e-07, "loss": 0.1171, "projector_lr": 2.339322497935425e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.484375, "rewards_train/margins": 4.875, "rewards_train/rejected": -7.375, "sft_loss": 0.890625, "step": 1987 }, { "dpo_loss": 0.1494140625, "epoch": 0.32, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 7.795637092931985e-07, "loss": 0.2102, "projector_lr": 2.3386911278795957e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -6.5, "sft_loss": 1.109375, "step": 1988 }, { "dpo_loss": 0.244140625, "epoch": 0.32, "final_loss": 0.244140625, "grad_norm": 0.0, "learning_rate": 7.793531805270057e-07, "loss": 0.1582, "projector_lr": 2.3380595415810174e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8828125, "rewards_train/margins": 4.25, "rewards_train/rejected": -6.15625, "sft_loss": 0.88671875, "step": 1989 }, { "dpo_loss": 0.25390625, "epoch": 0.32, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 7.791425797341782e-07, "loss": 0.181, "projector_lr": 2.3374277392025347e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.765625, "rewards_train/margins": 3.078125, "rewards_train/rejected": -4.84375, "sft_loss": 1.0546875, "step": 1990 }, { "dpo_loss": 0.267578125, "epoch": 0.32, "final_loss": 0.267578125, "grad_norm": 0.0, "learning_rate": 7.789319069690156e-07, "loss": 0.363, "projector_lr": 2.3367957209070473e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 3.21875, "rewards_train/rejected": -4.625, "sft_loss": 0.83984375, "step": 1991 }, { "dpo_loss": 0.01068115234375, "epoch": 0.32, "final_loss": 0.01068115234375, "grad_norm": 0.0, "learning_rate": 7.787211622858369e-07, "loss": 0.2464, "projector_lr": 2.336163486857511e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 6.75, "rewards_train/rejected": -8.25, "sft_loss": 0.859375, "step": 1992 }, { "dpo_loss": 0.1220703125, "epoch": 0.32, "final_loss": 0.1220703125, "grad_norm": 0.0, "learning_rate": 7.785103457389791e-07, "loss": 0.0656, "projector_lr": 2.335531037216937e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 3.703125, "rewards_train/rejected": -5.5625, "sft_loss": 1.0, "step": 1993 }, { "dpo_loss": 0.1298828125, "epoch": 0.32, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 7.782994573827974e-07, "loss": 0.1571, "projector_lr": 2.3348983721483924e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6171875, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.78125, "sft_loss": 0.82421875, "step": 1994 }, { "dpo_loss": 0.18359375, "epoch": 0.32, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 7.780884972716661e-07, "loss": 0.2971, "projector_lr": 2.3342654918149987e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 3.296875, "rewards_train/rejected": -4.09375, "sft_loss": 0.5703125, "step": 1995 }, { "dpo_loss": 0.1484375, "epoch": 0.32, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 7.778774654599781e-07, "loss": 0.182, "projector_lr": 2.3336323963799343e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6875, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.71875, "sft_loss": 1.109375, "step": 1996 }, { "dpo_loss": 0.1357421875, "epoch": 0.32, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 7.776663620021438e-07, "loss": 0.2051, "projector_lr": 2.3329990860064313e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.5, "sft_loss": 0.7265625, "step": 1997 }, { "dpo_loss": 0.2734375, "epoch": 0.32, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 7.774551869525934e-07, "loss": 0.3642, "projector_lr": 2.3323655608577804e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.40625, "rewards_train/margins": 3.984375, "rewards_train/rejected": -6.375, "sft_loss": 0.98046875, "step": 1998 }, { "dpo_loss": 0.126953125, "epoch": 0.32, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 7.772439403657747e-07, "loss": 0.1934, "projector_lr": 2.3317318210973243e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.90625, "sft_loss": 0.6484375, "step": 1999 }, { "dpo_loss": 0.054443359375, "epoch": 0.32, "final_loss": 0.054443359375, "grad_norm": 0.0, "learning_rate": 7.770326222961541e-07, "loss": 0.1207, "projector_lr": 2.3310978668884623e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.375, "sft_loss": 1.0, "step": 2000 }, { "dpo_loss": 0.0206298828125, "epoch": 0.32, "final_loss": 0.0206298828125, "grad_norm": 0.0, "learning_rate": 7.768212327982167e-07, "loss": 0.0865, "projector_lr": 2.3304636983946502e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 6.90625, "rewards_train/rejected": -7.6875, "sft_loss": 0.69140625, "step": 2001 }, { "dpo_loss": 0.3125, "epoch": 0.32, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 7.766097719264656e-07, "loss": 0.254, "projector_lr": 2.3298293157793968e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.3125, "sft_loss": 0.76171875, "step": 2002 }, { "dpo_loss": 0.12109375, "epoch": 0.32, "final_loss": 0.12109375, "grad_norm": 0.0, "learning_rate": 7.763982397354227e-07, "loss": 0.064, "projector_lr": 2.329194719206268e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6875, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.0625, "sft_loss": 0.83203125, "step": 2003 }, { "dpo_loss": 0.06591796875, "epoch": 0.32, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 7.761866362796279e-07, "loss": 0.1223, "projector_lr": 2.328559908838884e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.890625, "rewards_train/margins": 5.28125, "rewards_train/rejected": -7.1875, "sft_loss": 0.66015625, "step": 2004 }, { "dpo_loss": 0.1328125, "epoch": 0.32, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 7.759749616136398e-07, "loss": 0.1217, "projector_lr": 2.3279248848409193e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9453125, "rewards_train/margins": 5.875, "rewards_train/rejected": -7.84375, "sft_loss": 0.55078125, "step": 2005 }, { "dpo_loss": 0.21875, "epoch": 0.32, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 7.757632157920352e-07, "loss": 0.1503, "projector_lr": 2.327289647376106e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.53125, "rewards_train/margins": 3.5, "rewards_train/rejected": -5.03125, "sft_loss": 0.8984375, "step": 2006 }, { "dpo_loss": 0.71875, "epoch": 0.32, "final_loss": 0.71875, "grad_norm": 0.0, "learning_rate": 7.755513988694094e-07, "loss": 0.6004, "projector_lr": 2.326654196608228e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 2.921875, "rewards_train/rejected": -3.8125, "sft_loss": 0.58203125, "step": 2007 }, { "dpo_loss": 0.333984375, "epoch": 0.32, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 7.753395109003757e-07, "loss": 0.2385, "projector_lr": 2.326018532701127e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.515625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.84375, "sft_loss": 0.859375, "step": 2008 }, { "dpo_loss": 0.12451171875, "epoch": 0.32, "final_loss": 0.12451171875, "grad_norm": 0.0, "learning_rate": 7.751275519395661e-07, "loss": 0.1493, "projector_lr": 2.3253826558186985e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.109375, "rewards_train/margins": 3.5, "rewards_train/rejected": -5.625, "sft_loss": 0.71484375, "step": 2009 }, { "dpo_loss": 0.5703125, "epoch": 0.32, "final_loss": 0.5703125, "grad_norm": 0.0, "learning_rate": 7.749155220416308e-07, "loss": 0.3506, "projector_lr": 2.3247465661248926e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.921875, "rewards_train/margins": 1.6328125, "rewards_train/rejected": -3.5625, "sft_loss": 0.8515625, "step": 2010 }, { "dpo_loss": 0.34765625, "epoch": 0.32, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 7.74703421261238e-07, "loss": 0.2044, "projector_lr": 2.3241102637837142e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.5, "sft_loss": 0.80859375, "step": 2011 }, { "dpo_loss": 0.31640625, "epoch": 0.32, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 7.744912496530747e-07, "loss": 0.1716, "projector_lr": 2.323473748959224e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 3.328125, "rewards_train/rejected": -5.0, "sft_loss": 0.87109375, "step": 2012 }, { "dpo_loss": 0.06640625, "epoch": 0.32, "final_loss": 0.06640625, "grad_norm": 0.0, "learning_rate": 7.742790072718457e-07, "loss": 0.0428, "projector_lr": 2.3228370218155373e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.5625, "sft_loss": 1.0078125, "step": 2013 }, { "dpo_loss": 0.291015625, "epoch": 0.32, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 7.740666941722742e-07, "loss": 0.2183, "projector_lr": 2.3222000825168227e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 3.234375, "rewards_train/rejected": -5.0, "sft_loss": 1.0, "step": 2014 }, { "dpo_loss": 0.115234375, "epoch": 0.32, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 7.738543104091015e-07, "loss": 0.171, "projector_lr": 2.321562931227305e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.78515625, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.53125, "sft_loss": 0.76171875, "step": 2015 }, { "dpo_loss": 0.15234375, "epoch": 0.32, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 7.736418560370876e-07, "loss": 0.143, "projector_lr": 2.320925568111263e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 3.84375, "rewards_train/rejected": -5.4375, "sft_loss": 0.85546875, "step": 2016 }, { "dpo_loss": 0.185546875, "epoch": 0.32, "final_loss": 0.185546875, "grad_norm": 0.0, "learning_rate": 7.734293311110103e-07, "loss": 0.1235, "projector_lr": 2.320287993333031e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.453125, "rewards_train/margins": 4.625, "rewards_train/rejected": -6.0625, "sft_loss": 0.71875, "step": 2017 }, { "dpo_loss": 0.12158203125, "epoch": 0.32, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 7.732167356856654e-07, "loss": 0.2307, "projector_lr": 2.3196502070569967e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 2.578125, "rewards_train/rejected": -3.15625, "sft_loss": 0.63671875, "step": 2018 }, { "dpo_loss": 0.484375, "epoch": 0.32, "final_loss": 0.484375, "grad_norm": 0.0, "learning_rate": 7.730040698158676e-07, "loss": 0.2908, "projector_lr": 2.319012209447603e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.5625, "rewards_train/margins": 3.0, "rewards_train/rejected": -4.5625, "sft_loss": 0.95703125, "step": 2019 }, { "dpo_loss": 0.142578125, "epoch": 0.32, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 7.727913335564489e-07, "loss": 0.232, "projector_lr": 2.318374000669347e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.375, "sft_loss": 0.84375, "step": 2020 }, { "dpo_loss": 0.419921875, "epoch": 0.32, "final_loss": 0.419921875, "grad_norm": 0.0, "learning_rate": 7.7257852696226e-07, "loss": 0.2644, "projector_lr": 2.31773558088678e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.875, "sft_loss": 0.72265625, "step": 2021 }, { "dpo_loss": 0.1884765625, "epoch": 0.32, "final_loss": 0.1884765625, "grad_norm": 0.0, "learning_rate": 7.723656500881695e-07, "loss": 0.2097, "projector_lr": 2.3170969502645084e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.703125, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.375, "sft_loss": 0.7890625, "step": 2022 }, { "dpo_loss": 0.119140625, "epoch": 0.32, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 7.721527029890644e-07, "loss": 0.2922, "projector_lr": 2.3164581089671933e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7109375, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.78125, "sft_loss": 0.890625, "step": 2023 }, { "dpo_loss": 0.3984375, "epoch": 0.32, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 7.719396857198495e-07, "loss": 0.2786, "projector_lr": 2.3158190571595487e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 2.828125, "rewards_train/rejected": -3.859375, "sft_loss": 0.734375, "step": 2024 }, { "dpo_loss": 0.11279296875, "epoch": 0.32, "final_loss": 0.11279296875, "grad_norm": 0.0, "learning_rate": 7.717265983354477e-07, "loss": 0.2506, "projector_lr": 2.3151797950063434e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.875, "sft_loss": 0.77734375, "step": 2025 }, { "dpo_loss": 0.376953125, "epoch": 0.32, "final_loss": 0.376953125, "grad_norm": 0.0, "learning_rate": 7.715134408908004e-07, "loss": 0.2981, "projector_lr": 2.3145403226724014e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 2.546875, "rewards_train/rejected": -3.90625, "sft_loss": 0.625, "step": 2026 }, { "dpo_loss": 0.345703125, "epoch": 0.32, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 7.713002134408664e-07, "loss": 0.5502, "projector_lr": 2.3139006403225995e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0625, "rewards_train/margins": 3.3125, "rewards_train/rejected": -4.375, "sft_loss": 0.8125, "step": 2027 }, { "dpo_loss": 0.06640625, "epoch": 0.32, "final_loss": 0.06640625, "grad_norm": 0.0, "learning_rate": 7.71086916040623e-07, "loss": 0.0754, "projector_lr": 2.3132607481218693e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0001220703125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.0, "sft_loss": 0.5859375, "step": 2028 }, { "dpo_loss": 0.3046875, "epoch": 0.32, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 7.708735487450656e-07, "loss": 0.3495, "projector_lr": 2.312620646235197e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 2.4375, "rewards_train/rejected": -3.703125, "sft_loss": 0.765625, "step": 2029 }, { "dpo_loss": 0.7890625, "epoch": 0.32, "final_loss": 0.7890625, "grad_norm": 0.0, "learning_rate": 7.706601116092073e-07, "loss": 0.466, "projector_lr": 2.311980334827622e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.09375, "rewards_train/margins": 2.578125, "rewards_train/rejected": -3.671875, "sft_loss": 0.84765625, "step": 2030 }, { "dpo_loss": 0.029296875, "epoch": 0.32, "final_loss": 0.029296875, "grad_norm": 0.0, "learning_rate": 7.704466046880794e-07, "loss": 0.2885, "projector_lr": 2.3113398140642386e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.453125, "rewards_train/margins": 5.78125, "rewards_train/rejected": -7.25, "sft_loss": 1.046875, "step": 2031 }, { "dpo_loss": 0.412109375, "epoch": 0.33, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 7.702330280367313e-07, "loss": 0.4227, "projector_lr": 2.310699084110194e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.1875, "rewards_train/margins": 2.0, "rewards_train/rejected": -3.1875, "sft_loss": 1.0234375, "step": 2032 }, { "dpo_loss": 0.2060546875, "epoch": 0.33, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 7.7001938171023e-07, "loss": 0.3523, "projector_lr": 2.3100581451306902e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.0, "sft_loss": 0.6875, "step": 2033 }, { "dpo_loss": 0.04345703125, "epoch": 0.33, "final_loss": 0.04345703125, "grad_norm": 0.0, "learning_rate": 7.698056657636607e-07, "loss": 0.1102, "projector_lr": 2.309416997290982e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.78125, "sft_loss": 0.84375, "step": 2034 }, { "dpo_loss": 0.2177734375, "epoch": 0.33, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 7.695918802521269e-07, "loss": 0.2588, "projector_lr": 2.308775640756381e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.171875, "rewards_train/margins": 6.625, "rewards_train/rejected": -7.8125, "sft_loss": 0.7421875, "step": 2035 }, { "dpo_loss": 0.125, "epoch": 0.33, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 7.693780252307491e-07, "loss": 0.1591, "projector_lr": 2.3081340756922474e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.234375, "rewards_train/margins": 3.421875, "rewards_train/rejected": -4.65625, "sft_loss": 0.8984375, "step": 2036 }, { "dpo_loss": 0.1640625, "epoch": 0.33, "final_loss": 0.1640625, "grad_norm": 0.0, "learning_rate": 7.691641007546668e-07, "loss": 0.2911, "projector_lr": 2.307492302264001e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.265625, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.5, "sft_loss": 0.7578125, "step": 2037 }, { "dpo_loss": 0.30859375, "epoch": 0.33, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 7.68950106879037e-07, "loss": 0.3445, "projector_lr": 2.306850320637111e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.828125, "rewards_train/margins": 3.375, "rewards_train/rejected": -4.21875, "sft_loss": 0.8828125, "step": 2038 }, { "dpo_loss": 0.384765625, "epoch": 0.33, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 7.68736043659034e-07, "loss": 0.293, "projector_lr": 2.306208130977102e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.62109375, "rewards_train/margins": 2.75, "rewards_train/rejected": -3.375, "sft_loss": 0.63671875, "step": 2039 }, { "dpo_loss": 0.2109375, "epoch": 0.33, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 7.685219111498507e-07, "loss": 0.2012, "projector_lr": 2.3055657334495523e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.21875, "rewards_train/rejected": -4.71875, "sft_loss": 0.7421875, "step": 2040 }, { "dpo_loss": 0.484375, "epoch": 0.33, "final_loss": 0.484375, "grad_norm": 0.0, "learning_rate": 7.68307709406698e-07, "loss": 0.391, "projector_lr": 2.304923128220094e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 2.765625, "rewards_train/rejected": -3.53125, "sft_loss": 0.86328125, "step": 2041 }, { "dpo_loss": 0.3203125, "epoch": 0.33, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 7.680934384848038e-07, "loss": 0.3287, "projector_lr": 2.3042803154544115e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.734375, "sft_loss": 0.58203125, "step": 2042 }, { "dpo_loss": 0.349609375, "epoch": 0.33, "final_loss": 0.349609375, "grad_norm": 0.0, "learning_rate": 7.678790984394145e-07, "loss": 0.2277, "projector_lr": 2.303637295318244e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.53125, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.96875, "sft_loss": 0.98046875, "step": 2043 }, { "dpo_loss": 0.1484375, "epoch": 0.33, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 7.676646893257945e-07, "loss": 0.2026, "projector_lr": 2.3029940679773837e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0264892578125, "rewards_train/margins": 4.375, "rewards_train/rejected": -4.40625, "sft_loss": 0.72265625, "step": 2044 }, { "dpo_loss": 0.03369140625, "epoch": 0.33, "final_loss": 0.03369140625, "grad_norm": 0.0, "learning_rate": 7.674502111992254e-07, "loss": 0.1778, "projector_lr": 2.3023506335976763e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.46875, "sft_loss": 0.7265625, "step": 2045 }, { "dpo_loss": 0.21484375, "epoch": 0.33, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 7.672356641150068e-07, "loss": 0.1391, "projector_lr": 2.3017069923450205e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 2.75, "rewards_train/rejected": -3.296875, "sft_loss": 0.67578125, "step": 2046 }, { "dpo_loss": 0.28125, "epoch": 0.33, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 7.670210481284561e-07, "loss": 0.2852, "projector_lr": 2.3010631443853682e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 3.75, "rewards_train/rejected": -5.15625, "sft_loss": 0.7421875, "step": 2047 }, { "dpo_loss": 0.2333984375, "epoch": 0.33, "final_loss": 0.2333984375, "grad_norm": 0.0, "learning_rate": 7.668063632949088e-07, "loss": 0.1731, "projector_lr": 2.3004190898847265e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.59375, "rewards_train/margins": 2.828125, "rewards_train/rejected": -4.4375, "sft_loss": 0.69921875, "step": 2048 }, { "dpo_loss": 0.23046875, "epoch": 0.33, "final_loss": 0.23046875, "grad_norm": 0.0, "learning_rate": 7.665916096697176e-07, "loss": 0.1654, "projector_lr": 2.299774829009153e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91015625, "rewards_train/margins": 2.546875, "rewards_train/rejected": -3.453125, "sft_loss": 0.78515625, "step": 2049 }, { "dpo_loss": 0.07568359375, "epoch": 0.33, "final_loss": 0.07568359375, "grad_norm": 0.0, "learning_rate": 7.663767873082532e-07, "loss": 0.077, "projector_lr": 2.29913036192476e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.359375, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.84375, "sft_loss": 0.765625, "step": 2050 }, { "dpo_loss": 0.2294921875, "epoch": 0.33, "final_loss": 0.2294921875, "grad_norm": 0.0, "learning_rate": 7.661618962659042e-07, "loss": 0.677, "projector_lr": 2.2984856887977126e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 2.875, "rewards_train/rejected": -3.765625, "sft_loss": 0.8671875, "step": 2051 }, { "dpo_loss": 0.2431640625, "epoch": 0.33, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 7.659469365980767e-07, "loss": 0.2342, "projector_lr": 2.29784080979423e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.55859375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -4.875, "sft_loss": 0.5703125, "step": 2052 }, { "dpo_loss": 0.26953125, "epoch": 0.33, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 7.657319083601943e-07, "loss": 0.2012, "projector_lr": 2.297195725080583e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.48828125, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.625, "sft_loss": 0.69921875, "step": 2053 }, { "dpo_loss": 0.65625, "epoch": 0.33, "final_loss": 0.65625, "grad_norm": 0.0, "learning_rate": 7.655168116076988e-07, "loss": 0.4179, "projector_lr": 2.296550434823097e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.96875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -7.1875, "sft_loss": 0.76171875, "step": 2054 }, { "dpo_loss": 0.177734375, "epoch": 0.33, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 7.653016463960494e-07, "loss": 0.2106, "projector_lr": 2.295904939188148e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.34375, "sft_loss": 0.6484375, "step": 2055 }, { "dpo_loss": 0.2177734375, "epoch": 0.33, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 7.650864127807224e-07, "loss": 0.1747, "projector_lr": 2.2952592383421674e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 2.96875, "rewards_train/rejected": -3.40625, "sft_loss": 0.84765625, "step": 2056 }, { "dpo_loss": 0.306640625, "epoch": 0.33, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 7.648711108172129e-07, "loss": 0.2689, "projector_lr": 2.294613332451639e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.390625, "rewards_train/margins": 3.078125, "rewards_train/rejected": -4.46875, "sft_loss": 1.03125, "step": 2057 }, { "dpo_loss": 0.3125, "epoch": 0.33, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 7.646557405610325e-07, "loss": 0.3814, "projector_lr": 2.2939672216830977e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 2.546875, "rewards_train/rejected": -3.15625, "sft_loss": 0.6953125, "step": 2058 }, { "dpo_loss": 0.06787109375, "epoch": 0.33, "final_loss": 0.06787109375, "grad_norm": 0.0, "learning_rate": 7.644403020677113e-07, "loss": 0.1907, "projector_lr": 2.293320906203134e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.251953125, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.125, "sft_loss": 0.484375, "step": 2059 }, { "dpo_loss": 0.10546875, "epoch": 0.33, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 7.64224795392796e-07, "loss": 0.1132, "projector_lr": 2.2926743861783882e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.953125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.96875, "sft_loss": 1.0625, "step": 2060 }, { "dpo_loss": 0.14453125, "epoch": 0.33, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 7.640092205918521e-07, "loss": 0.3647, "projector_lr": 2.2920276617755566e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.671875, "rewards_train/margins": 3.59375, "rewards_train/rejected": -4.28125, "sft_loss": 0.73828125, "step": 2061 }, { "dpo_loss": 0.11474609375, "epoch": 0.33, "final_loss": 0.11474609375, "grad_norm": 0.0, "learning_rate": 7.637935777204619e-07, "loss": 0.1458, "projector_lr": 2.2913807331613857e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.578125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.90625, "sft_loss": 0.85546875, "step": 2062 }, { "dpo_loss": 0.11669921875, "epoch": 0.33, "final_loss": 0.11669921875, "grad_norm": 0.0, "learning_rate": 7.635778668342249e-07, "loss": 0.0696, "projector_lr": 2.290733600502675e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.71875, "sft_loss": 0.78515625, "step": 2063 }, { "dpo_loss": 0.1728515625, "epoch": 0.33, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 7.63362087988759e-07, "loss": 0.163, "projector_lr": 2.290086263966277e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.42578125, "rewards_train/margins": 3.15625, "rewards_train/rejected": -3.578125, "sft_loss": 0.6328125, "step": 2064 }, { "dpo_loss": 0.01708984375, "epoch": 0.33, "final_loss": 0.01708984375, "grad_norm": 0.0, "learning_rate": 7.631462412396993e-07, "loss": 0.0258, "projector_lr": 2.289438723719098e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.17578125, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.3125, "sft_loss": 0.73828125, "step": 2065 }, { "dpo_loss": 0.26953125, "epoch": 0.33, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 7.629303266426981e-07, "loss": 0.4246, "projector_lr": 2.288790979928095e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 2.46875, "rewards_train/rejected": -3.15625, "sft_loss": 1.03125, "step": 2066 }, { "dpo_loss": 0.016357421875, "epoch": 0.33, "final_loss": 0.016357421875, "grad_norm": 0.0, "learning_rate": 7.627143442534257e-07, "loss": 0.3623, "projector_lr": 2.288143032760277e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 6.96875, "rewards_train/rejected": -7.6875, "sft_loss": 0.6875, "step": 2067 }, { "dpo_loss": 0.1669921875, "epoch": 0.33, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 7.624982941275695e-07, "loss": 0.1964, "projector_lr": 2.2874948823827085e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.015625, "rewards_train/margins": 4.125, "rewards_train/rejected": -6.15625, "sft_loss": 0.8125, "step": 2068 }, { "dpo_loss": 0.08154296875, "epoch": 0.33, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 7.622821763208343e-07, "loss": 0.1067, "projector_lr": 2.286846528962503e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.03125, "sft_loss": 0.6015625, "step": 2069 }, { "dpo_loss": 0.12890625, "epoch": 0.33, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 7.620659908889426e-07, "loss": 0.1145, "projector_lr": 2.286197972666828e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.357421875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -4.8125, "sft_loss": 0.875, "step": 2070 }, { "dpo_loss": 0.90625, "epoch": 0.33, "final_loss": 0.90625, "grad_norm": 0.0, "learning_rate": 7.618497378876345e-07, "loss": 0.4917, "projector_lr": 2.2855492136629037e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.0625, "rewards_train/margins": 3.40625, "rewards_train/rejected": -5.46875, "sft_loss": 0.7578125, "step": 2071 }, { "dpo_loss": 0.09765625, "epoch": 0.33, "final_loss": 0.09765625, "grad_norm": 0.0, "learning_rate": 7.616334173726671e-07, "loss": 0.1261, "projector_lr": 2.2849002521180014e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 6.84375, "rewards_train/rejected": -7.84375, "sft_loss": 0.67578125, "step": 2072 }, { "dpo_loss": 0.0162353515625, "epoch": 0.33, "final_loss": 0.0162353515625, "grad_norm": 0.0, "learning_rate": 7.614170293998152e-07, "loss": 0.0257, "projector_lr": 2.2842510881994457e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 7.0, "rewards_train/rejected": -7.9375, "sft_loss": 0.671875, "step": 2073 }, { "dpo_loss": 0.40625, "epoch": 0.33, "final_loss": 0.40625, "grad_norm": 0.0, "learning_rate": 7.612005740248707e-07, "loss": 0.2211, "projector_lr": 2.2836017220746123e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.59375, "sft_loss": 0.96484375, "step": 2074 }, { "dpo_loss": 0.314453125, "epoch": 0.33, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 7.609840513036433e-07, "loss": 0.2485, "projector_lr": 2.28295215391093e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 3.09375, "rewards_train/rejected": -4.0625, "sft_loss": 0.84375, "step": 2075 }, { "dpo_loss": 0.345703125, "epoch": 0.33, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 7.607674612919597e-07, "loss": 0.3874, "projector_lr": 2.282302383875879e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.46875, "sft_loss": 0.69921875, "step": 2076 }, { "dpo_loss": 0.171875, "epoch": 0.33, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 7.605508040456641e-07, "loss": 0.1956, "projector_lr": 2.281652412136992e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.796875, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.75, "sft_loss": 0.91015625, "step": 2077 }, { "dpo_loss": 0.2333984375, "epoch": 0.33, "final_loss": 0.2333984375, "grad_norm": 0.0, "learning_rate": 7.603340796206179e-07, "loss": 0.1538, "projector_lr": 2.281002238861854e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.40625, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.15625, "sft_loss": 0.7421875, "step": 2078 }, { "dpo_loss": 0.353515625, "epoch": 0.33, "final_loss": 0.353515625, "grad_norm": 0.0, "learning_rate": 7.601172880727002e-07, "loss": 0.205, "projector_lr": 2.2803518642181005e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 3.171875, "rewards_train/rejected": -4.46875, "sft_loss": 0.72265625, "step": 2079 }, { "dpo_loss": 0.41015625, "epoch": 0.33, "final_loss": 0.41015625, "grad_norm": 0.0, "learning_rate": 7.599004294578069e-07, "loss": 0.2259, "projector_lr": 2.279701288373421e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 2.765625, "rewards_train/rejected": -4.21875, "sft_loss": 1.0546875, "step": 2080 }, { "dpo_loss": 0.453125, "epoch": 0.33, "final_loss": 0.453125, "grad_norm": 0.0, "learning_rate": 7.596835038318517e-07, "loss": 0.2432, "projector_lr": 2.279050511495555e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 2.59375, "rewards_train/rejected": -3.90625, "sft_loss": 1.03125, "step": 2081 }, { "dpo_loss": 0.7265625, "epoch": 0.33, "final_loss": 0.7265625, "grad_norm": 0.0, "learning_rate": 7.59466511250765e-07, "loss": 0.4149, "projector_lr": 2.2783995337522952e-06, "rewards_train/accuracies": 0.375, "rewards_train/chosen": -2.046875, "rewards_train/margins": 1.28125, "rewards_train/rejected": -3.3125, "sft_loss": 0.796875, "step": 2082 }, { "dpo_loss": 0.07861328125, "epoch": 0.33, "final_loss": 0.07861328125, "grad_norm": 0.0, "learning_rate": 7.592494517704949e-07, "loss": 0.1693, "projector_lr": 2.277748355311485e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.09375, "sft_loss": 0.6328125, "step": 2083 }, { "dpo_loss": 0.08935546875, "epoch": 0.33, "final_loss": 0.08935546875, "grad_norm": 0.0, "learning_rate": 7.590323254470068e-07, "loss": 0.154, "projector_lr": 2.2770969763410204e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 3.09375, "rewards_train/rejected": -3.796875, "sft_loss": 0.875, "step": 2084 }, { "dpo_loss": 0.37890625, "epoch": 0.33, "final_loss": 0.37890625, "grad_norm": 0.0, "learning_rate": 7.588151323362829e-07, "loss": 0.223, "projector_lr": 2.2764453970088487e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.046875, "rewards_train/margins": 2.578125, "rewards_train/rejected": -3.625, "sft_loss": 1.0078125, "step": 2085 }, { "dpo_loss": 0.171875, "epoch": 0.33, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 7.585978724943232e-07, "loss": 0.1021, "projector_lr": 2.2757936174829696e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.921875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.125, "sft_loss": 0.52734375, "step": 2086 }, { "dpo_loss": 0.251953125, "epoch": 0.33, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 7.583805459771442e-07, "loss": 0.2572, "projector_lr": 2.2751416379314328e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.5625, "sft_loss": 0.765625, "step": 2087 }, { "dpo_loss": 0.0966796875, "epoch": 0.33, "final_loss": 0.0966796875, "grad_norm": 0.0, "learning_rate": 7.581631528407801e-07, "loss": 0.0617, "projector_lr": 2.2744894585223407e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.234375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.59375, "sft_loss": 0.85546875, "step": 2088 }, { "dpo_loss": 0.07421875, "epoch": 0.33, "final_loss": 0.07421875, "grad_norm": 0.0, "learning_rate": 7.579456931412825e-07, "loss": 0.0644, "projector_lr": 2.2738370794238473e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 5.5, "rewards_train/rejected": -7.28125, "sft_loss": 0.69921875, "step": 2089 }, { "dpo_loss": 0.10693359375, "epoch": 0.33, "final_loss": 0.10693359375, "grad_norm": 0.0, "learning_rate": 7.57728166934719e-07, "loss": 0.2078, "projector_lr": 2.2731845008041576e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.15625, "sft_loss": 0.875, "step": 2090 }, { "dpo_loss": 0.267578125, "epoch": 0.33, "final_loss": 0.267578125, "grad_norm": 0.0, "learning_rate": 7.575105742771763e-07, "loss": 0.1958, "projector_lr": 2.2725317228315288e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.03125, "rewards_train/margins": 3.046875, "rewards_train/rejected": -4.0625, "sft_loss": 0.7890625, "step": 2091 }, { "dpo_loss": 0.0245361328125, "epoch": 0.33, "final_loss": 0.0245361328125, "grad_norm": 0.0, "learning_rate": 7.572929152247561e-07, "loss": 0.2031, "projector_lr": 2.2718787456742684e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53515625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.3125, "sft_loss": 0.7109375, "step": 2092 }, { "dpo_loss": 0.2001953125, "epoch": 0.33, "final_loss": 0.2001953125, "grad_norm": 0.0, "learning_rate": 7.570751898335788e-07, "loss": 0.146, "projector_lr": 2.2712255695007365e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.15625, "sft_loss": 0.8203125, "step": 2093 }, { "dpo_loss": 0.220703125, "epoch": 0.34, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 7.56857398159781e-07, "loss": 0.1852, "projector_lr": 2.270572194479343e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.90625, "rewards_train/margins": 3.859375, "rewards_train/rejected": -4.78125, "sft_loss": 0.88671875, "step": 2094 }, { "dpo_loss": 0.103515625, "epoch": 0.34, "final_loss": 0.103515625, "grad_norm": 0.0, "learning_rate": 7.56639540259517e-07, "loss": 0.1203, "projector_lr": 2.269918620778551e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.78125, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.0625, "sft_loss": 0.671875, "step": 2095 }, { "dpo_loss": 0.140625, "epoch": 0.34, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 7.564216161889576e-07, "loss": 0.1873, "projector_lr": 2.269264848566873e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.140625, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.15625, "sft_loss": 1.203125, "step": 2096 }, { "dpo_loss": 0.333984375, "epoch": 0.34, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 7.562036260042909e-07, "loss": 0.3968, "projector_lr": 2.2686108780128733e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 2.90625, "rewards_train/rejected": -4.1875, "sft_loss": 0.734375, "step": 2097 }, { "dpo_loss": 0.244140625, "epoch": 0.34, "final_loss": 0.244140625, "grad_norm": 0.0, "learning_rate": 7.559855697617225e-07, "loss": 0.2993, "projector_lr": 2.2679567092851677e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.21875, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.96875, "sft_loss": 0.7734375, "step": 2098 }, { "dpo_loss": 0.2412109375, "epoch": 0.34, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 7.557674475174745e-07, "loss": 0.182, "projector_lr": 2.2673023425524236e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 3.671875, "rewards_train/rejected": -4.59375, "sft_loss": 0.7421875, "step": 2099 }, { "dpo_loss": 0.384765625, "epoch": 0.34, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 7.555492593277859e-07, "loss": 0.349, "projector_lr": 2.266647777983358e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.21875, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.59375, "sft_loss": 0.6875, "step": 2100 }, { "dpo_loss": 0.53515625, "epoch": 0.34, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 7.55331005248913e-07, "loss": 0.4078, "projector_lr": 2.2659930157467393e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 1.609375, "rewards_train/rejected": -2.828125, "sft_loss": 0.8515625, "step": 2101 }, { "dpo_loss": 0.259765625, "epoch": 0.34, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 7.551126853371292e-07, "loss": 0.2297, "projector_lr": 2.265338056011388e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.8125, "sft_loss": 0.90625, "step": 2102 }, { "dpo_loss": 0.26953125, "epoch": 0.34, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 7.548942996487246e-07, "loss": 0.292, "projector_lr": 2.2646828989461736e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 2.09375, "rewards_train/rejected": -3.40625, "sft_loss": 0.90234375, "step": 2103 }, { "dpo_loss": 0.169921875, "epoch": 0.34, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 7.546758482400064e-07, "loss": 0.15, "projector_lr": 2.2640275447200193e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.78125, "sft_loss": 0.921875, "step": 2104 }, { "dpo_loss": 0.0947265625, "epoch": 0.34, "final_loss": 0.0947265625, "grad_norm": 0.0, "learning_rate": 7.544573311672986e-07, "loss": 0.1067, "projector_lr": 2.263371993501896e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.734375, "rewards_train/margins": 3.953125, "rewards_train/rejected": -4.6875, "sft_loss": 0.7109375, "step": 2105 }, { "dpo_loss": 0.248046875, "epoch": 0.34, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 7.542387484869424e-07, "loss": 0.1691, "projector_lr": 2.2627162454608273e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.4375, "rewards_train/margins": 2.875, "rewards_train/rejected": -3.3125, "sft_loss": 0.67578125, "step": 2106 }, { "dpo_loss": 0.1318359375, "epoch": 0.34, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 7.540201002552957e-07, "loss": 0.246, "projector_lr": 2.2620603007658875e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33203125, "rewards_train/margins": 3.609375, "rewards_train/rejected": -3.9375, "sft_loss": 0.77734375, "step": 2107 }, { "dpo_loss": 0.2373046875, "epoch": 0.34, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 7.538013865287333e-07, "loss": 0.1923, "projector_lr": 2.2614041595861998e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.078125, "rewards_train/margins": 5.0, "rewards_train/rejected": -7.09375, "sft_loss": 0.69921875, "step": 2108 }, { "dpo_loss": 0.043212890625, "epoch": 0.34, "final_loss": 0.043212890625, "grad_norm": 0.0, "learning_rate": 7.535826073636471e-07, "loss": 0.3915, "projector_lr": 2.2607478220909415e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.3125, "sft_loss": 0.5546875, "step": 2109 }, { "dpo_loss": 0.031494140625, "epoch": 0.34, "final_loss": 0.031494140625, "grad_norm": 0.0, "learning_rate": 7.533637628164455e-07, "loss": 0.0996, "projector_lr": 2.260091288449337e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5, "rewards_train/margins": 4.375, "rewards_train/rejected": -4.875, "sft_loss": 0.64453125, "step": 2110 }, { "dpo_loss": 0.1767578125, "epoch": 0.34, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 7.531448529435543e-07, "loss": 0.1641, "projector_lr": 2.259434558830663e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.484375, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.5625, "sft_loss": 0.78515625, "step": 2111 }, { "dpo_loss": 0.189453125, "epoch": 0.34, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 7.529258778014153e-07, "loss": 0.2099, "projector_lr": 2.2587776334042464e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.6875, "sft_loss": 0.703125, "step": 2112 }, { "dpo_loss": 0.2060546875, "epoch": 0.34, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 7.527068374464883e-07, "loss": 0.1137, "projector_lr": 2.258120512339465e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.15625, "sft_loss": 0.765625, "step": 2113 }, { "dpo_loss": 0.1328125, "epoch": 0.34, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 7.524877319352487e-07, "loss": 0.0989, "projector_lr": 2.2574631958057464e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.3125, "sft_loss": 0.7578125, "step": 2114 }, { "dpo_loss": 0.373046875, "epoch": 0.34, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 7.522685613241894e-07, "loss": 0.5075, "projector_lr": 2.2568056839725684e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.21875, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.625, "sft_loss": 0.51953125, "step": 2115 }, { "dpo_loss": 0.0068359375, "epoch": 0.34, "final_loss": 0.0068359375, "grad_norm": 0.0, "learning_rate": 7.520493256698204e-07, "loss": 0.1443, "projector_lr": 2.256147977009461e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 6.84375, "rewards_train/rejected": -8.0625, "sft_loss": 0.6953125, "step": 2116 }, { "dpo_loss": 0.2060546875, "epoch": 0.34, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 7.518300250286673e-07, "loss": 0.1501, "projector_lr": 2.255490075086002e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.640625, "sft_loss": 0.6953125, "step": 2117 }, { "dpo_loss": 0.451171875, "epoch": 0.34, "final_loss": 0.451171875, "grad_norm": 0.0, "learning_rate": 7.516106594572738e-07, "loss": 0.3074, "projector_lr": 2.2548319783718214e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.625, "rewards_train/margins": 1.765625, "rewards_train/rejected": -3.390625, "sft_loss": 0.84765625, "step": 2118 }, { "dpo_loss": 0.1953125, "epoch": 0.34, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 7.513912290121994e-07, "loss": 0.17, "projector_lr": 2.254173687036598e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.03125, "sft_loss": 0.8046875, "step": 2119 }, { "dpo_loss": 0.1259765625, "epoch": 0.34, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 7.511717337500205e-07, "loss": 0.4608, "projector_lr": 2.2535152012500616e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.28125, "rewards_train/margins": 2.875, "rewards_train/rejected": -4.15625, "sft_loss": 0.69140625, "step": 2120 }, { "dpo_loss": 0.65234375, "epoch": 0.34, "final_loss": 0.65234375, "grad_norm": 0.0, "learning_rate": 7.509521737273306e-07, "loss": 0.5315, "projector_lr": 2.252856521181992e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 2.140625, "rewards_train/rejected": -3.34375, "sft_loss": 0.7109375, "step": 2121 }, { "dpo_loss": 0.30859375, "epoch": 0.34, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 7.507325490007397e-07, "loss": 0.3527, "projector_lr": 2.252197647002219e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.875, "sft_loss": 0.64453125, "step": 2122 }, { "dpo_loss": 0.0101318359375, "epoch": 0.34, "final_loss": 0.0101318359375, "grad_norm": 0.0, "learning_rate": 7.505128596268742e-07, "loss": 0.1014, "projector_lr": 2.251538578880623e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 6.375, "rewards_train/rejected": -7.4375, "sft_loss": 0.65625, "step": 2123 }, { "dpo_loss": 0.392578125, "epoch": 0.34, "final_loss": 0.392578125, "grad_norm": 0.0, "learning_rate": 7.502931056623773e-07, "loss": 0.5862, "projector_lr": 2.250879316987132e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 2.46875, "rewards_train/rejected": -3.46875, "sft_loss": 0.6640625, "step": 2124 }, { "dpo_loss": 0.03564453125, "epoch": 0.34, "final_loss": 0.03564453125, "grad_norm": 0.0, "learning_rate": 7.500732871639096e-07, "loss": 0.271, "projector_lr": 2.250219861491729e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 3.703125, "rewards_train/rejected": -5.125, "sft_loss": 0.6953125, "step": 2125 }, { "dpo_loss": 0.142578125, "epoch": 0.34, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 7.498534041881471e-07, "loss": 0.116, "projector_lr": 2.2495602125644413e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6875, "rewards_train/margins": 3.25, "rewards_train/rejected": -3.9375, "sft_loss": 1.015625, "step": 2126 }, { "dpo_loss": 0.2021484375, "epoch": 0.34, "final_loss": 0.2021484375, "grad_norm": 0.0, "learning_rate": 7.496334567917831e-07, "loss": 0.1979, "projector_lr": 2.2489003703753496e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.75, "sft_loss": 0.7890625, "step": 2127 }, { "dpo_loss": 0.255859375, "epoch": 0.34, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 7.494134450315276e-07, "loss": 0.1971, "projector_lr": 2.248240335094583e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 2.359375, "rewards_train/rejected": -3.203125, "sft_loss": 0.8125, "step": 2128 }, { "dpo_loss": 0.5078125, "epoch": 0.34, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 7.49193368964107e-07, "loss": 0.3283, "projector_lr": 2.247580106892321e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.8828125, "rewards_train/margins": 3.046875, "rewards_train/rejected": -4.9375, "sft_loss": 0.84375, "step": 2129 }, { "dpo_loss": 0.11279296875, "epoch": 0.34, "final_loss": 0.11279296875, "grad_norm": 0.0, "learning_rate": 7.489732286462641e-07, "loss": 0.1171, "projector_lr": 2.2469196859387926e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.125, "sft_loss": 0.76953125, "step": 2130 }, { "dpo_loss": 0.039306640625, "epoch": 0.34, "final_loss": 0.039306640625, "grad_norm": 0.0, "learning_rate": 7.487530241347587e-07, "loss": 0.1859, "projector_lr": 2.2462590724042763e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.4375, "sft_loss": 0.76953125, "step": 2131 }, { "dpo_loss": 0.400390625, "epoch": 0.34, "final_loss": 0.400390625, "grad_norm": 0.0, "learning_rate": 7.485327554863668e-07, "loss": 0.3071, "projector_lr": 2.2455982664591003e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7265625, "rewards_train/margins": 1.46875, "rewards_train/rejected": -2.203125, "sft_loss": 0.84765625, "step": 2132 }, { "dpo_loss": 0.1259765625, "epoch": 0.34, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 7.48312422757881e-07, "loss": 0.1363, "projector_lr": 2.2449372682736433e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.28125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.84375, "sft_loss": 0.921875, "step": 2133 }, { "dpo_loss": 0.09814453125, "epoch": 0.34, "final_loss": 0.09814453125, "grad_norm": 0.0, "learning_rate": 7.480920260061107e-07, "loss": 0.1534, "projector_lr": 2.244276078018332e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.1875, "sft_loss": 0.6796875, "step": 2134 }, { "dpo_loss": 0.058837890625, "epoch": 0.34, "final_loss": 0.058837890625, "grad_norm": 0.0, "learning_rate": 7.478715652878814e-07, "loss": 0.0554, "projector_lr": 2.243614695863644e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.984375, "sft_loss": 0.796875, "step": 2135 }, { "dpo_loss": 0.01409912109375, "epoch": 0.34, "final_loss": 0.01409912109375, "grad_norm": 0.0, "learning_rate": 7.476510406600353e-07, "loss": 0.2217, "projector_lr": 2.242953121980106e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.50390625, "rewards_train/margins": 5.46875, "rewards_train/rejected": -5.96875, "sft_loss": 0.953125, "step": 2136 }, { "dpo_loss": 0.07861328125, "epoch": 0.34, "final_loss": 0.07861328125, "grad_norm": 0.0, "learning_rate": 7.47430452179431e-07, "loss": 0.1219, "projector_lr": 2.2422913565382933e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 5.75, "rewards_train/rejected": -7.125, "sft_loss": 0.53125, "step": 2137 }, { "dpo_loss": 0.109375, "epoch": 0.34, "final_loss": 0.109375, "grad_norm": 0.0, "learning_rate": 7.472097999029439e-07, "loss": 0.1277, "projector_lr": 2.2416293997088318e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40625, "rewards_train/margins": 5.375, "rewards_train/rejected": -5.78125, "sft_loss": 0.8125, "step": 2138 }, { "dpo_loss": 0.32421875, "epoch": 0.34, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 7.469890838874652e-07, "loss": 0.4233, "projector_lr": 2.2409672516623956e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.478515625, "rewards_train/margins": 2.640625, "rewards_train/rejected": -3.125, "sft_loss": 0.625, "step": 2139 }, { "dpo_loss": 0.1162109375, "epoch": 0.34, "final_loss": 0.1162109375, "grad_norm": 0.0, "learning_rate": 7.467683041899032e-07, "loss": 0.2014, "projector_lr": 2.2403049125697097e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.03125, "sft_loss": 0.54296875, "step": 2140 }, { "dpo_loss": 0.48046875, "epoch": 0.34, "final_loss": 0.48046875, "grad_norm": 0.0, "learning_rate": 7.465474608671822e-07, "loss": 0.3478, "projector_lr": 2.239642382601547e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 3.21875, "rewards_train/rejected": -3.953125, "sft_loss": 0.609375, "step": 2141 }, { "dpo_loss": 0.193359375, "epoch": 0.34, "final_loss": 0.193359375, "grad_norm": 0.0, "learning_rate": 7.463265539762429e-07, "loss": 0.1919, "projector_lr": 2.2389796619287287e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.96875, "sft_loss": 0.69921875, "step": 2142 }, { "dpo_loss": 0.5546875, "epoch": 0.34, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 7.461055835740429e-07, "loss": 0.2856, "projector_lr": 2.238316750722129e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.171875, "rewards_train/margins": 2.65625, "rewards_train/rejected": -3.84375, "sft_loss": 0.75, "step": 2143 }, { "dpo_loss": 0.09228515625, "epoch": 0.34, "final_loss": 0.09228515625, "grad_norm": 0.0, "learning_rate": 7.458845497175554e-07, "loss": 0.0854, "projector_lr": 2.237653649152666e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.1875, "sft_loss": 1.0078125, "step": 2144 }, { "dpo_loss": 0.279296875, "epoch": 0.34, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 7.456634524637704e-07, "loss": 0.2428, "projector_lr": 2.2369903573913114e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 2.78125, "rewards_train/rejected": -4.03125, "sft_loss": 0.71484375, "step": 2145 }, { "dpo_loss": 0.173828125, "epoch": 0.34, "final_loss": 0.173828125, "grad_norm": 0.0, "learning_rate": 7.454422918696944e-07, "loss": 0.2417, "projector_lr": 2.2363268756090835e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.296875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -3.734375, "sft_loss": 0.828125, "step": 2146 }, { "dpo_loss": 0.00439453125, "epoch": 0.34, "final_loss": 0.00439453125, "grad_norm": 0.0, "learning_rate": 7.452210679923501e-07, "loss": 0.193, "projector_lr": 2.2356632039770505e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.6875, "sft_loss": 0.7890625, "step": 2147 }, { "dpo_loss": 0.443359375, "epoch": 0.34, "final_loss": 0.443359375, "grad_norm": 0.0, "learning_rate": 7.44999780888776e-07, "loss": 0.3387, "projector_lr": 2.2349993426663284e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 3.859375, "rewards_train/rejected": -4.4375, "sft_loss": 0.73828125, "step": 2148 }, { "dpo_loss": 0.10546875, "epoch": 0.34, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 7.447784306160278e-07, "loss": 0.1138, "projector_lr": 2.234335291848084e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0517578125, "rewards_train/margins": 5.65625, "rewards_train/rejected": -5.6875, "sft_loss": 0.75, "step": 2149 }, { "dpo_loss": 0.60546875, "epoch": 0.34, "final_loss": 0.60546875, "grad_norm": 0.0, "learning_rate": 7.445570172311769e-07, "loss": 0.3736, "projector_lr": 2.2336710516935307e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.46875, "rewards_train/margins": 3.984375, "rewards_train/rejected": -5.4375, "sft_loss": 0.72265625, "step": 2150 }, { "dpo_loss": 0.11767578125, "epoch": 0.34, "final_loss": 0.11767578125, "grad_norm": 0.0, "learning_rate": 7.443355407913108e-07, "loss": 0.2052, "projector_lr": 2.2330066223739326e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.328125, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.625, "sft_loss": 0.703125, "step": 2151 }, { "dpo_loss": 0.54296875, "epoch": 0.34, "final_loss": 0.54296875, "grad_norm": 0.0, "learning_rate": 7.44114001353534e-07, "loss": 0.4694, "projector_lr": 2.2323420040606024e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 2.109375, "rewards_train/rejected": -3.421875, "sft_loss": 0.68359375, "step": 2152 }, { "dpo_loss": 0.08447265625, "epoch": 0.34, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 7.438923989749667e-07, "loss": 0.0722, "projector_lr": 2.2316771969249003e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06884765625, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.3125, "sft_loss": 0.419921875, "step": 2153 }, { "dpo_loss": 0.10400390625, "epoch": 0.34, "final_loss": 0.10400390625, "grad_norm": 0.0, "learning_rate": 7.436707337127455e-07, "loss": 0.1991, "projector_lr": 2.2310122011382366e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.03173828125, "rewards_train/margins": 4.59375, "rewards_train/rejected": -4.5625, "sft_loss": 0.72265625, "step": 2154 }, { "dpo_loss": 0.05224609375, "epoch": 0.34, "final_loss": 0.05224609375, "grad_norm": 0.0, "learning_rate": 7.434490056240228e-07, "loss": 0.117, "projector_lr": 2.2303470168720686e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.453125, "rewards_train/margins": 4.4375, "rewards_train/rejected": -4.875, "sft_loss": 0.6796875, "step": 2155 }, { "dpo_loss": 0.026611328125, "epoch": 0.34, "final_loss": 0.026611328125, "grad_norm": 0.0, "learning_rate": 7.432272147659677e-07, "loss": 0.2753, "projector_lr": 2.2296816442979035e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.1875, "sft_loss": 0.84375, "step": 2156 }, { "dpo_loss": 0.044189453125, "epoch": 0.35, "final_loss": 0.044189453125, "grad_norm": 0.0, "learning_rate": 7.430053611957657e-07, "loss": 0.2112, "projector_lr": 2.2290160835872972e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1689453125, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.1875, "sft_loss": 0.53515625, "step": 2157 }, { "dpo_loss": 0.2294921875, "epoch": 0.35, "final_loss": 0.2294921875, "grad_norm": 0.0, "learning_rate": 7.427834449706177e-07, "loss": 0.1756, "projector_lr": 2.228350334911853e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 2.875, "rewards_train/rejected": -3.734375, "sft_loss": 0.70703125, "step": 2158 }, { "dpo_loss": 0.19921875, "epoch": 0.35, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 7.425614661477413e-07, "loss": 0.2701, "projector_lr": 2.227684398443224e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.0, "sft_loss": 0.6484375, "step": 2159 }, { "dpo_loss": 0.1728515625, "epoch": 0.35, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 7.423394247843701e-07, "loss": 0.1293, "projector_lr": 2.2270182743531107e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.56640625, "rewards_train/margins": 3.859375, "rewards_train/rejected": -4.4375, "sft_loss": 0.87109375, "step": 2160 }, { "dpo_loss": 0.0308837890625, "epoch": 0.35, "final_loss": 0.0308837890625, "grad_norm": 0.0, "learning_rate": 7.421173209377538e-07, "loss": 0.0584, "projector_lr": 2.2263519628132617e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.328125, "rewards_train/margins": 6.21875, "rewards_train/rejected": -6.53125, "sft_loss": 0.72265625, "step": 2161 }, { "dpo_loss": 0.302734375, "epoch": 0.35, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 7.418951546651582e-07, "loss": 0.208, "projector_lr": 2.2256854639954747e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.166015625, "rewards_train/margins": 3.71875, "rewards_train/rejected": -3.890625, "sft_loss": 0.98828125, "step": 2162 }, { "dpo_loss": 0.279296875, "epoch": 0.35, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 7.416729260238653e-07, "loss": 0.1901, "projector_lr": 2.2250187780715963e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 2.171875, "rewards_train/rejected": -2.6875, "sft_loss": 0.53515625, "step": 2163 }, { "dpo_loss": 0.50390625, "epoch": 0.35, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 7.41450635071173e-07, "loss": 0.3277, "projector_lr": 2.224351905213519e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.75, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.9375, "sft_loss": 0.55859375, "step": 2164 }, { "dpo_loss": 0.1416015625, "epoch": 0.35, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 7.412282818643955e-07, "loss": 0.1548, "projector_lr": 2.223684845593187e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 3.734375, "rewards_train/rejected": -4.78125, "sft_loss": 0.84375, "step": 2165 }, { "dpo_loss": 0.474609375, "epoch": 0.35, "final_loss": 0.474609375, "grad_norm": 0.0, "learning_rate": 7.41005866460863e-07, "loss": 0.3442, "projector_lr": 2.223017599382589e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 2.84375, "rewards_train/rejected": -3.75, "sft_loss": 0.76171875, "step": 2166 }, { "dpo_loss": 0.47265625, "epoch": 0.35, "final_loss": 0.47265625, "grad_norm": 0.0, "learning_rate": 7.407833889179215e-07, "loss": 0.4944, "projector_lr": 2.2223501667537644e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.65625, "rewards_train/margins": 1.65625, "rewards_train/rejected": -2.3125, "sft_loss": 0.7421875, "step": 2167 }, { "dpo_loss": 0.107421875, "epoch": 0.35, "final_loss": 0.107421875, "grad_norm": 0.0, "learning_rate": 7.405608492929331e-07, "loss": 0.1559, "projector_lr": 2.2216825478788e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.84375, "sft_loss": 0.76171875, "step": 2168 }, { "dpo_loss": 0.0927734375, "epoch": 0.35, "final_loss": 0.0927734375, "grad_norm": 0.0, "learning_rate": 7.403382476432762e-07, "loss": 0.1309, "projector_lr": 2.2210147429298286e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0810546875, "rewards_train/margins": 5.5625, "rewards_train/rejected": -5.5, "sft_loss": 0.66015625, "step": 2169 }, { "dpo_loss": 0.296875, "epoch": 0.35, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 7.401155840263449e-07, "loss": 0.1815, "projector_lr": 2.220346752079035e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2099609375, "rewards_train/margins": 2.890625, "rewards_train/rejected": -3.09375, "sft_loss": 0.765625, "step": 2170 }, { "dpo_loss": 0.375, "epoch": 0.35, "final_loss": 0.375, "grad_norm": 0.0, "learning_rate": 7.398928584995493e-07, "loss": 0.1986, "projector_lr": 2.219678575498648e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.921875, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.875, "sft_loss": 0.80078125, "step": 2171 }, { "dpo_loss": 0.12158203125, "epoch": 0.35, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 7.396700711203157e-07, "loss": 0.1078, "projector_lr": 2.219010213360947e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.494140625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.53125, "sft_loss": 0.74609375, "step": 2172 }, { "dpo_loss": 0.150390625, "epoch": 0.35, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 7.39447221946086e-07, "loss": 0.1522, "projector_lr": 2.218341665838258e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.140625, "rewards_train/margins": 3.640625, "rewards_train/rejected": -3.78125, "sft_loss": 0.58984375, "step": 2173 }, { "dpo_loss": 0.0908203125, "epoch": 0.35, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 7.392243110343182e-07, "loss": 0.4374, "projector_lr": 2.217672933102955e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48046875, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.40625, "sft_loss": 0.640625, "step": 2174 }, { "dpo_loss": 0.0888671875, "epoch": 0.35, "final_loss": 0.0888671875, "grad_norm": 0.0, "learning_rate": 7.390013384424863e-07, "loss": 0.2862, "projector_lr": 2.217004015327459e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09912109375, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.0625, "sft_loss": 0.59765625, "step": 2175 }, { "dpo_loss": 0.10693359375, "epoch": 0.35, "final_loss": 0.10693359375, "grad_norm": 0.0, "learning_rate": 7.387783042280803e-07, "loss": 0.173, "projector_lr": 2.216334912684241e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.84375, "sft_loss": 1.1328125, "step": 2176 }, { "dpo_loss": 0.625, "epoch": 0.35, "final_loss": 0.625, "grad_norm": 0.0, "learning_rate": 7.385552084486055e-07, "loss": 0.4236, "projector_lr": 2.215665625345817e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 2.03125, "rewards_train/rejected": -3.15625, "sft_loss": 0.81640625, "step": 2177 }, { "dpo_loss": 0.26171875, "epoch": 0.35, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 7.383320511615837e-07, "loss": 0.1598, "projector_lr": 2.2149961534847513e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2294921875, "rewards_train/margins": 3.40625, "rewards_train/rejected": -3.640625, "sft_loss": 0.93359375, "step": 2178 }, { "dpo_loss": 0.1337890625, "epoch": 0.35, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 7.381088324245524e-07, "loss": 0.118, "projector_lr": 2.2143264972736577e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.400390625, "rewards_train/margins": 5.59375, "rewards_train/rejected": -5.1875, "sft_loss": 0.75390625, "step": 2179 }, { "dpo_loss": 0.12353515625, "epoch": 0.35, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 7.378855522950648e-07, "loss": 0.1496, "projector_lr": 2.2136566568851947e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.482421875, "rewards_train/margins": 3.859375, "rewards_train/rejected": -3.375, "sft_loss": 0.94140625, "step": 2180 }, { "dpo_loss": 0.28125, "epoch": 0.35, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 7.376622108306903e-07, "loss": 0.2658, "projector_lr": 2.2129866324920707e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 2.90625, "rewards_train/rejected": -4.09375, "sft_loss": 0.69921875, "step": 2181 }, { "dpo_loss": 0.034912109375, "epoch": 0.35, "final_loss": 0.034912109375, "grad_norm": 0.0, "learning_rate": 7.374388080890134e-07, "loss": 0.1462, "projector_lr": 2.21231642426704e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.8125, "sft_loss": 0.78515625, "step": 2182 }, { "dpo_loss": 0.142578125, "epoch": 0.35, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 7.372153441276351e-07, "loss": 0.1051, "projector_lr": 2.2116460323829053e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.03125, "sft_loss": 0.859375, "step": 2183 }, { "dpo_loss": 0.06884765625, "epoch": 0.35, "final_loss": 0.06884765625, "grad_norm": 0.0, "learning_rate": 7.369918190041717e-07, "loss": 0.2266, "projector_lr": 2.2109754570125152e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.609375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.5625, "sft_loss": 0.8125, "step": 2184 }, { "dpo_loss": 0.30859375, "epoch": 0.35, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 7.367682327762558e-07, "loss": 0.1675, "projector_lr": 2.2103046983287675e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.1943359375, "rewards_train/margins": 3.375, "rewards_train/rejected": -3.1875, "sft_loss": 0.8828125, "step": 2185 }, { "dpo_loss": 0.08544921875, "epoch": 0.35, "final_loss": 0.08544921875, "grad_norm": 0.0, "learning_rate": 7.365445855015351e-07, "loss": 0.1532, "projector_lr": 2.2096337565046055e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.796875, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.0625, "sft_loss": 0.7890625, "step": 2186 }, { "dpo_loss": 0.5, "epoch": 0.35, "final_loss": 0.5, "grad_norm": 0.0, "learning_rate": 7.363208772376736e-07, "loss": 0.2827, "projector_lr": 2.208962631713021e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 3.078125, "rewards_train/rejected": -4.0625, "sft_loss": 0.7734375, "step": 2187 }, { "dpo_loss": 0.310546875, "epoch": 0.35, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 7.360971080423508e-07, "loss": 0.1807, "projector_lr": 2.208291324127053e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.046875, "rewards_train/margins": 4.75, "rewards_train/rejected": -6.8125, "sft_loss": 0.70703125, "step": 2188 }, { "dpo_loss": 0.6640625, "epoch": 0.35, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 7.358732779732618e-07, "loss": 0.418, "projector_lr": 2.2076198339197856e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 2.640625, "rewards_train/rejected": -3.796875, "sft_loss": 0.7109375, "step": 2189 }, { "dpo_loss": 0.421875, "epoch": 0.35, "final_loss": 0.421875, "grad_norm": 0.0, "learning_rate": 7.356493870881178e-07, "loss": 0.2589, "projector_lr": 2.2069481612643536e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 3.03125, "rewards_train/rejected": -3.921875, "sft_loss": 0.8515625, "step": 2190 }, { "dpo_loss": 0.11376953125, "epoch": 0.35, "final_loss": 0.11376953125, "grad_norm": 0.0, "learning_rate": 7.354254354446449e-07, "loss": 0.1343, "projector_lr": 2.206276306333935e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2392578125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -4.96875, "sft_loss": 0.451171875, "step": 2191 }, { "dpo_loss": 0.134765625, "epoch": 0.35, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 7.352014231005857e-07, "loss": 0.1809, "projector_lr": 2.2056042693017574e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.361328125, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.6875, "sft_loss": 0.84375, "step": 2192 }, { "dpo_loss": 0.10595703125, "epoch": 0.35, "final_loss": 0.10595703125, "grad_norm": 0.0, "learning_rate": 7.349773501136981e-07, "loss": 0.447, "projector_lr": 2.2049320503410945e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.03271484375, "rewards_train/margins": 3.828125, "rewards_train/rejected": -3.859375, "sft_loss": 0.66015625, "step": 2193 }, { "dpo_loss": 0.09033203125, "epoch": 0.35, "final_loss": 0.09033203125, "grad_norm": 0.0, "learning_rate": 7.347532165417556e-07, "loss": 0.0547, "projector_lr": 2.204259649625267e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28125, "rewards_train/margins": 3.15625, "rewards_train/rejected": -3.453125, "sft_loss": 0.75, "step": 2194 }, { "dpo_loss": 0.2265625, "epoch": 0.35, "final_loss": 0.2265625, "grad_norm": 0.0, "learning_rate": 7.345290224425473e-07, "loss": 0.2542, "projector_lr": 2.2035870673276422e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2001953125, "rewards_train/margins": 3.53125, "rewards_train/rejected": -3.71875, "sft_loss": 0.93359375, "step": 2195 }, { "dpo_loss": 0.0576171875, "epoch": 0.35, "final_loss": 0.0576171875, "grad_norm": 0.0, "learning_rate": 7.343047678738781e-07, "loss": 0.1339, "projector_lr": 2.2029143036216345e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.59375, "sft_loss": 0.6171875, "step": 2196 }, { "dpo_loss": 0.5546875, "epoch": 0.35, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 7.340804528935682e-07, "loss": 0.414, "projector_lr": 2.202241358680705e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 2.875, "rewards_train/rejected": -3.25, "sft_loss": 0.5078125, "step": 2197 }, { "dpo_loss": 0.1796875, "epoch": 0.35, "final_loss": 0.1796875, "grad_norm": 0.0, "learning_rate": 7.338560775594536e-07, "loss": 0.1698, "projector_lr": 2.201568232678361e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.10009765625, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.25, "sft_loss": 1.0078125, "step": 2198 }, { "dpo_loss": 0.07666015625, "epoch": 0.35, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 7.336316419293858e-07, "loss": 0.1536, "projector_lr": 2.2008949257881575e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.181640625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -4.59375, "sft_loss": 0.796875, "step": 2199 }, { "dpo_loss": 0.03759765625, "epoch": 0.35, "final_loss": 0.03759765625, "grad_norm": 0.0, "learning_rate": 7.334071460612319e-07, "loss": 0.0677, "projector_lr": 2.200221438183696e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3125, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.15625, "sft_loss": 0.875, "step": 2200 }, { "dpo_loss": 0.045166015625, "epoch": 0.35, "final_loss": 0.045166015625, "grad_norm": 0.0, "learning_rate": 7.331825900128745e-07, "loss": 0.1598, "projector_lr": 2.1995477700386236e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.478515625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.15625, "sft_loss": 0.87109375, "step": 2201 }, { "dpo_loss": 0.115234375, "epoch": 0.35, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 7.329579738422114e-07, "loss": 0.3748, "projector_lr": 2.1988739215266347e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.28125, "sft_loss": 0.890625, "step": 2202 }, { "dpo_loss": 0.1689453125, "epoch": 0.35, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 7.327332976071567e-07, "loss": 0.2455, "projector_lr": 2.19819989282147e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 6.03125, "rewards_train/rejected": -6.96875, "sft_loss": 0.84375, "step": 2203 }, { "dpo_loss": 0.138671875, "epoch": 0.35, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 7.325085613656391e-07, "loss": 0.2102, "projector_lr": 2.1975256840969176e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33203125, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.34375, "sft_loss": 0.66796875, "step": 2204 }, { "dpo_loss": 0.08642578125, "epoch": 0.35, "final_loss": 0.08642578125, "grad_norm": 0.0, "learning_rate": 7.322837651756033e-07, "loss": 0.2241, "projector_lr": 2.19685129552681e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 3.953125, "rewards_train/rejected": -5.4375, "sft_loss": 0.7109375, "step": 2205 }, { "dpo_loss": 0.1298828125, "epoch": 0.35, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 7.320589090950093e-07, "loss": 0.1342, "projector_lr": 2.196176727285028e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 3.734375, "rewards_train/rejected": -4.5625, "sft_loss": 0.89453125, "step": 2206 }, { "dpo_loss": 0.29296875, "epoch": 0.35, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 7.318339931818324e-07, "loss": 0.2514, "projector_lr": 2.1955019795454978e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.140625, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.625, "sft_loss": 0.64453125, "step": 2207 }, { "dpo_loss": 0.263671875, "epoch": 0.35, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 7.316090174940637e-07, "loss": 0.4323, "projector_lr": 2.1948270524821913e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.05712890625, "rewards_train/margins": 2.171875, "rewards_train/rejected": -2.21875, "sft_loss": 0.9765625, "step": 2208 }, { "dpo_loss": 0.380859375, "epoch": 0.35, "final_loss": 0.380859375, "grad_norm": 0.0, "learning_rate": 7.313839820897094e-07, "loss": 0.3333, "projector_lr": 2.1941519462691283e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 3.265625, "rewards_train/rejected": -3.953125, "sft_loss": 0.89453125, "step": 2209 }, { "dpo_loss": 0.0947265625, "epoch": 0.35, "final_loss": 0.0947265625, "grad_norm": 0.0, "learning_rate": 7.311588870267912e-07, "loss": 0.2226, "projector_lr": 2.193476661080374e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.416015625, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.09375, "sft_loss": 0.7890625, "step": 2210 }, { "dpo_loss": 0.373046875, "epoch": 0.35, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 7.30933732363346e-07, "loss": 0.2412, "projector_lr": 2.1928011970900383e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.515625, "rewards_train/margins": 3.15625, "rewards_train/rejected": -3.671875, "sft_loss": 0.5859375, "step": 2211 }, { "dpo_loss": 0.1416015625, "epoch": 0.35, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 7.307085181574265e-07, "loss": 0.2022, "projector_lr": 2.1921255544722796e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53125, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.4375, "sft_loss": 0.80078125, "step": 2212 }, { "dpo_loss": 0.040283203125, "epoch": 0.35, "final_loss": 0.040283203125, "grad_norm": 0.0, "learning_rate": 7.304832444671004e-07, "loss": 0.1822, "projector_lr": 2.1914497334013015e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.890625, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.9375, "sft_loss": 0.6484375, "step": 2213 }, { "dpo_loss": 0.1376953125, "epoch": 0.35, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 7.302579113504507e-07, "loss": 0.168, "projector_lr": 2.1907737340513523e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.71875, "sft_loss": 0.8828125, "step": 2214 }, { "dpo_loss": 0.1865234375, "epoch": 0.35, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 7.30032518865576e-07, "loss": 0.1761, "projector_lr": 2.1900975565967284e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.90234375, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.375, "sft_loss": 0.7109375, "step": 2215 }, { "dpo_loss": 0.2431640625, "epoch": 0.35, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 7.2980706707059e-07, "loss": 0.1276, "projector_lr": 2.1894212012117704e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.03125, "sft_loss": 0.6796875, "step": 2216 }, { "dpo_loss": 0.435546875, "epoch": 0.35, "final_loss": 0.435546875, "grad_norm": 0.0, "learning_rate": 7.295815560236218e-07, "loss": 0.3361, "projector_lr": 2.1887446680708655e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 2.453125, "rewards_train/rejected": -3.5625, "sft_loss": 0.828125, "step": 2217 }, { "dpo_loss": 0.107421875, "epoch": 0.35, "final_loss": 0.107421875, "grad_norm": 0.0, "learning_rate": 7.293559857828155e-07, "loss": 0.1324, "projector_lr": 2.188067957348447e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.90625, "sft_loss": 0.62890625, "step": 2218 }, { "dpo_loss": 0.06884765625, "epoch": 0.36, "final_loss": 0.06884765625, "grad_norm": 0.0, "learning_rate": 7.291303564063312e-07, "loss": 0.1285, "projector_lr": 2.1873910692189937e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.3125, "sft_loss": 0.65625, "step": 2219 }, { "dpo_loss": 0.13671875, "epoch": 0.36, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 7.289046679523431e-07, "loss": 0.2183, "projector_lr": 2.1867140038570295e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.03125, "sft_loss": 1.09375, "step": 2220 }, { "dpo_loss": 0.498046875, "epoch": 0.36, "final_loss": 0.498046875, "grad_norm": 0.0, "learning_rate": 7.286789204790416e-07, "loss": 0.2824, "projector_lr": 2.1860367614371246e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.466796875, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.03125, "sft_loss": 0.828125, "step": 2221 }, { "dpo_loss": 0.220703125, "epoch": 0.36, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 7.284531140446319e-07, "loss": 0.2249, "projector_lr": 2.185359342133896e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.28125, "sft_loss": 0.71875, "step": 2222 }, { "dpo_loss": 0.201171875, "epoch": 0.36, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 7.282272487073347e-07, "loss": 0.1294, "projector_lr": 2.184681746122004e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46875, "rewards_train/margins": 3.453125, "rewards_train/rejected": -3.90625, "sft_loss": 0.93359375, "step": 2223 }, { "dpo_loss": 0.119140625, "epoch": 0.36, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 7.280013245253853e-07, "loss": 0.1517, "projector_lr": 2.184003973576156e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.75, "sft_loss": 0.90625, "step": 2224 }, { "dpo_loss": 0.318359375, "epoch": 0.36, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 7.277753415570348e-07, "loss": 0.279, "projector_lr": 2.1833260246711045e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.125, "sft_loss": 0.494140625, "step": 2225 }, { "dpo_loss": 0.2109375, "epoch": 0.36, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 7.275492998605493e-07, "loss": 0.2536, "projector_lr": 2.182647899581648e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 2.5, "rewards_train/rejected": -3.375, "sft_loss": 0.796875, "step": 2226 }, { "dpo_loss": 0.5234375, "epoch": 0.36, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 7.273231994942095e-07, "loss": 0.3321, "projector_lr": 2.181969598482629e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.046875, "rewards_train/margins": 1.9140625, "rewards_train/rejected": -3.96875, "sft_loss": 0.8359375, "step": 2227 }, { "dpo_loss": 0.0244140625, "epoch": 0.36, "final_loss": 0.0244140625, "grad_norm": 0.0, "learning_rate": 7.270970405163122e-07, "loss": 0.0481, "projector_lr": 2.1812911215489367e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 7.21875, "rewards_train/rejected": -8.6875, "sft_loss": 0.76953125, "step": 2228 }, { "dpo_loss": 0.3359375, "epoch": 0.36, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 7.268708229851684e-07, "loss": 0.2459, "projector_lr": 2.180612468955505e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.09375, "rewards_train/margins": 2.234375, "rewards_train/rejected": -3.328125, "sft_loss": 0.8984375, "step": 2229 }, { "dpo_loss": 0.17578125, "epoch": 0.36, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 7.266445469591047e-07, "loss": 0.1038, "projector_lr": 2.179933640877314e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 6.75, "rewards_train/rejected": -8.5625, "sft_loss": 0.6484375, "step": 2230 }, { "dpo_loss": 0.1494140625, "epoch": 0.36, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 7.264182124964629e-07, "loss": 0.1684, "projector_lr": 2.1792546374893887e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9609375, "rewards_train/margins": 6.4375, "rewards_train/rejected": -8.375, "sft_loss": 0.87109375, "step": 2231 }, { "dpo_loss": 0.166015625, "epoch": 0.36, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 7.261918196555991e-07, "loss": 0.1939, "projector_lr": 2.1785754589667974e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44921875, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.1875, "sft_loss": 0.8125, "step": 2232 }, { "dpo_loss": 0.059814453125, "epoch": 0.36, "final_loss": 0.059814453125, "grad_norm": 0.0, "learning_rate": 7.259653684948854e-07, "loss": 0.1149, "projector_lr": 2.1778961054846563e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 4.625, "rewards_train/rejected": -6.03125, "sft_loss": 0.75390625, "step": 2233 }, { "dpo_loss": 0.208984375, "epoch": 0.36, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 7.257388590727085e-07, "loss": 0.2124, "projector_lr": 2.1772165772181258e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.59375, "sft_loss": 0.79296875, "step": 2234 }, { "dpo_loss": 0.359375, "epoch": 0.36, "final_loss": 0.359375, "grad_norm": 0.0, "learning_rate": 7.255122914474699e-07, "loss": 0.2114, "projector_lr": 2.17653687434241e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.265625, "rewards_train/margins": 3.21875, "rewards_train/rejected": -5.5, "sft_loss": 0.890625, "step": 2235 }, { "dpo_loss": 0.043212890625, "epoch": 0.36, "final_loss": 0.043212890625, "grad_norm": 0.0, "learning_rate": 7.252856656775865e-07, "loss": 0.0967, "projector_lr": 2.17585699703276e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.109375, "rewards_train/margins": 5.0625, "rewards_train/rejected": -7.15625, "sft_loss": 0.70703125, "step": 2236 }, { "dpo_loss": 0.0908203125, "epoch": 0.36, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 7.2505898182149e-07, "loss": 0.063, "projector_lr": 2.17517694546447e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.25, "sft_loss": 0.88671875, "step": 2237 }, { "dpo_loss": 0.4453125, "epoch": 0.36, "final_loss": 0.4453125, "grad_norm": 0.0, "learning_rate": 7.248322399376274e-07, "loss": 0.2706, "projector_lr": 2.1744967198128824e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.34375, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.6875, "sft_loss": 0.71484375, "step": 2238 }, { "dpo_loss": 0.064453125, "epoch": 0.36, "final_loss": 0.064453125, "grad_norm": 0.0, "learning_rate": 7.246054400844599e-07, "loss": 0.124, "projector_lr": 2.17381632025338e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.96875, "sft_loss": 0.5390625, "step": 2239 }, { "dpo_loss": 0.44921875, "epoch": 0.36, "final_loss": 0.44921875, "grad_norm": 0.0, "learning_rate": 7.243785823204642e-07, "loss": 0.4693, "projector_lr": 2.173135746961393e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.25, "sft_loss": 0.69921875, "step": 2240 }, { "dpo_loss": 0.39453125, "epoch": 0.36, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 7.241516667041323e-07, "loss": 0.2114, "projector_lr": 2.1724550001123972e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.9375, "rewards_train/margins": 2.0625, "rewards_train/rejected": -5.0, "sft_loss": 0.8359375, "step": 2241 }, { "dpo_loss": 0.21484375, "epoch": 0.36, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 7.239246932939701e-07, "loss": 0.1683, "projector_lr": 2.1717740798819106e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.53125, "sft_loss": 1.015625, "step": 2242 }, { "dpo_loss": 0.326171875, "epoch": 0.36, "final_loss": 0.326171875, "grad_norm": 0.0, "learning_rate": 7.236976621484992e-07, "loss": 0.2835, "projector_lr": 2.171092986445498e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.5625, "sft_loss": 0.671875, "step": 2243 }, { "dpo_loss": 0.06787109375, "epoch": 0.36, "final_loss": 0.06787109375, "grad_norm": 0.0, "learning_rate": 7.234705733262561e-07, "loss": 0.0674, "projector_lr": 2.1704117199787684e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.875, "sft_loss": 0.5703125, "step": 2244 }, { "dpo_loss": 0.458984375, "epoch": 0.36, "final_loss": 0.458984375, "grad_norm": 0.0, "learning_rate": 7.232434268857917e-07, "loss": 0.3469, "projector_lr": 2.169730280657375e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.296875, "rewards_train/margins": 2.828125, "rewards_train/rejected": -5.125, "sft_loss": 0.96875, "step": 2245 }, { "dpo_loss": 0.236328125, "epoch": 0.36, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 7.230162228856719e-07, "loss": 0.2612, "projector_lr": 2.1690486686570158e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.15625, "sft_loss": 0.8984375, "step": 2246 }, { "dpo_loss": 0.298828125, "epoch": 0.36, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 7.227889613844775e-07, "loss": 0.1881, "projector_lr": 2.168366884153433e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.90625, "sft_loss": 0.74609375, "step": 2247 }, { "dpo_loss": 0.0242919921875, "epoch": 0.36, "final_loss": 0.0242919921875, "grad_norm": 0.0, "learning_rate": 7.225616424408044e-07, "loss": 0.0607, "projector_lr": 2.1676849273224134e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.125, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.625, "sft_loss": 0.71875, "step": 2248 }, { "dpo_loss": 0.11962890625, "epoch": 0.36, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 7.22334266113263e-07, "loss": 0.1196, "projector_lr": 2.167002798339789e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.53125, "rewards_train/margins": 4.8125, "rewards_train/rejected": -6.34375, "sft_loss": 0.71875, "step": 2249 }, { "dpo_loss": 0.142578125, "epoch": 0.36, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 7.221068324604783e-07, "loss": 0.5792, "projector_lr": 2.166320497381435e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5390625, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.75, "sft_loss": 0.6015625, "step": 2250 }, { "dpo_loss": 0.1865234375, "epoch": 0.36, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 7.218793415410906e-07, "loss": 0.2399, "projector_lr": 2.165638024623272e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.03125, "sft_loss": 0.8125, "step": 2251 }, { "dpo_loss": 0.19921875, "epoch": 0.36, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 7.216517934137547e-07, "loss": 0.3709, "projector_lr": 2.164955380241264e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.671875, "rewards_train/margins": 3.0, "rewards_train/rejected": -5.6875, "sft_loss": 0.671875, "step": 2252 }, { "dpo_loss": 0.275390625, "epoch": 0.36, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 7.214241881371401e-07, "loss": 0.31, "projector_lr": 2.1642725644114206e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.453125, "rewards_train/margins": 2.984375, "rewards_train/rejected": -4.4375, "sft_loss": 0.73046875, "step": 2253 }, { "dpo_loss": 0.498046875, "epoch": 0.36, "final_loss": 0.498046875, "grad_norm": 0.0, "learning_rate": 7.211965257699309e-07, "loss": 0.4119, "projector_lr": 2.1635895773097927e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.390625, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.8125, "sft_loss": 1.03125, "step": 2254 }, { "dpo_loss": 0.416015625, "epoch": 0.36, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 7.209688063708264e-07, "loss": 0.2594, "projector_lr": 2.1629064191124794e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5390625, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.78125, "sft_loss": 0.65234375, "step": 2255 }, { "dpo_loss": 0.01226806640625, "epoch": 0.36, "final_loss": 0.01226806640625, "grad_norm": 0.0, "learning_rate": 7.2074102999854e-07, "loss": 0.124, "projector_lr": 2.1622230899956203e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.96875, "sft_loss": 0.75390625, "step": 2256 }, { "dpo_loss": 0.8828125, "epoch": 0.36, "final_loss": 0.8828125, "grad_norm": 0.0, "learning_rate": 7.205131967118005e-07, "loss": 0.4532, "projector_lr": 2.1615395901354014e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -3.5, "rewards_train/margins": 2.84375, "rewards_train/rejected": -6.34375, "sft_loss": 0.8203125, "step": 2257 }, { "dpo_loss": 0.02001953125, "epoch": 0.36, "final_loss": 0.02001953125, "grad_norm": 0.0, "learning_rate": 7.202853065693506e-07, "loss": 0.1648, "projector_lr": 2.160855919708052e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.6875, "rewards_train/margins": 5.1875, "rewards_train/rejected": -7.875, "sft_loss": 0.58203125, "step": 2258 }, { "dpo_loss": 0.07568359375, "epoch": 0.36, "final_loss": 0.07568359375, "grad_norm": 0.0, "learning_rate": 7.200573596299481e-07, "loss": 0.1049, "projector_lr": 2.1601720788898448e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 6.4375, "rewards_train/rejected": -7.625, "sft_loss": 0.9921875, "step": 2259 }, { "dpo_loss": 0.265625, "epoch": 0.36, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 7.198293559523657e-07, "loss": 0.2853, "projector_lr": 2.1594880678570972e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.046875, "rewards_train/margins": 2.03125, "rewards_train/rejected": -4.0625, "sft_loss": 1.0078125, "step": 2260 }, { "dpo_loss": 0.2353515625, "epoch": 0.36, "final_loss": 0.2353515625, "grad_norm": 0.0, "learning_rate": 7.1960129559539e-07, "loss": 0.1508, "projector_lr": 2.1588038867861706e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 4.46875, "rewards_train/rejected": -6.25, "sft_loss": 0.55078125, "step": 2261 }, { "dpo_loss": 0.58203125, "epoch": 0.36, "final_loss": 0.58203125, "grad_norm": 0.0, "learning_rate": 7.19373178617823e-07, "loss": 0.292, "projector_lr": 2.158119535853469e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.796875, "rewards_train/margins": 2.46875, "rewards_train/rejected": -4.28125, "sft_loss": 0.95703125, "step": 2262 }, { "dpo_loss": 0.455078125, "epoch": 0.36, "final_loss": 0.455078125, "grad_norm": 0.0, "learning_rate": 7.191450050784805e-07, "loss": 0.3103, "projector_lr": 2.1574350152354416e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.53125, "sft_loss": 0.69921875, "step": 2263 }, { "dpo_loss": 0.043212890625, "epoch": 0.36, "final_loss": 0.043212890625, "grad_norm": 0.0, "learning_rate": 7.189167750361937e-07, "loss": 0.2702, "projector_lr": 2.1567503251085815e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 3.71875, "rewards_train/rejected": -5.0, "sft_loss": 0.68359375, "step": 2264 }, { "dpo_loss": 0.3828125, "epoch": 0.36, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 7.186884885498077e-07, "loss": 0.203, "projector_lr": 2.156065465649423e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.0, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.84375, "sft_loss": 0.5390625, "step": 2265 }, { "dpo_loss": 0.115234375, "epoch": 0.36, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 7.184601456781825e-07, "loss": 0.0739, "projector_lr": 2.1553804370345477e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.09375, "rewards_train/margins": 4.0, "rewards_train/rejected": -6.09375, "sft_loss": 1.0234375, "step": 2266 }, { "dpo_loss": 0.255859375, "epoch": 0.36, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 7.182317464801925e-07, "loss": 0.2698, "projector_lr": 2.154695239440578e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.421875, "rewards_train/margins": 4.59375, "rewards_train/rejected": -6.0, "sft_loss": 0.71484375, "step": 2267 }, { "dpo_loss": 0.064453125, "epoch": 0.36, "final_loss": 0.064453125, "grad_norm": 0.0, "learning_rate": 7.180032910147269e-07, "loss": 0.254, "projector_lr": 2.154009873044181e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.4375, "sft_loss": 0.84375, "step": 2268 }, { "dpo_loss": 0.2490234375, "epoch": 0.36, "final_loss": 0.2490234375, "grad_norm": 0.0, "learning_rate": 7.177747793406892e-07, "loss": 0.2521, "projector_lr": 2.153324338022068e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9296875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -7.53125, "sft_loss": 0.80859375, "step": 2269 }, { "dpo_loss": 0.53125, "epoch": 0.36, "final_loss": 0.53125, "grad_norm": 0.0, "learning_rate": 7.17546211516997e-07, "loss": 0.4208, "projector_lr": 2.1526386345509912e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8984375, "rewards_train/margins": 4.8125, "rewards_train/rejected": -6.6875, "sft_loss": 0.62109375, "step": 2270 }, { "dpo_loss": 0.039794921875, "epoch": 0.36, "final_loss": 0.039794921875, "grad_norm": 0.0, "learning_rate": 7.173175876025831e-07, "loss": 0.1373, "projector_lr": 2.1519527628077495e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.375, "rewards_train/margins": 7.0, "rewards_train/rejected": -8.375, "sft_loss": 0.8125, "step": 2271 }, { "dpo_loss": 0.1630859375, "epoch": 0.36, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 7.170889076563943e-07, "loss": 0.0974, "projector_lr": 2.151266722969183e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.953125, "rewards_train/margins": 3.609375, "rewards_train/rejected": -5.5625, "sft_loss": 0.796875, "step": 2272 }, { "dpo_loss": 0.2373046875, "epoch": 0.36, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 7.168601717373921e-07, "loss": 0.2428, "projector_lr": 2.1505805152121766e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.140625, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.09375, "sft_loss": 0.85546875, "step": 2273 }, { "dpo_loss": 1.1328125, "epoch": 0.36, "final_loss": 1.1328125, "grad_norm": 0.0, "learning_rate": 7.166313799045522e-07, "loss": 0.646, "projector_lr": 2.1498941397136566e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -2.5, "rewards_train/margins": 2.25, "rewards_train/rejected": -4.75, "sft_loss": 0.828125, "step": 2274 }, { "dpo_loss": 0.09765625, "epoch": 0.36, "final_loss": 0.09765625, "grad_norm": 0.0, "learning_rate": 7.164025322168647e-07, "loss": 0.0667, "projector_lr": 2.1492075966505943e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8203125, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.84375, "sft_loss": 0.765625, "step": 2275 }, { "dpo_loss": 0.11181640625, "epoch": 0.36, "final_loss": 0.11181640625, "grad_norm": 0.0, "learning_rate": 7.161736287333344e-07, "loss": 0.2143, "projector_lr": 2.1485208862000035e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.625, "rewards_train/margins": 5.53125, "rewards_train/rejected": -8.125, "sft_loss": 1.0546875, "step": 2276 }, { "dpo_loss": 0.01495361328125, "epoch": 0.36, "final_loss": 0.01495361328125, "grad_norm": 0.0, "learning_rate": 7.159446695129803e-07, "loss": 0.1151, "projector_lr": 2.1478340085389414e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 6.46875, "rewards_train/rejected": -7.125, "sft_loss": 0.7109375, "step": 2277 }, { "dpo_loss": 0.173828125, "epoch": 0.36, "final_loss": 0.173828125, "grad_norm": 0.0, "learning_rate": 7.157156546148357e-07, "loss": 0.2912, "projector_lr": 2.1471469638445075e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.3125, "sft_loss": 1.1640625, "step": 2278 }, { "dpo_loss": 0.208984375, "epoch": 0.36, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 7.154865840979486e-07, "loss": 0.186, "projector_lr": 2.146459752293846e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.25, "rewards_train/margins": 4.40625, "rewards_train/rejected": -6.625, "sft_loss": 0.78125, "step": 2279 }, { "dpo_loss": 0.2333984375, "epoch": 0.36, "final_loss": 0.2333984375, "grad_norm": 0.0, "learning_rate": 7.152574580213806e-07, "loss": 0.3329, "projector_lr": 2.145772374064142e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.5625, "sft_loss": 1.0234375, "step": 2280 }, { "dpo_loss": 0.296875, "epoch": 0.36, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 7.150282764442083e-07, "loss": 0.4033, "projector_lr": 2.145084829332625e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.03125, "rewards_train/margins": 2.859375, "rewards_train/rejected": -4.90625, "sft_loss": 0.875, "step": 2281 }, { "dpo_loss": 0.0322265625, "epoch": 0.37, "final_loss": 0.0322265625, "grad_norm": 0.0, "learning_rate": 7.147990394255228e-07, "loss": 0.0382, "projector_lr": 2.1443971182765686e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.8125, "sft_loss": 0.625, "step": 2282 }, { "dpo_loss": 0.248046875, "epoch": 0.37, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 7.145697470244285e-07, "loss": 0.1431, "projector_lr": 2.1437092410732856e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.796875, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.96875, "sft_loss": 0.9921875, "step": 2283 }, { "dpo_loss": 0.119140625, "epoch": 0.37, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 7.14340399300045e-07, "loss": 0.0927, "projector_lr": 2.143021197900135e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.96875, "sft_loss": 0.76171875, "step": 2284 }, { "dpo_loss": 0.09130859375, "epoch": 0.37, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 7.141109963115059e-07, "loss": 0.0749, "projector_lr": 2.142332988934518e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.8125, "sft_loss": 1.078125, "step": 2285 }, { "dpo_loss": 0.1962890625, "epoch": 0.37, "final_loss": 0.1962890625, "grad_norm": 0.0, "learning_rate": 7.138815381179588e-07, "loss": 0.134, "projector_lr": 2.1416446143538766e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.25, "rewards_train/margins": 3.4375, "rewards_train/rejected": -5.6875, "sft_loss": 0.66015625, "step": 2286 }, { "dpo_loss": 0.09716796875, "epoch": 0.37, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 7.136520247785659e-07, "loss": 0.1087, "projector_lr": 2.140956074335698e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.125, "rewards_train/margins": 5.46875, "rewards_train/rejected": -7.625, "sft_loss": 0.92578125, "step": 2287 }, { "dpo_loss": 0.154296875, "epoch": 0.37, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 7.134224563525033e-07, "loss": 0.1658, "projector_lr": 2.14026736905751e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.71875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.65625, "sft_loss": 0.84765625, "step": 2288 }, { "dpo_loss": 0.38671875, "epoch": 0.37, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 7.131928328989616e-07, "loss": 0.4813, "projector_lr": 2.139578498696885e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8125, "rewards_train/margins": 2.25, "rewards_train/rejected": -4.0625, "sft_loss": 0.74609375, "step": 2289 }, { "dpo_loss": 0.1669921875, "epoch": 0.37, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 7.129631544771454e-07, "loss": 0.1123, "projector_lr": 2.138889463431436e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 3.890625, "rewards_train/rejected": -5.25, "sft_loss": 1.171875, "step": 2290 }, { "dpo_loss": 0.107421875, "epoch": 0.37, "final_loss": 0.107421875, "grad_norm": 0.0, "learning_rate": 7.127334211462735e-07, "loss": 0.0981, "projector_lr": 2.1382002634388206e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.125, "sft_loss": 0.86328125, "step": 2291 }, { "dpo_loss": 0.220703125, "epoch": 0.37, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 7.125036329655787e-07, "loss": 0.1182, "projector_lr": 2.1375108988967362e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.421875, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.90625, "sft_loss": 0.6328125, "step": 2292 }, { "dpo_loss": 0.546875, "epoch": 0.37, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 7.122737899943083e-07, "loss": 0.328, "projector_lr": 2.136821369982925e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.75, "rewards_train/margins": 2.703125, "rewards_train/rejected": -4.4375, "sft_loss": 0.859375, "step": 2293 }, { "dpo_loss": 0.11376953125, "epoch": 0.37, "final_loss": 0.11376953125, "grad_norm": 0.0, "learning_rate": 7.120438922917235e-07, "loss": 0.1078, "projector_lr": 2.136131676875171e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -7.0, "sft_loss": 0.98828125, "step": 2294 }, { "dpo_loss": 0.240234375, "epoch": 0.37, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 7.118139399170998e-07, "loss": 0.4744, "projector_lr": 2.1354418197512997e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 2.59375, "rewards_train/rejected": -3.90625, "sft_loss": 0.96484375, "step": 2295 }, { "dpo_loss": 0.25390625, "epoch": 0.37, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 7.115839329297265e-07, "loss": 0.1509, "projector_lr": 2.13475179878918e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.65625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.34375, "sft_loss": 0.68359375, "step": 2296 }, { "dpo_loss": 0.275390625, "epoch": 0.37, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 7.11353871388907e-07, "loss": 0.1426, "projector_lr": 2.134061614166721e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.96875, "sft_loss": 0.95703125, "step": 2297 }, { "dpo_loss": 0.1494140625, "epoch": 0.37, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 7.111237553539592e-07, "loss": 0.0761, "projector_lr": 2.133371266061878e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 3.703125, "rewards_train/rejected": -5.0, "sft_loss": 0.80078125, "step": 2298 }, { "dpo_loss": 0.07958984375, "epoch": 0.37, "final_loss": 0.07958984375, "grad_norm": 0.0, "learning_rate": 7.108935848842144e-07, "loss": 0.1994, "projector_lr": 2.1326807546526433e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.34375, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.34375, "sft_loss": 0.578125, "step": 2299 }, { "dpo_loss": 0.07666015625, "epoch": 0.37, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 7.106633600390186e-07, "loss": 0.2731, "projector_lr": 2.131990080117056e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.53125, "rewards_train/margins": 6.21875, "rewards_train/rejected": -7.75, "sft_loss": 0.77734375, "step": 2300 }, { "dpo_loss": 0.1083984375, "epoch": 0.37, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 7.104330808777312e-07, "loss": 0.1895, "projector_lr": 2.1312992426331934e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.765625, "rewards_train/margins": 3.328125, "rewards_train/rejected": -5.09375, "sft_loss": 0.8125, "step": 2301 }, { "dpo_loss": 0.181640625, "epoch": 0.37, "final_loss": 0.181640625, "grad_norm": 0.0, "learning_rate": 7.102027474597261e-07, "loss": 0.1313, "projector_lr": 2.1306082423791786e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 3.203125, "rewards_train/rejected": -4.90625, "sft_loss": 0.92578125, "step": 2302 }, { "dpo_loss": 0.8515625, "epoch": 0.37, "final_loss": 0.8515625, "grad_norm": 0.0, "learning_rate": 7.099723598443909e-07, "loss": 0.4426, "projector_lr": 2.129917079533173e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.96875, "rewards_train/margins": 2.46875, "rewards_train/rejected": -4.4375, "sft_loss": 0.87890625, "step": 2303 }, { "dpo_loss": 0.1953125, "epoch": 0.37, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 7.097419180911275e-07, "loss": 0.4168, "projector_lr": 2.1292257542733828e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.78125, "rewards_train/margins": 3.34375, "rewards_train/rejected": -5.125, "sft_loss": 0.83984375, "step": 2304 }, { "dpo_loss": 0.032958984375, "epoch": 0.37, "final_loss": 0.032958984375, "grad_norm": 0.0, "learning_rate": 7.095114222593512e-07, "loss": 0.0969, "projector_lr": 2.128534266778054e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8203125, "rewards_train/margins": 8.4375, "rewards_train/rejected": -10.25, "sft_loss": 0.4921875, "step": 2305 }, { "dpo_loss": 0.50390625, "epoch": 0.37, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 7.092808724084918e-07, "loss": 0.2829, "projector_lr": 2.1278426172254756e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.9453125, "rewards_train/margins": 3.09375, "rewards_train/rejected": -5.03125, "sft_loss": 0.71484375, "step": 2306 }, { "dpo_loss": 0.15625, "epoch": 0.37, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 7.090502685979927e-07, "loss": 0.2392, "projector_lr": 2.127150805793978e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.28125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -6.34375, "sft_loss": 0.703125, "step": 2307 }, { "dpo_loss": 0.05810546875, "epoch": 0.37, "final_loss": 0.05810546875, "grad_norm": 0.0, "learning_rate": 7.088196108873112e-07, "loss": 0.1561, "projector_lr": 2.126458832661934e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.625, "sft_loss": 0.8125, "step": 2308 }, { "dpo_loss": 0.1904296875, "epoch": 0.37, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 7.085888993359188e-07, "loss": 0.2458, "projector_lr": 2.1257666980077564e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.6875, "sft_loss": 0.75390625, "step": 2309 }, { "dpo_loss": 0.09375, "epoch": 0.37, "final_loss": 0.09375, "grad_norm": 0.0, "learning_rate": 7.083581340033006e-07, "loss": 0.1808, "projector_lr": 2.1250744020099017e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -7.25, "sft_loss": 0.6796875, "step": 2310 }, { "dpo_loss": 0.361328125, "epoch": 0.37, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 7.081273149489556e-07, "loss": 0.2124, "projector_lr": 2.124381944846867e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.046875, "rewards_train/margins": 3.28125, "rewards_train/rejected": -5.3125, "sft_loss": 0.9375, "step": 2311 }, { "dpo_loss": 0.052001953125, "epoch": 0.37, "final_loss": 0.052001953125, "grad_norm": 0.0, "learning_rate": 7.07896442232397e-07, "loss": 0.1455, "projector_lr": 2.1236893266971913e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 6.21875, "rewards_train/rejected": -7.03125, "sft_loss": 0.6171875, "step": 2312 }, { "dpo_loss": 0.0311279296875, "epoch": 0.37, "final_loss": 0.0311279296875, "grad_norm": 0.0, "learning_rate": 7.07665515913151e-07, "loss": 0.0615, "projector_lr": 2.122996547739453e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.109375, "rewards_train/margins": 5.625, "rewards_train/rejected": -7.75, "sft_loss": 0.89453125, "step": 2313 }, { "dpo_loss": 0.1474609375, "epoch": 0.37, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 7.074345360507588e-07, "loss": 0.2303, "projector_lr": 2.1223036081522766e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 3.125, "rewards_train/rejected": -5.0, "sft_loss": 0.5625, "step": 2314 }, { "dpo_loss": 0.0517578125, "epoch": 0.37, "final_loss": 0.0517578125, "grad_norm": 0.0, "learning_rate": 7.072035027047743e-07, "loss": 0.1016, "projector_lr": 2.121610508114323e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8828125, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.53125, "sft_loss": 0.71484375, "step": 2315 }, { "dpo_loss": 0.10009765625, "epoch": 0.37, "final_loss": 0.10009765625, "grad_norm": 0.0, "learning_rate": 7.069724159347656e-07, "loss": 0.0718, "projector_lr": 2.120917247804297e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.25, "sft_loss": 0.8125, "step": 2316 }, { "dpo_loss": 0.224609375, "epoch": 0.37, "final_loss": 0.224609375, "grad_norm": 0.0, "learning_rate": 7.067412758003153e-07, "loss": 0.1539, "projector_lr": 2.120223827400946e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.6875, "rewards_train/margins": 3.828125, "rewards_train/rejected": -6.53125, "sft_loss": 0.9921875, "step": 2317 }, { "dpo_loss": 0.283203125, "epoch": 0.37, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 7.065100823610183e-07, "loss": 0.2936, "projector_lr": 2.119530247083055e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.578125, "rewards_train/margins": 2.46875, "rewards_train/rejected": -4.0625, "sft_loss": 0.9921875, "step": 2318 }, { "dpo_loss": 0.00567626953125, "epoch": 0.37, "final_loss": 0.00567626953125, "grad_norm": 0.0, "learning_rate": 7.062788356764844e-07, "loss": 0.0685, "projector_lr": 2.1188365070294534e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.28125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -7.71875, "sft_loss": 0.90625, "step": 2319 }, { "dpo_loss": 0.171875, "epoch": 0.37, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 7.060475358063368e-07, "loss": 0.2189, "projector_lr": 2.1181426074190104e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.078125, "rewards_train/margins": 3.40625, "rewards_train/rejected": -5.5, "sft_loss": 0.9609375, "step": 2320 }, { "dpo_loss": 0.07666015625, "epoch": 0.37, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 7.058161828102122e-07, "loss": 0.1266, "projector_lr": 2.117448548430637e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.78125, "sft_loss": 0.64453125, "step": 2321 }, { "dpo_loss": 0.04052734375, "epoch": 0.37, "final_loss": 0.04052734375, "grad_norm": 0.0, "learning_rate": 7.055847767477614e-07, "loss": 0.1319, "projector_lr": 2.1167543302432843e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.453125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.8125, "sft_loss": 0.8125, "step": 2322 }, { "dpo_loss": 0.1181640625, "epoch": 0.37, "final_loss": 0.1181640625, "grad_norm": 0.0, "learning_rate": 7.053533176786486e-07, "loss": 0.1178, "projector_lr": 2.116059953035946e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 3.5625, "rewards_train/rejected": -5.15625, "sft_loss": 0.86328125, "step": 2323 }, { "dpo_loss": 0.236328125, "epoch": 0.37, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 7.051218056625514e-07, "loss": 0.1977, "projector_lr": 2.1153654169876544e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.6875, "sft_loss": 0.671875, "step": 2324 }, { "dpo_loss": 0.04931640625, "epoch": 0.37, "final_loss": 0.04931640625, "grad_norm": 0.0, "learning_rate": 7.048902407591619e-07, "loss": 0.1551, "projector_lr": 2.114670722277486e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.703125, "rewards_train/margins": 5.375, "rewards_train/rejected": -7.0625, "sft_loss": 0.77734375, "step": 2325 }, { "dpo_loss": 0.2734375, "epoch": 0.37, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 7.046586230281851e-07, "loss": 0.2128, "projector_lr": 2.113975869084555e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.0, "sft_loss": 0.68359375, "step": 2326 }, { "dpo_loss": 0.33203125, "epoch": 0.37, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 7.044269525293396e-07, "loss": 0.5535, "projector_lr": 2.113280857588019e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 3.21875, "rewards_train/rejected": -4.09375, "sft_loss": 0.9296875, "step": 2327 }, { "dpo_loss": 0.322265625, "epoch": 0.37, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 7.04195229322358e-07, "loss": 0.2076, "projector_lr": 2.112585687967074e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.953125, "rewards_train/margins": 2.859375, "rewards_train/rejected": -4.8125, "sft_loss": 0.83203125, "step": 2328 }, { "dpo_loss": 0.06201171875, "epoch": 0.37, "final_loss": 0.06201171875, "grad_norm": 0.0, "learning_rate": 7.039634534669864e-07, "loss": 0.0838, "projector_lr": 2.1118903604009594e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.65625, "sft_loss": 0.86328125, "step": 2329 }, { "dpo_loss": 0.1328125, "epoch": 0.37, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 7.037316250229843e-07, "loss": 0.0801, "projector_lr": 2.111194875068953e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 3.3125, "rewards_train/rejected": -4.5, "sft_loss": 1.03125, "step": 2330 }, { "dpo_loss": 0.314453125, "epoch": 0.37, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 7.03499744050125e-07, "loss": 0.2298, "projector_lr": 2.110499232150375e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6875, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.625, "sft_loss": 1.1171875, "step": 2331 }, { "dpo_loss": 0.17578125, "epoch": 0.37, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 7.032678106081952e-07, "loss": 0.4091, "projector_lr": 2.1098034318245857e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 7.15625, "rewards_train/rejected": -8.5, "sft_loss": 0.85546875, "step": 2332 }, { "dpo_loss": 0.06591796875, "epoch": 0.37, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 7.030358247569948e-07, "loss": 0.0517, "projector_lr": 2.1091074742709845e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.55859375, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.0625, "sft_loss": 0.8046875, "step": 2333 }, { "dpo_loss": 0.265625, "epoch": 0.37, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 7.028037865563381e-07, "loss": 0.1463, "projector_lr": 2.1084113596690144e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.859375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -7.40625, "sft_loss": 0.8125, "step": 2334 }, { "dpo_loss": 0.1318359375, "epoch": 0.37, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 7.025716960660518e-07, "loss": 0.0932, "projector_lr": 2.1077150881981556e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6328125, "rewards_train/margins": 5.625, "rewards_train/rejected": -7.25, "sft_loss": 0.796875, "step": 2335 }, { "dpo_loss": 0.1923828125, "epoch": 0.37, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 7.02339553345977e-07, "loss": 0.1859, "projector_lr": 2.1070186600379312e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.65625, "rewards_train/margins": 3.625, "rewards_train/rejected": -5.28125, "sft_loss": 0.875, "step": 2336 }, { "dpo_loss": 0.416015625, "epoch": 0.37, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 7.021073584559676e-07, "loss": 0.32, "projector_lr": 2.106322075367903e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.828125, "rewards_train/margins": 2.703125, "rewards_train/rejected": -5.53125, "sft_loss": 0.875, "step": 2337 }, { "dpo_loss": 0.1640625, "epoch": 0.37, "final_loss": 0.1640625, "grad_norm": 0.0, "learning_rate": 7.018751114558915e-07, "loss": 0.1437, "projector_lr": 2.1056253343676746e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.359375, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.59375, "sft_loss": 0.765625, "step": 2338 }, { "dpo_loss": 0.388671875, "epoch": 0.37, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 7.016428124056297e-07, "loss": 0.4362, "projector_lr": 2.1049284372168892e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.734375, "rewards_train/margins": 5.5625, "rewards_train/rejected": -7.3125, "sft_loss": 0.85546875, "step": 2339 }, { "dpo_loss": 0.2470703125, "epoch": 0.37, "final_loss": 0.2470703125, "grad_norm": 0.0, "learning_rate": 7.014104613650766e-07, "loss": 0.2131, "projector_lr": 2.1042313840952303e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.484375, "rewards_train/margins": 2.609375, "rewards_train/rejected": -5.09375, "sft_loss": 0.828125, "step": 2340 }, { "dpo_loss": 0.04833984375, "epoch": 0.37, "final_loss": 0.04833984375, "grad_norm": 0.0, "learning_rate": 7.011780583941404e-07, "loss": 0.2464, "projector_lr": 2.103534175182421e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 5.875, "rewards_train/rejected": -7.65625, "sft_loss": 0.703125, "step": 2341 }, { "dpo_loss": 0.2431640625, "epoch": 0.37, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 7.009456035527419e-07, "loss": 0.2303, "projector_lr": 2.102836810658226e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 2.71875, "rewards_train/rejected": -3.875, "sft_loss": 0.97265625, "step": 2342 }, { "dpo_loss": 0.047607421875, "epoch": 0.37, "final_loss": 0.047607421875, "grad_norm": 0.0, "learning_rate": 7.007130969008161e-07, "loss": 0.1279, "projector_lr": 2.1021392907024484e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.546875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.625, "sft_loss": 0.6015625, "step": 2343 }, { "dpo_loss": 0.2109375, "epoch": 0.38, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 7.00480538498311e-07, "loss": 0.1573, "projector_lr": 2.101441615494933e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.671875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -7.09375, "sft_loss": 0.50390625, "step": 2344 }, { "dpo_loss": 0.068359375, "epoch": 0.38, "final_loss": 0.068359375, "grad_norm": 0.0, "learning_rate": 7.00247928405188e-07, "loss": 0.4577, "projector_lr": 2.1007437852155643e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.09375, "rewards_train/margins": 5.9375, "rewards_train/rejected": -7.03125, "sft_loss": 0.69921875, "step": 2345 }, { "dpo_loss": 0.3046875, "epoch": 0.38, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 7.000152666814218e-07, "loss": 0.1656, "projector_lr": 2.1000458000442654e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.390625, "rewards_train/margins": 1.9375, "rewards_train/rejected": -4.3125, "sft_loss": 0.875, "step": 2346 }, { "dpo_loss": 0.361328125, "epoch": 0.38, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 6.997825533870001e-07, "loss": 0.1889, "projector_lr": 2.0993476601610003e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.984375, "rewards_train/margins": 1.9765625, "rewards_train/rejected": -3.96875, "sft_loss": 0.6640625, "step": 2347 }, { "dpo_loss": 0.05419921875, "epoch": 0.38, "final_loss": 0.05419921875, "grad_norm": 0.0, "learning_rate": 6.995497885819246e-07, "loss": 0.2902, "projector_lr": 2.098649365745774e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.78125, "sft_loss": 0.84765625, "step": 2348 }, { "dpo_loss": 0.09619140625, "epoch": 0.38, "final_loss": 0.09619140625, "grad_norm": 0.0, "learning_rate": 6.993169723262098e-07, "loss": 0.2364, "projector_lr": 2.0979509169786296e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -4.875, "sft_loss": 0.6484375, "step": 2349 }, { "dpo_loss": 0.220703125, "epoch": 0.38, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 6.990841046798834e-07, "loss": 0.4685, "projector_lr": 2.0972523140396503e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.3125, "rewards_train/margins": 5.8125, "rewards_train/rejected": -8.125, "sft_loss": 0.640625, "step": 2350 }, { "dpo_loss": 0.236328125, "epoch": 0.38, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 6.988511857029865e-07, "loss": 0.1673, "projector_lr": 2.0965535571089597e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0625, "rewards_train/margins": 3.953125, "rewards_train/rejected": -5.03125, "sft_loss": 1.03125, "step": 2351 }, { "dpo_loss": 0.0213623046875, "epoch": 0.38, "final_loss": 0.0213623046875, "grad_norm": 0.0, "learning_rate": 6.986182154555738e-07, "loss": 0.0801, "projector_lr": 2.0958546463667214e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 7.125, "rewards_train/rejected": -8.25, "sft_loss": 0.546875, "step": 2352 }, { "dpo_loss": 0.72265625, "epoch": 0.38, "final_loss": 0.72265625, "grad_norm": 0.0, "learning_rate": 6.983851939977122e-07, "loss": 0.3992, "projector_lr": 2.095155581993137e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.9765625, "rewards_train/margins": 3.015625, "rewards_train/rejected": -5.0, "sft_loss": 0.90625, "step": 2353 }, { "dpo_loss": 0.515625, "epoch": 0.38, "final_loss": 0.515625, "grad_norm": 0.0, "learning_rate": 6.981521213894832e-07, "loss": 0.3267, "projector_lr": 2.0944563641684496e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.96875, "rewards_train/margins": 3.484375, "rewards_train/rejected": -5.46875, "sft_loss": 0.71875, "step": 2354 }, { "dpo_loss": 0.11474609375, "epoch": 0.38, "final_loss": 0.11474609375, "grad_norm": 0.0, "learning_rate": 6.979189976909802e-07, "loss": 0.0871, "projector_lr": 2.0937569930729405e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.09375, "sft_loss": 0.91796875, "step": 2355 }, { "dpo_loss": 0.2490234375, "epoch": 0.38, "final_loss": 0.2490234375, "grad_norm": 0.0, "learning_rate": 6.976858229623105e-07, "loss": 0.1466, "projector_lr": 2.093057468886932e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.65625, "rewards_train/margins": 3.578125, "rewards_train/rejected": -4.21875, "sft_loss": 1.0625, "step": 2356 }, { "dpo_loss": 0.031005859375, "epoch": 0.38, "final_loss": 0.031005859375, "grad_norm": 0.0, "learning_rate": 6.974525972635945e-07, "loss": 0.1871, "projector_lr": 2.0923577917907836e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.03125, "rewards_train/margins": 5.5625, "rewards_train/rejected": -7.59375, "sft_loss": 1.03125, "step": 2357 }, { "dpo_loss": 0.1435546875, "epoch": 0.38, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 6.972193206549653e-07, "loss": 0.1113, "projector_lr": 2.0916579619648964e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 3.125, "rewards_train/rejected": -4.78125, "sft_loss": 0.73828125, "step": 2358 }, { "dpo_loss": 0.5, "epoch": 0.38, "final_loss": 0.5, "grad_norm": 0.0, "learning_rate": 6.969859931965698e-07, "loss": 0.2918, "projector_lr": 2.0909579795897095e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.6875, "rewards_train/rejected": -5.1875, "sft_loss": 0.71875, "step": 2359 }, { "dpo_loss": 0.012939453125, "epoch": 0.38, "final_loss": 0.012939453125, "grad_norm": 0.0, "learning_rate": 6.967526149485676e-07, "loss": 0.0172, "projector_lr": 2.090257844845703e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.375, "sft_loss": 0.85546875, "step": 2360 }, { "dpo_loss": 0.5625, "epoch": 0.38, "final_loss": 0.5625, "grad_norm": 0.0, "learning_rate": 6.965191859711313e-07, "loss": 0.4646, "projector_lr": 2.0895575579133937e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.21875, "rewards_train/margins": 1.2578125, "rewards_train/rejected": -3.46875, "sft_loss": 0.8203125, "step": 2361 }, { "dpo_loss": 0.2109375, "epoch": 0.38, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 6.962857063244467e-07, "loss": 0.2857, "projector_lr": 2.0888571189733405e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.5, "sft_loss": 0.73828125, "step": 2362 }, { "dpo_loss": 0.059814453125, "epoch": 0.38, "final_loss": 0.059814453125, "grad_norm": 0.0, "learning_rate": 6.96052176068713e-07, "loss": 0.0407, "projector_lr": 2.088156528206139e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1884765625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.21875, "sft_loss": 0.7265625, "step": 2363 }, { "dpo_loss": 0.3203125, "epoch": 0.38, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 6.958185952641418e-07, "loss": 0.4335, "projector_lr": 2.0874557857924255e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 2.125, "rewards_train/rejected": -3.625, "sft_loss": 0.8046875, "step": 2364 }, { "dpo_loss": 0.064453125, "epoch": 0.38, "final_loss": 0.064453125, "grad_norm": 0.0, "learning_rate": 6.955849639709581e-07, "loss": 0.1225, "projector_lr": 2.0867548919128745e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.6875, "sft_loss": 0.828125, "step": 2365 }, { "dpo_loss": 0.0159912109375, "epoch": 0.38, "final_loss": 0.0159912109375, "grad_norm": 0.0, "learning_rate": 6.953512822494002e-07, "loss": 0.0408, "projector_lr": 2.0860538467482006e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 5.9375, "rewards_train/rejected": -7.0, "sft_loss": 0.78125, "step": 2366 }, { "dpo_loss": 0.146484375, "epoch": 0.38, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 6.951175501597188e-07, "loss": 0.4813, "projector_lr": 2.0853526504791567e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.796875, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.875, "sft_loss": 0.96875, "step": 2367 }, { "dpo_loss": 0.2255859375, "epoch": 0.38, "final_loss": 0.2255859375, "grad_norm": 0.0, "learning_rate": 6.948837677621781e-07, "loss": 0.2054, "projector_lr": 2.0846513032865345e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.59375, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.96875, "sft_loss": 0.80078125, "step": 2368 }, { "dpo_loss": 0.1611328125, "epoch": 0.38, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 6.94649935117055e-07, "loss": 0.293, "projector_lr": 2.0839498053511653e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.625, "sft_loss": 0.67578125, "step": 2369 }, { "dpo_loss": 0.031982421875, "epoch": 0.38, "final_loss": 0.031982421875, "grad_norm": 0.0, "learning_rate": 6.944160522846394e-07, "loss": 0.308, "projector_lr": 2.0832481568539185e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.09375, "rewards_train/margins": 6.90625, "rewards_train/rejected": -8.0, "sft_loss": 0.7734375, "step": 2370 }, { "dpo_loss": 0.2109375, "epoch": 0.38, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 6.94182119325234e-07, "loss": 0.2799, "projector_lr": 2.0825463579757022e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.0, "sft_loss": 0.703125, "step": 2371 }, { "dpo_loss": 0.07373046875, "epoch": 0.38, "final_loss": 0.07373046875, "grad_norm": 0.0, "learning_rate": 6.939481362991547e-07, "loss": 0.0624, "projector_lr": 2.0818444088974643e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.40625, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.5, "sft_loss": 0.72265625, "step": 2372 }, { "dpo_loss": 0.5390625, "epoch": 0.38, "final_loss": 0.5390625, "grad_norm": 0.0, "learning_rate": 6.937141032667302e-07, "loss": 0.282, "projector_lr": 2.0811423098001907e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.875, "rewards_train/margins": 2.125, "rewards_train/rejected": -4.0, "sft_loss": 0.8203125, "step": 2373 }, { "dpo_loss": 0.115234375, "epoch": 0.38, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 6.93480020288302e-07, "loss": 0.1683, "projector_lr": 2.0804400608649063e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.375, "sft_loss": 0.80859375, "step": 2374 }, { "dpo_loss": 0.02099609375, "epoch": 0.38, "final_loss": 0.02099609375, "grad_norm": 0.0, "learning_rate": 6.932458874242247e-07, "loss": 0.2118, "projector_lr": 2.0797376622726745e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16015625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -5.84375, "sft_loss": 0.51171875, "step": 2375 }, { "dpo_loss": 0.236328125, "epoch": 0.38, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 6.930117047348654e-07, "loss": 0.1338, "projector_lr": 2.0790351142045966e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 3.6875, "rewards_train/rejected": -4.9375, "sft_loss": 0.75390625, "step": 2376 }, { "dpo_loss": 0.80078125, "epoch": 0.38, "final_loss": 0.80078125, "grad_norm": 0.0, "learning_rate": 6.927774722806047e-07, "loss": 0.4591, "projector_lr": 2.0783324168418143e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.75, "sft_loss": 0.7890625, "step": 2377 }, { "dpo_loss": 0.30078125, "epoch": 0.38, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 6.92543190121835e-07, "loss": 0.1961, "projector_lr": 2.0776295703655054e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.890625, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.9375, "sft_loss": 0.9296875, "step": 2378 }, { "dpo_loss": 0.047119140625, "epoch": 0.38, "final_loss": 0.047119140625, "grad_norm": 0.0, "learning_rate": 6.923088583189626e-07, "loss": 0.1888, "projector_lr": 2.076926574956888e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.78125, "rewards_train/margins": 3.625, "rewards_train/rejected": -5.40625, "sft_loss": 1.078125, "step": 2379 }, { "dpo_loss": 0.09912109375, "epoch": 0.38, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 6.920744769324057e-07, "loss": 0.1292, "projector_lr": 2.0762234307972175e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 6.1875, "rewards_train/rejected": -7.03125, "sft_loss": 0.78125, "step": 2380 }, { "dpo_loss": 0.1689453125, "epoch": 0.38, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 6.918400460225962e-07, "loss": 0.1643, "projector_lr": 2.0755201380677888e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.703125, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.875, "sft_loss": 0.65625, "step": 2381 }, { "dpo_loss": 0.12353515625, "epoch": 0.38, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 6.916055656499778e-07, "loss": 0.3778, "projector_lr": 2.0748166969499335e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5625, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.8125, "sft_loss": 0.65625, "step": 2382 }, { "dpo_loss": 0.0244140625, "epoch": 0.38, "final_loss": 0.0244140625, "grad_norm": 0.0, "learning_rate": 6.913710358750074e-07, "loss": 0.0546, "projector_lr": 2.0741131076250226e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.796875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -7.09375, "sft_loss": 0.98828125, "step": 2383 }, { "dpo_loss": 0.57421875, "epoch": 0.38, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 6.911364567581551e-07, "loss": 0.4113, "projector_lr": 2.0734093702744656e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 2.15625, "rewards_train/rejected": -3.265625, "sft_loss": 0.5859375, "step": 2384 }, { "dpo_loss": 0.349609375, "epoch": 0.38, "final_loss": 0.349609375, "grad_norm": 0.0, "learning_rate": 6.909018283599029e-07, "loss": 0.3165, "projector_lr": 2.072705485079709e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 4.375, "rewards_train/rejected": -6.125, "sft_loss": 0.95703125, "step": 2385 }, { "dpo_loss": 0.1904296875, "epoch": 0.38, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 6.906671507407463e-07, "loss": 0.1433, "projector_lr": 2.072001452222239e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.25, "rewards_train/margins": 3.390625, "rewards_train/rejected": -5.65625, "sft_loss": 1.0234375, "step": 2386 }, { "dpo_loss": 0.0216064453125, "epoch": 0.38, "final_loss": 0.0216064453125, "grad_norm": 0.0, "learning_rate": 6.904324239611924e-07, "loss": 0.1162, "projector_lr": 2.0712972718835773e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 5.75, "rewards_train/rejected": -7.34375, "sft_loss": 0.87890625, "step": 2387 }, { "dpo_loss": 0.5390625, "epoch": 0.38, "final_loss": 0.5390625, "grad_norm": 0.0, "learning_rate": 6.901976480817623e-07, "loss": 0.3307, "projector_lr": 2.070592944245287e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.96875, "rewards_train/margins": 2.921875, "rewards_train/rejected": -4.90625, "sft_loss": 1.1015625, "step": 2388 }, { "dpo_loss": 0.0654296875, "epoch": 0.38, "final_loss": 0.0654296875, "grad_norm": 0.0, "learning_rate": 6.899628231629886e-07, "loss": 0.248, "projector_lr": 2.0698884694889657e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.482421875, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.625, "sft_loss": 0.61328125, "step": 2389 }, { "dpo_loss": 0.05029296875, "epoch": 0.38, "final_loss": 0.05029296875, "grad_norm": 0.0, "learning_rate": 6.897279492654176e-07, "loss": 0.1179, "projector_lr": 2.069183847796253e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.765625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.53125, "sft_loss": 0.7734375, "step": 2390 }, { "dpo_loss": 0.5546875, "epoch": 0.38, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 6.89493026449607e-07, "loss": 0.287, "projector_lr": 2.0684790793488215e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.328125, "rewards_train/margins": 2.828125, "rewards_train/rejected": -5.15625, "sft_loss": 0.78125, "step": 2391 }, { "dpo_loss": 0.376953125, "epoch": 0.38, "final_loss": 0.376953125, "grad_norm": 0.0, "learning_rate": 6.892580547761285e-07, "loss": 0.2177, "projector_lr": 2.0677741643283856e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 3.125, "rewards_train/rejected": -4.0625, "sft_loss": 0.83203125, "step": 2392 }, { "dpo_loss": 0.322265625, "epoch": 0.38, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 6.890230343055652e-07, "loss": 0.2075, "projector_lr": 2.067069102916696e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.34375, "sft_loss": 0.671875, "step": 2393 }, { "dpo_loss": 0.97265625, "epoch": 0.38, "final_loss": 0.97265625, "grad_norm": 0.0, "learning_rate": 6.887879650985134e-07, "loss": 0.6898, "projector_lr": 2.06636389529554e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.90625, "rewards_train/margins": 2.546875, "rewards_train/rejected": -4.4375, "sft_loss": 0.7265625, "step": 2394 }, { "dpo_loss": 0.25390625, "epoch": 0.38, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 6.88552847215582e-07, "loss": 0.6991, "projector_lr": 2.065658541646746e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.78125, "rewards_train/margins": 3.34375, "rewards_train/rejected": -5.125, "sft_loss": 0.93359375, "step": 2395 }, { "dpo_loss": 0.404296875, "epoch": 0.38, "final_loss": 0.404296875, "grad_norm": 0.0, "learning_rate": 6.88317680717392e-07, "loss": 0.3522, "projector_lr": 2.064953042152176e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.140625, "rewards_train/margins": 1.859375, "rewards_train/rejected": -3.0, "sft_loss": 0.69140625, "step": 2396 }, { "dpo_loss": 0.0751953125, "epoch": 0.38, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 6.880824656645774e-07, "loss": 0.1347, "projector_lr": 2.0642473969937323e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.75, "sft_loss": 0.890625, "step": 2397 }, { "dpo_loss": 0.291015625, "epoch": 0.38, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 6.878472021177847e-07, "loss": 0.2636, "projector_lr": 2.0635416063533544e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.75, "rewards_train/margins": 3.546875, "rewards_train/rejected": -5.3125, "sft_loss": 0.83984375, "step": 2398 }, { "dpo_loss": 0.181640625, "epoch": 0.38, "final_loss": 0.181640625, "grad_norm": 0.0, "learning_rate": 6.876118901376724e-07, "loss": 0.1789, "projector_lr": 2.0628356704130175e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.56640625, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.34375, "sft_loss": 0.75, "step": 2399 }, { "dpo_loss": 0.275390625, "epoch": 0.38, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 6.87376529784912e-07, "loss": 0.1773, "projector_lr": 2.0621295893547363e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 2.1875, "rewards_train/rejected": -4.03125, "sft_loss": 0.75, "step": 2400 }, { "dpo_loss": 0.08935546875, "epoch": 0.38, "final_loss": 0.08935546875, "grad_norm": 0.0, "learning_rate": 6.871411211201875e-07, "loss": 0.1249, "projector_lr": 2.0614233633605625e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.5625, "sft_loss": 0.8203125, "step": 2401 }, { "dpo_loss": 0.1376953125, "epoch": 0.38, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 6.869056642041948e-07, "loss": 0.1252, "projector_lr": 2.0607169926125846e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9375, "rewards_train/margins": 5.25, "rewards_train/rejected": -7.1875, "sft_loss": 0.8125, "step": 2402 }, { "dpo_loss": 0.345703125, "epoch": 0.38, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 6.866701590976428e-07, "loss": 0.3023, "projector_lr": 2.0600104772929285e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 2.140625, "rewards_train/rejected": -3.140625, "sft_loss": 0.9765625, "step": 2403 }, { "dpo_loss": 0.0732421875, "epoch": 0.38, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 6.864346058612527e-07, "loss": 0.1994, "projector_lr": 2.0593038175837583e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.28125, "sft_loss": 0.5234375, "step": 2404 }, { "dpo_loss": 0.08154296875, "epoch": 0.38, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 6.861990045557579e-07, "loss": 0.0923, "projector_lr": 2.0585970136672736e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 3.375, "rewards_train/rejected": -4.25, "sft_loss": 0.71875, "step": 2405 }, { "dpo_loss": 0.2021484375, "epoch": 0.38, "final_loss": 0.2021484375, "grad_norm": 0.0, "learning_rate": 6.859633552419044e-07, "loss": 0.114, "projector_lr": 2.0578900657257134e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.78125, "rewards_train/margins": 3.53125, "rewards_train/rejected": -5.3125, "sft_loss": 0.796875, "step": 2406 }, { "dpo_loss": 0.1875, "epoch": 0.39, "final_loss": 0.1875, "grad_norm": 0.0, "learning_rate": 6.857276579804504e-07, "loss": 0.3535, "projector_lr": 2.0571829739413513e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 3.6875, "rewards_train/rejected": -4.84375, "sft_loss": 0.73828125, "step": 2407 }, { "dpo_loss": 0.01519775390625, "epoch": 0.39, "final_loss": 0.01519775390625, "grad_norm": 0.0, "learning_rate": 6.854919128321668e-07, "loss": 0.0166, "projector_lr": 2.0564757384965006e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.34375, "sft_loss": 0.859375, "step": 2408 }, { "dpo_loss": 0.3359375, "epoch": 0.39, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 6.852561198578363e-07, "loss": 0.2387, "projector_lr": 2.055768359573509e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.03125, "rewards_train/margins": 2.25, "rewards_train/rejected": -4.28125, "sft_loss": 0.8828125, "step": 2409 }, { "dpo_loss": 0.055419921875, "epoch": 0.39, "final_loss": 0.055419921875, "grad_norm": 0.0, "learning_rate": 6.850202791182545e-07, "loss": 0.0872, "projector_lr": 2.0550608373547634e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6875, "rewards_train/margins": 5.6875, "rewards_train/rejected": -7.375, "sft_loss": 0.5390625, "step": 2410 }, { "dpo_loss": 0.177734375, "epoch": 0.39, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 6.84784390674229e-07, "loss": 0.11, "projector_lr": 2.054353172022687e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.625, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.4375, "sft_loss": 0.9296875, "step": 2411 }, { "dpo_loss": 0.10693359375, "epoch": 0.39, "final_loss": 0.10693359375, "grad_norm": 0.0, "learning_rate": 6.845484545865797e-07, "loss": 0.1554, "projector_lr": 2.0536453637597395e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.265625, "rewards_train/margins": 6.96875, "rewards_train/rejected": -8.25, "sft_loss": 0.73828125, "step": 2412 }, { "dpo_loss": 0.1865234375, "epoch": 0.39, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 6.843124709161391e-07, "loss": 0.313, "projector_lr": 2.0529374127484176e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 3.0625, "rewards_train/rejected": -4.40625, "sft_loss": 0.67578125, "step": 2413 }, { "dpo_loss": 0.0140380859375, "epoch": 0.39, "final_loss": 0.0140380859375, "grad_norm": 0.0, "learning_rate": 6.840764397237512e-07, "loss": 0.039, "projector_lr": 2.052229319171254e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1875, "rewards_train/margins": 5.875, "rewards_train/rejected": -7.0625, "sft_loss": 0.83984375, "step": 2414 }, { "dpo_loss": 0.236328125, "epoch": 0.39, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 6.838403610702735e-07, "loss": 0.1757, "projector_lr": 2.0515210832108203e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.80859375, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.15625, "sft_loss": 0.70703125, "step": 2415 }, { "dpo_loss": 0.0556640625, "epoch": 0.39, "final_loss": 0.0556640625, "grad_norm": 0.0, "learning_rate": 6.836042350165742e-07, "loss": 0.0956, "projector_lr": 2.050812705049723e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07958984375, "rewards_train/margins": 5.46875, "rewards_train/rejected": -5.5625, "sft_loss": 0.625, "step": 2416 }, { "dpo_loss": 0.4453125, "epoch": 0.39, "final_loss": 0.4453125, "grad_norm": 0.0, "learning_rate": 6.833680616235353e-07, "loss": 0.3423, "projector_lr": 2.0501041848706063e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.828125, "rewards_train/margins": 2.90625, "rewards_train/rejected": -4.75, "sft_loss": 0.87890625, "step": 2417 }, { "dpo_loss": 0.25390625, "epoch": 0.39, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 6.831318409520496e-07, "loss": 0.1972, "projector_lr": 2.049395522856149e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.125, "rewards_train/margins": 3.1875, "rewards_train/rejected": -4.3125, "sft_loss": 0.83984375, "step": 2418 }, { "dpo_loss": 0.291015625, "epoch": 0.39, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 6.828955730630232e-07, "loss": 0.1925, "projector_lr": 2.04868671918907e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 3.0625, "rewards_train/rejected": -4.5, "sft_loss": 0.74609375, "step": 2419 }, { "dpo_loss": 0.06396484375, "epoch": 0.39, "final_loss": 0.06396484375, "grad_norm": 0.0, "learning_rate": 6.826592580173736e-07, "loss": 0.0726, "projector_lr": 2.0479777740521207e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.78125, "sft_loss": 0.890625, "step": 2420 }, { "dpo_loss": 0.7109375, "epoch": 0.39, "final_loss": 0.7109375, "grad_norm": 0.0, "learning_rate": 6.824228958760308e-07, "loss": 0.4618, "projector_lr": 2.047268687628092e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.8125, "rewards_train/margins": 3.140625, "rewards_train/rejected": -5.96875, "sft_loss": 1.1796875, "step": 2421 }, { "dpo_loss": 0.1318359375, "epoch": 0.39, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 6.821864866999369e-07, "loss": 0.1051, "projector_lr": 2.046559460099811e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 3.734375, "rewards_train/rejected": -5.1875, "sft_loss": 0.71875, "step": 2422 }, { "dpo_loss": 0.1728515625, "epoch": 0.39, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 6.819500305500462e-07, "loss": 0.1135, "projector_lr": 2.045850091650139e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7890625, "rewards_train/margins": 3.375, "rewards_train/rejected": -5.15625, "sft_loss": 0.92578125, "step": 2423 }, { "dpo_loss": 0.11962890625, "epoch": 0.39, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 6.817135274873253e-07, "loss": 0.0994, "projector_lr": 2.0451405824619757e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.265625, "rewards_train/margins": 5.75, "rewards_train/rejected": -7.0, "sft_loss": 0.96484375, "step": 2424 }, { "dpo_loss": 0.314453125, "epoch": 0.39, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 6.81476977572752e-07, "loss": 0.2141, "projector_lr": 2.0444309327182562e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.296875, "rewards_train/margins": 3.671875, "rewards_train/rejected": -4.96875, "sft_loss": 0.79296875, "step": 2425 }, { "dpo_loss": 0.1806640625, "epoch": 0.39, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 6.812403808673175e-07, "loss": 0.1489, "projector_lr": 2.0437211426019527e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.390625, "rewards_train/margins": 3.28125, "rewards_train/rejected": -5.6875, "sft_loss": 0.8828125, "step": 2426 }, { "dpo_loss": 0.275390625, "epoch": 0.39, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 6.810037374320238e-07, "loss": 0.3009, "projector_lr": 2.0430112122960717e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 3.3125, "rewards_train/rejected": -3.84375, "sft_loss": 0.61328125, "step": 2427 }, { "dpo_loss": 0.2578125, "epoch": 0.39, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 6.807670473278857e-07, "loss": 0.1494, "projector_lr": 2.0423011419836574e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.5078125, "rewards_train/margins": 5.3125, "rewards_train/rejected": -4.8125, "sft_loss": 0.765625, "step": 2428 }, { "dpo_loss": 0.4609375, "epoch": 0.39, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 6.805303106159301e-07, "loss": 0.28, "projector_lr": 2.0415909318477905e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 3.890625, "rewards_train/rejected": -5.6875, "sft_loss": 0.75390625, "step": 2429 }, { "dpo_loss": 0.1513671875, "epoch": 0.39, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 6.802935273571955e-07, "loss": 0.0844, "projector_lr": 2.0408805820715864e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.28125, "sft_loss": 1.1328125, "step": 2430 }, { "dpo_loss": 0.0546875, "epoch": 0.39, "final_loss": 0.0546875, "grad_norm": 0.0, "learning_rate": 6.800566976127326e-07, "loss": 0.1859, "projector_lr": 2.0401700928381983e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.125, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.875, "sft_loss": 0.84765625, "step": 2431 }, { "dpo_loss": 0.265625, "epoch": 0.39, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 6.798198214436039e-07, "loss": 0.1942, "projector_lr": 2.039459464330812e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.34375, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.65625, "sft_loss": 0.84765625, "step": 2432 }, { "dpo_loss": 0.59765625, "epoch": 0.39, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 6.795828989108845e-07, "loss": 0.4386, "projector_lr": 2.0387486967326535e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.765625, "rewards_train/margins": 2.140625, "rewards_train/rejected": -3.90625, "sft_loss": 0.78515625, "step": 2433 }, { "dpo_loss": 0.1689453125, "epoch": 0.39, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 6.793459300756604e-07, "loss": 0.1793, "projector_lr": 2.0380377902269814e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.375, "sft_loss": 0.55859375, "step": 2434 }, { "dpo_loss": 0.37109375, "epoch": 0.39, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 6.791089149990305e-07, "loss": 0.2176, "projector_lr": 2.0373267449970916e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.78125, "rewards_train/margins": 3.296875, "rewards_train/rejected": -5.0625, "sft_loss": 0.88671875, "step": 2435 }, { "dpo_loss": 0.35546875, "epoch": 0.39, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 6.788718537421052e-07, "loss": 0.2479, "projector_lr": 2.036615561226316e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.796875, "rewards_train/margins": 2.375, "rewards_train/rejected": -4.15625, "sft_loss": 0.640625, "step": 2436 }, { "dpo_loss": 0.134765625, "epoch": 0.39, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 6.786347463660067e-07, "loss": 0.351, "projector_lr": 2.0359042390980204e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.75, "sft_loss": 0.90234375, "step": 2437 }, { "dpo_loss": 0.027587890625, "epoch": 0.39, "final_loss": 0.027587890625, "grad_norm": 0.0, "learning_rate": 6.783975929318694e-07, "loss": 0.0869, "projector_lr": 2.035192778795608e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.09375, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.09375, "sft_loss": 0.6484375, "step": 2438 }, { "dpo_loss": 0.0908203125, "epoch": 0.39, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 6.781603935008394e-07, "loss": 0.1003, "projector_lr": 2.0344811805025185e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.6875, "sft_loss": 1.1171875, "step": 2439 }, { "dpo_loss": 0.0849609375, "epoch": 0.39, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 6.779231481340747e-07, "loss": 0.0454, "projector_lr": 2.033769444402224e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.4375, "sft_loss": 1.046875, "step": 2440 }, { "dpo_loss": 0.201171875, "epoch": 0.39, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 6.77685856892745e-07, "loss": 0.1202, "projector_lr": 2.0330575706782355e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.375, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.1875, "sft_loss": 0.76171875, "step": 2441 }, { "dpo_loss": 0.04638671875, "epoch": 0.39, "final_loss": 0.04638671875, "grad_norm": 0.0, "learning_rate": 6.774485198380322e-07, "loss": 0.0476, "projector_lr": 2.0323455595140967e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.0, "sft_loss": 0.71875, "step": 2442 }, { "dpo_loss": 0.2890625, "epoch": 0.39, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 6.772111370311294e-07, "loss": 0.2482, "projector_lr": 2.0316334110933883e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.203125, "rewards_train/margins": 3.71875, "rewards_train/rejected": -5.90625, "sft_loss": 1.046875, "step": 2443 }, { "dpo_loss": 0.11279296875, "epoch": 0.39, "final_loss": 0.11279296875, "grad_norm": 0.0, "learning_rate": 6.769737085332422e-07, "loss": 0.1392, "projector_lr": 2.0309211255997266e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.15625, "sft_loss": 0.640625, "step": 2444 }, { "dpo_loss": 0.1689453125, "epoch": 0.39, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 6.767362344055875e-07, "loss": 0.5415, "projector_lr": 2.0302087032167625e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.109375, "rewards_train/margins": 3.75, "rewards_train/rejected": -5.875, "sft_loss": 0.953125, "step": 2445 }, { "dpo_loss": 0.6484375, "epoch": 0.39, "final_loss": 0.6484375, "grad_norm": 0.0, "learning_rate": 6.76498714709394e-07, "loss": 0.5445, "projector_lr": 2.029496144128182e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.84375, "sft_loss": 0.85546875, "step": 2446 }, { "dpo_loss": 0.50390625, "epoch": 0.39, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 6.762611495059026e-07, "loss": 0.3127, "projector_lr": 2.028783448517708e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.765625, "rewards_train/margins": 3.828125, "rewards_train/rejected": -5.59375, "sft_loss": 0.91015625, "step": 2447 }, { "dpo_loss": 0.0703125, "epoch": 0.39, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 6.760235388563652e-07, "loss": 0.1997, "projector_lr": 2.028070616569096e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.5, "sft_loss": 0.88671875, "step": 2448 }, { "dpo_loss": 0.412109375, "epoch": 0.39, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 6.757858828220461e-07, "loss": 0.3971, "projector_lr": 2.0273576484661383e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.546875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.78125, "sft_loss": 0.6484375, "step": 2449 }, { "dpo_loss": 0.26953125, "epoch": 0.39, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 6.755481814642208e-07, "loss": 0.2755, "projector_lr": 2.0266445443926625e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.03125, "rewards_train/margins": 2.1875, "rewards_train/rejected": -3.21875, "sft_loss": 0.71484375, "step": 2450 }, { "dpo_loss": 0.107421875, "epoch": 0.39, "final_loss": 0.107421875, "grad_norm": 0.0, "learning_rate": 6.753104348441768e-07, "loss": 0.1949, "projector_lr": 2.0259313045325305e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.0625, "sft_loss": 0.6328125, "step": 2451 }, { "dpo_loss": 0.1455078125, "epoch": 0.39, "final_loss": 0.1455078125, "grad_norm": 0.0, "learning_rate": 6.750726430232132e-07, "loss": 0.0799, "projector_lr": 2.02521792906964e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.515625, "rewards_train/margins": 3.734375, "rewards_train/rejected": -5.25, "sft_loss": 0.77734375, "step": 2452 }, { "dpo_loss": 0.58984375, "epoch": 0.39, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 6.748348060626407e-07, "loss": 0.3971, "projector_lr": 2.0245044181879223e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 3.140625, "rewards_train/rejected": -4.5, "sft_loss": 0.9140625, "step": 2453 }, { "dpo_loss": 0.06689453125, "epoch": 0.39, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 6.745969240237814e-07, "loss": 0.4798, "projector_lr": 2.0237907720713443e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 3.671875, "rewards_train/rejected": -4.65625, "sft_loss": 0.73046875, "step": 2454 }, { "dpo_loss": 0.1435546875, "epoch": 0.39, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 6.743589969679696e-07, "loss": 0.1276, "projector_lr": 2.023076990903909e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 6.15625, "rewards_train/rejected": -7.15625, "sft_loss": 0.72265625, "step": 2455 }, { "dpo_loss": 0.1875, "epoch": 0.39, "final_loss": 0.1875, "grad_norm": 0.0, "learning_rate": 6.74121024956551e-07, "loss": 0.3423, "projector_lr": 2.022363074869653e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.60546875, "rewards_train/margins": 5.5, "rewards_train/rejected": -4.90625, "sft_loss": 0.6640625, "step": 2456 }, { "dpo_loss": 0.0966796875, "epoch": 0.39, "final_loss": 0.0966796875, "grad_norm": 0.0, "learning_rate": 6.738830080508824e-07, "loss": 0.1474, "projector_lr": 2.0216490241526474e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 3.6875, "rewards_train/rejected": -4.5625, "sft_loss": 0.8984375, "step": 2457 }, { "dpo_loss": 0.146484375, "epoch": 0.39, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 6.736449463123328e-07, "loss": 0.21, "projector_lr": 2.0209348389369984e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.1875, "sft_loss": 0.796875, "step": 2458 }, { "dpo_loss": 0.146484375, "epoch": 0.39, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 6.734068398022823e-07, "loss": 0.0959, "projector_lr": 2.020220519406847e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.0625, "sft_loss": 0.7265625, "step": 2459 }, { "dpo_loss": 0.1982421875, "epoch": 0.39, "final_loss": 0.1982421875, "grad_norm": 0.0, "learning_rate": 6.731686885821231e-07, "loss": 0.2008, "projector_lr": 2.019506065746369e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.71875, "sft_loss": 0.828125, "step": 2460 }, { "dpo_loss": 0.00946044921875, "epoch": 0.39, "final_loss": 0.00946044921875, "grad_norm": 0.0, "learning_rate": 6.729304927132582e-07, "loss": 0.1368, "projector_lr": 2.0187914781397747e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 8.25, "rewards_train/rejected": -9.125, "sft_loss": 0.50390625, "step": 2461 }, { "dpo_loss": 0.1669921875, "epoch": 0.39, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 6.726922522571028e-07, "loss": 0.2181, "projector_lr": 2.0180767567713084e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.5, "sft_loss": 1.0078125, "step": 2462 }, { "dpo_loss": 0.52734375, "epoch": 0.39, "final_loss": 0.52734375, "grad_norm": 0.0, "learning_rate": 6.724539672750832e-07, "loss": 0.5089, "projector_lr": 2.0173619018252495e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.90625, "rewards_train/margins": 2.828125, "rewards_train/rejected": -4.75, "sft_loss": 0.859375, "step": 2463 }, { "dpo_loss": 0.328125, "epoch": 0.39, "final_loss": 0.328125, "grad_norm": 0.0, "learning_rate": 6.722156378286371e-07, "loss": 0.4558, "projector_lr": 2.0166469134859116e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.28125, "rewards_train/margins": 2.65625, "rewards_train/rejected": -4.9375, "sft_loss": 0.77734375, "step": 2464 }, { "dpo_loss": 0.048095703125, "epoch": 0.39, "final_loss": 0.048095703125, "grad_norm": 0.0, "learning_rate": 6.719772639792142e-07, "loss": 0.0307, "projector_lr": 2.0159317919376425e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59375, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.0625, "sft_loss": 0.703125, "step": 2465 }, { "dpo_loss": 0.10986328125, "epoch": 0.39, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 6.717388457882749e-07, "loss": 0.1315, "projector_lr": 2.0152165373648247e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.90625, "sft_loss": 0.7421875, "step": 2466 }, { "dpo_loss": 0.12353515625, "epoch": 0.39, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 6.715003833172916e-07, "loss": 0.1138, "projector_lr": 2.014501149951875e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 2.921875, "rewards_train/rejected": -4.0, "sft_loss": 0.75, "step": 2467 }, { "dpo_loss": 0.045654296875, "epoch": 0.39, "final_loss": 0.045654296875, "grad_norm": 0.0, "learning_rate": 6.71261876627748e-07, "loss": 0.0311, "projector_lr": 2.0137856298832444e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.28125, "sft_loss": 0.58984375, "step": 2468 }, { "dpo_loss": 0.279296875, "epoch": 0.4, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 6.710233257811392e-07, "loss": 0.239, "projector_lr": 2.0130699773434177e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 3.90625, "rewards_train/rejected": -4.9375, "sft_loss": 0.80859375, "step": 2469 }, { "dpo_loss": 0.30078125, "epoch": 0.4, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 6.707847308389713e-07, "loss": 0.221, "projector_lr": 2.012354192516914e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.953125, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.703125, "sft_loss": 0.953125, "step": 2470 }, { "dpo_loss": 0.009765625, "epoch": 0.4, "final_loss": 0.009765625, "grad_norm": 0.0, "learning_rate": 6.705460918627623e-07, "loss": 0.109, "projector_lr": 2.0116382755882872e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.125, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.25, "sft_loss": 0.765625, "step": 2471 }, { "dpo_loss": 0.52734375, "epoch": 0.4, "final_loss": 0.52734375, "grad_norm": 0.0, "learning_rate": 6.703074089140413e-07, "loss": 0.4797, "projector_lr": 2.010922226742124e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.28125, "rewards_train/margins": 2.65625, "rewards_train/rejected": -4.9375, "sft_loss": 1.0078125, "step": 2472 }, { "dpo_loss": 0.0230712890625, "epoch": 0.4, "final_loss": 0.0230712890625, "grad_norm": 0.0, "learning_rate": 6.700686820543489e-07, "loss": 0.0843, "projector_lr": 2.0102060461630466e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.40625, "sft_loss": 0.71875, "step": 2473 }, { "dpo_loss": 0.03271484375, "epoch": 0.4, "final_loss": 0.03271484375, "grad_norm": 0.0, "learning_rate": 6.698299113452368e-07, "loss": 0.1171, "projector_lr": 2.0094897340357107e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.859375, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.4375, "sft_loss": 0.7265625, "step": 2474 }, { "dpo_loss": 0.2373046875, "epoch": 0.4, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 6.695910968482681e-07, "loss": 0.2094, "projector_lr": 2.0087732905448044e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 3.71875, "rewards_train/rejected": -5.28125, "sft_loss": 1.1015625, "step": 2475 }, { "dpo_loss": 0.046142578125, "epoch": 0.4, "final_loss": 0.046142578125, "grad_norm": 0.0, "learning_rate": 6.693522386250172e-07, "loss": 0.3589, "projector_lr": 2.0080567158750516e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.65625, "sft_loss": 0.796875, "step": 2476 }, { "dpo_loss": 0.138671875, "epoch": 0.4, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 6.691133367370698e-07, "loss": 0.1612, "projector_lr": 2.0073400102112097e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.1875, "sft_loss": 0.609375, "step": 2477 }, { "dpo_loss": 0.4296875, "epoch": 0.4, "final_loss": 0.4296875, "grad_norm": 0.0, "learning_rate": 6.688743912460229e-07, "loss": 0.4242, "projector_lr": 2.0066231737380686e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 2.75, "rewards_train/rejected": -4.46875, "sft_loss": 0.97265625, "step": 2478 }, { "dpo_loss": 0.162109375, "epoch": 0.4, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 6.686354022134844e-07, "loss": 0.2608, "projector_lr": 2.0059062066404535e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.484375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -7.8125, "sft_loss": 0.75390625, "step": 2479 }, { "dpo_loss": 0.169921875, "epoch": 0.4, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 6.683963697010742e-07, "loss": 0.2655, "projector_lr": 2.005189109103223e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.77734375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.84375, "sft_loss": 0.57421875, "step": 2480 }, { "dpo_loss": 0.40625, "epoch": 0.4, "final_loss": 0.40625, "grad_norm": 0.0, "learning_rate": 6.681572937704225e-07, "loss": 0.2725, "projector_lr": 2.0044718813112677e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.015625, "rewards_train/margins": 1.71875, "rewards_train/rejected": -3.734375, "sft_loss": 0.875, "step": 2481 }, { "dpo_loss": 0.10888671875, "epoch": 0.4, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 6.679181744831713e-07, "loss": 0.1252, "projector_lr": 2.0037545234495137e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 3.6875, "rewards_train/rejected": -4.75, "sft_loss": 0.94921875, "step": 2482 }, { "dpo_loss": 0.0137939453125, "epoch": 0.4, "final_loss": 0.0137939453125, "grad_norm": 0.0, "learning_rate": 6.676790119009733e-07, "loss": 0.0533, "projector_lr": 2.00303703570292e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1875, "rewards_train/margins": 7.75, "rewards_train/rejected": -8.9375, "sft_loss": 0.6484375, "step": 2483 }, { "dpo_loss": 0.23828125, "epoch": 0.4, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 6.67439806085493e-07, "loss": 0.3067, "projector_lr": 2.0023194182564793e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 3.421875, "rewards_train/rejected": -4.5625, "sft_loss": 0.62890625, "step": 2484 }, { "dpo_loss": 0.248046875, "epoch": 0.4, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 6.672005570984056e-07, "loss": 0.3148, "projector_lr": 2.001601671295217e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.302734375, "rewards_train/margins": 3.4375, "rewards_train/rejected": -3.75, "sft_loss": 0.61328125, "step": 2485 }, { "dpo_loss": 0.474609375, "epoch": 0.4, "final_loss": 0.474609375, "grad_norm": 0.0, "learning_rate": 6.669612650013974e-07, "loss": 0.2775, "projector_lr": 2.0008837950041923e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7109375, "rewards_train/margins": 3.75, "rewards_train/rejected": -5.46875, "sft_loss": 0.6640625, "step": 2486 }, { "dpo_loss": 0.076171875, "epoch": 0.4, "final_loss": 0.076171875, "grad_norm": 0.0, "learning_rate": 6.667219298561661e-07, "loss": 0.0785, "projector_lr": 2.0001657895684987e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.71875, "sft_loss": 0.6953125, "step": 2487 }, { "dpo_loss": 0.0206298828125, "epoch": 0.4, "final_loss": 0.0206298828125, "grad_norm": 0.0, "learning_rate": 6.664825517244202e-07, "loss": 0.061, "projector_lr": 1.9994476551732606e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.3125, "sft_loss": 0.5703125, "step": 2488 }, { "dpo_loss": 0.2158203125, "epoch": 0.4, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 6.662431306678795e-07, "loss": 0.2761, "projector_lr": 1.9987293920036388e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.109375, "rewards_train/margins": 3.84375, "rewards_train/rejected": -4.9375, "sft_loss": 0.828125, "step": 2489 }, { "dpo_loss": 0.19921875, "epoch": 0.4, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 6.660036667482747e-07, "loss": 0.1284, "projector_lr": 1.9980110002448246e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 3.203125, "rewards_train/rejected": -4.4375, "sft_loss": 0.9140625, "step": 2490 }, { "dpo_loss": 0.287109375, "epoch": 0.4, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 6.657641600273478e-07, "loss": 0.1983, "projector_lr": 1.9972924800820438e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.03125, "rewards_train/margins": 1.7109375, "rewards_train/rejected": -2.75, "sft_loss": 0.79296875, "step": 2491 }, { "dpo_loss": 0.240234375, "epoch": 0.4, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 6.655246105668517e-07, "loss": 0.2638, "projector_lr": 1.9965738317005552e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.875, "sft_loss": 0.71484375, "step": 2492 }, { "dpo_loss": 0.12451171875, "epoch": 0.4, "final_loss": 0.12451171875, "grad_norm": 0.0, "learning_rate": 6.652850184285502e-07, "loss": 0.1023, "projector_lr": 1.9958550552856504e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 3.03125, "rewards_train/rejected": -4.4375, "sft_loss": 0.7890625, "step": 2493 }, { "dpo_loss": 0.0517578125, "epoch": 0.4, "final_loss": 0.0517578125, "grad_norm": 0.0, "learning_rate": 6.650453836742182e-07, "loss": 0.1656, "projector_lr": 1.9951361510226544e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.17578125, "rewards_train/margins": 5.5, "rewards_train/rejected": -5.65625, "sft_loss": 0.71875, "step": 2494 }, { "dpo_loss": 0.21484375, "epoch": 0.4, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 6.648057063656414e-07, "loss": 0.2279, "projector_lr": 1.994417119096924e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.828125, "rewards_train/margins": 3.65625, "rewards_train/rejected": -5.5, "sft_loss": 0.99609375, "step": 2495 }, { "dpo_loss": 0.01025390625, "epoch": 0.4, "final_loss": 0.01025390625, "grad_norm": 0.0, "learning_rate": 6.64565986564617e-07, "loss": 0.0667, "projector_lr": 1.9936979596938512e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.78125, "sft_loss": 0.77734375, "step": 2496 }, { "dpo_loss": 0.1689453125, "epoch": 0.4, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 6.643262243329526e-07, "loss": 0.1283, "projector_lr": 1.992978672998858e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 3.5625, "rewards_train/rejected": -4.71875, "sft_loss": 0.80078125, "step": 2497 }, { "dpo_loss": 0.037841796875, "epoch": 0.4, "final_loss": 0.037841796875, "grad_norm": 0.0, "learning_rate": 6.640864197324672e-07, "loss": 0.1833, "projector_lr": 1.9922592591974015e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.25, "rewards_train/margins": 5.875, "rewards_train/rejected": -7.09375, "sft_loss": 0.57421875, "step": 2498 }, { "dpo_loss": 0.3984375, "epoch": 0.4, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 6.638465728249901e-07, "loss": 0.2746, "projector_lr": 1.9915397184749705e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.46875, "sft_loss": 0.6484375, "step": 2499 }, { "dpo_loss": 0.48828125, "epoch": 0.4, "final_loss": 0.48828125, "grad_norm": 0.0, "learning_rate": 6.636066836723621e-07, "loss": 0.3249, "projector_lr": 1.990820051017087e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.3125, "rewards_train/margins": 2.5625, "rewards_train/rejected": -4.875, "sft_loss": 0.875, "step": 2500 }, { "dpo_loss": 0.043212890625, "epoch": 0.4, "final_loss": 0.043212890625, "grad_norm": 0.0, "learning_rate": 6.633667523364349e-07, "loss": 0.0451, "projector_lr": 1.990100257009305e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 5.75, "rewards_train/rejected": -7.1875, "sft_loss": 0.62890625, "step": 2501 }, { "dpo_loss": 0.0546875, "epoch": 0.4, "final_loss": 0.0546875, "grad_norm": 0.0, "learning_rate": 6.631267788790704e-07, "loss": 0.0519, "projector_lr": 1.9893803366372112e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.84375, "sft_loss": 0.765625, "step": 2502 }, { "dpo_loss": 0.318359375, "epoch": 0.4, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 6.628867633621424e-07, "loss": 0.227, "projector_lr": 1.9886602900864273e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.96875, "rewards_train/margins": 3.375, "rewards_train/rejected": -5.34375, "sft_loss": 0.96484375, "step": 2503 }, { "dpo_loss": 0.0126953125, "epoch": 0.4, "final_loss": 0.0126953125, "grad_norm": 0.0, "learning_rate": 6.626467058475343e-07, "loss": 0.2017, "projector_lr": 1.987940117542603e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.0, "sft_loss": 0.62890625, "step": 2504 }, { "dpo_loss": 0.30078125, "epoch": 0.4, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 6.624066063971415e-07, "loss": 0.1935, "projector_lr": 1.9872198191914246e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 3.015625, "rewards_train/rejected": -4.6875, "sft_loss": 0.7734375, "step": 2505 }, { "dpo_loss": 0.10986328125, "epoch": 0.4, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 6.621664650728694e-07, "loss": 0.0742, "projector_lr": 1.9864993952186086e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.234375, "rewards_train/margins": 4.78125, "rewards_train/rejected": -7.03125, "sft_loss": 0.875, "step": 2506 }, { "dpo_loss": 0.0419921875, "epoch": 0.4, "final_loss": 0.0419921875, "grad_norm": 0.0, "learning_rate": 6.619262819366349e-07, "loss": 0.1698, "projector_lr": 1.9857788458099045e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.375, "sft_loss": 0.65625, "step": 2507 }, { "dpo_loss": 0.09521484375, "epoch": 0.4, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 6.616860570503647e-07, "loss": 0.2663, "projector_lr": 1.985058171151094e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3125, "rewards_train/margins": 5.71875, "rewards_train/rejected": -7.03125, "sft_loss": 0.76171875, "step": 2508 }, { "dpo_loss": 0.1123046875, "epoch": 0.4, "final_loss": 0.1123046875, "grad_norm": 0.0, "learning_rate": 6.614457904759972e-07, "loss": 0.1238, "projector_lr": 1.9843373714279917e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 3.4375, "rewards_train/rejected": -4.625, "sft_loss": 0.75, "step": 2509 }, { "dpo_loss": 0.2734375, "epoch": 0.4, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 6.612054822754813e-07, "loss": 0.3269, "projector_lr": 1.983616446826444e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.0, "sft_loss": 0.62109375, "step": 2510 }, { "dpo_loss": 0.0322265625, "epoch": 0.4, "final_loss": 0.0322265625, "grad_norm": 0.0, "learning_rate": 6.609651325107763e-07, "loss": 0.1591, "projector_lr": 1.9828953975323287e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.625, "sft_loss": 0.68359375, "step": 2511 }, { "dpo_loss": 0.2431640625, "epoch": 0.4, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 6.607247412438525e-07, "loss": 0.1373, "projector_lr": 1.9821742237315574e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.09375, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.3125, "sft_loss": 0.65625, "step": 2512 }, { "dpo_loss": 0.10400390625, "epoch": 0.4, "final_loss": 0.10400390625, "grad_norm": 0.0, "learning_rate": 6.604843085366908e-07, "loss": 0.0599, "projector_lr": 1.9814529256100723e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 6.21875, "rewards_train/rejected": -6.96875, "sft_loss": 0.7265625, "step": 2513 }, { "dpo_loss": 0.408203125, "epoch": 0.4, "final_loss": 0.408203125, "grad_norm": 0.0, "learning_rate": 6.602438344512831e-07, "loss": 0.3833, "projector_lr": 1.9807315033538493e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.9375, "sft_loss": 0.53125, "step": 2514 }, { "dpo_loss": 0.146484375, "epoch": 0.4, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 6.600033190496313e-07, "loss": 0.1352, "projector_lr": 1.9800099571488937e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6328125, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.53125, "sft_loss": 0.734375, "step": 2515 }, { "dpo_loss": 0.09765625, "epoch": 0.4, "final_loss": 0.09765625, "grad_norm": 0.0, "learning_rate": 6.597627623937485e-07, "loss": 0.1378, "projector_lr": 1.979288287181246e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.5, "sft_loss": 0.85546875, "step": 2516 }, { "dpo_loss": 0.07861328125, "epoch": 0.4, "final_loss": 0.07861328125, "grad_norm": 0.0, "learning_rate": 6.595221645456584e-07, "loss": 0.1905, "projector_lr": 1.9785664936369754e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5, "rewards_train/margins": 5.78125, "rewards_train/rejected": -7.28125, "sft_loss": 1.0625, "step": 2517 }, { "dpo_loss": 0.046142578125, "epoch": 0.4, "final_loss": 0.046142578125, "grad_norm": 0.0, "learning_rate": 6.592815255673953e-07, "loss": 0.1755, "projector_lr": 1.977844576702186e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 4.75, "rewards_train/rejected": -6.25, "sft_loss": 0.8515625, "step": 2518 }, { "dpo_loss": 0.3203125, "epoch": 0.4, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 6.590408455210036e-07, "loss": 0.3159, "projector_lr": 1.977122536563011e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.53125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.71875, "sft_loss": 0.83203125, "step": 2519 }, { "dpo_loss": 0.302734375, "epoch": 0.4, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 6.588001244685391e-07, "loss": 0.3227, "projector_lr": 1.976400373405617e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5078125, "rewards_train/margins": 2.421875, "rewards_train/rejected": -3.921875, "sft_loss": 0.78125, "step": 2520 }, { "dpo_loss": 0.0123291015625, "epoch": 0.4, "final_loss": 0.0123291015625, "grad_norm": 0.0, "learning_rate": 6.585593624720676e-07, "loss": 0.0737, "projector_lr": 1.975678087416203e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.46875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -7.875, "sft_loss": 0.640625, "step": 2521 }, { "dpo_loss": 0.0086669921875, "epoch": 0.4, "final_loss": 0.0086669921875, "grad_norm": 0.0, "learning_rate": 6.583185595936656e-07, "loss": 0.0119, "projector_lr": 1.9749556787809967e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8359375, "rewards_train/margins": 6.1875, "rewards_train/rejected": -8.0625, "sft_loss": 0.96484375, "step": 2522 }, { "dpo_loss": 0.236328125, "epoch": 0.4, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 6.580777158954203e-07, "loss": 0.2446, "projector_lr": 1.9742331476862607e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.453125, "rewards_train/margins": 3.875, "rewards_train/rejected": -5.3125, "sft_loss": 0.9375, "step": 2523 }, { "dpo_loss": 0.466796875, "epoch": 0.4, "final_loss": 0.466796875, "grad_norm": 0.0, "learning_rate": 6.578368314394291e-07, "loss": 0.3467, "projector_lr": 1.9735104943182874e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3125, "rewards_train/margins": 2.75, "rewards_train/rejected": -4.0625, "sft_loss": 0.7734375, "step": 2524 }, { "dpo_loss": 0.140625, "epoch": 0.4, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 6.575959062878004e-07, "loss": 0.1622, "projector_lr": 1.9727877188634015e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.34375, "sft_loss": 0.8828125, "step": 2525 }, { "dpo_loss": 0.240234375, "epoch": 0.4, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 6.573549405026526e-07, "loss": 0.1632, "projector_lr": 1.972064821507958e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8125, "rewards_train/margins": 3.578125, "rewards_train/rejected": -5.375, "sft_loss": 0.78125, "step": 2526 }, { "dpo_loss": 0.6875, "epoch": 0.4, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 6.571139341461147e-07, "loss": 0.4799, "projector_lr": 1.971341802438344e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.078125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -6.125, "sft_loss": 0.796875, "step": 2527 }, { "dpo_loss": 0.369140625, "epoch": 0.4, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 6.568728872803265e-07, "loss": 0.3571, "projector_lr": 1.9706186618409797e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8359375, "rewards_train/margins": 2.375, "rewards_train/rejected": -4.21875, "sft_loss": 1.0859375, "step": 2528 }, { "dpo_loss": 0.016845703125, "epoch": 0.4, "final_loss": 0.016845703125, "grad_norm": 0.0, "learning_rate": 6.566317999674377e-07, "loss": 0.053, "projector_lr": 1.969895399902313e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.1875, "sft_loss": 0.81640625, "step": 2529 }, { "dpo_loss": 0.384765625, "epoch": 0.4, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 6.563906722696089e-07, "loss": 0.2447, "projector_lr": 1.969172016808827e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5390625, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.5625, "sft_loss": 0.85546875, "step": 2530 }, { "dpo_loss": 0.0162353515625, "epoch": 0.4, "final_loss": 0.0162353515625, "grad_norm": 0.0, "learning_rate": 6.561495042490108e-07, "loss": 0.0256, "projector_lr": 1.9684485127470325e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8828125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -7.21875, "sft_loss": 0.78125, "step": 2531 }, { "dpo_loss": 0.251953125, "epoch": 0.41, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 6.559082959678247e-07, "loss": 0.23, "projector_lr": 1.967724887903474e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.0, "rewards_train/rejected": -4.5, "sft_loss": 0.9296875, "step": 2532 }, { "dpo_loss": 0.11865234375, "epoch": 0.41, "final_loss": 0.11865234375, "grad_norm": 0.0, "learning_rate": 6.556670474882421e-07, "loss": 0.1307, "projector_lr": 1.9670011424647264e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.484375, "rewards_train/margins": 6.03125, "rewards_train/rejected": -8.5, "sft_loss": 0.7578125, "step": 2533 }, { "dpo_loss": 0.076171875, "epoch": 0.41, "final_loss": 0.076171875, "grad_norm": 0.0, "learning_rate": 6.554257588724651e-07, "loss": 0.0612, "projector_lr": 1.9662772766173954e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.390625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -6.71875, "sft_loss": 0.8125, "step": 2534 }, { "dpo_loss": 0.103515625, "epoch": 0.41, "final_loss": 0.103515625, "grad_norm": 0.0, "learning_rate": 6.551844301827059e-07, "loss": 0.2214, "projector_lr": 1.9655532905481176e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.46875, "rewards_train/margins": 4.125, "rewards_train/rejected": -6.59375, "sft_loss": 0.80078125, "step": 2535 }, { "dpo_loss": 0.55859375, "epoch": 0.41, "final_loss": 0.55859375, "grad_norm": 0.0, "learning_rate": 6.549430614811871e-07, "loss": 0.4034, "projector_lr": 1.9648291844435617e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.078125, "rewards_train/margins": 3.640625, "rewards_train/rejected": -4.71875, "sft_loss": 0.71484375, "step": 2536 }, { "dpo_loss": 0.06298828125, "epoch": 0.41, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 6.547016528301419e-07, "loss": 0.1685, "projector_lr": 1.9641049584904257e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.71875, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.40625, "sft_loss": 0.71875, "step": 2537 }, { "dpo_loss": 0.08154296875, "epoch": 0.41, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 6.544602042918132e-07, "loss": 0.2469, "projector_lr": 1.96338061287544e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -6.0, "sft_loss": 0.7421875, "step": 2538 }, { "dpo_loss": 0.2412109375, "epoch": 0.41, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 6.542187159284551e-07, "loss": 0.1671, "projector_lr": 1.9626561477853657e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.515625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.8125, "sft_loss": 1.015625, "step": 2539 }, { "dpo_loss": 0.330078125, "epoch": 0.41, "final_loss": 0.330078125, "grad_norm": 0.0, "learning_rate": 6.539771878023308e-07, "loss": 0.2156, "projector_lr": 1.9619315634069926e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.578125, "rewards_train/margins": 2.71875, "rewards_train/rejected": -4.3125, "sft_loss": 0.69921875, "step": 2540 }, { "dpo_loss": 0.236328125, "epoch": 0.41, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 6.537356199757147e-07, "loss": 0.1469, "projector_lr": 1.961206859927144e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 2.515625, "rewards_train/rejected": -3.953125, "sft_loss": 0.69921875, "step": 2541 }, { "dpo_loss": 0.25, "epoch": 0.41, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 6.534940125108909e-07, "loss": 0.3538, "projector_lr": 1.9604820375326733e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9375, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.90625, "sft_loss": 0.7265625, "step": 2542 }, { "dpo_loss": 0.0634765625, "epoch": 0.41, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 6.532523654701543e-07, "loss": 0.0515, "projector_lr": 1.959757096410463e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.90625, "rewards_train/margins": 6.03125, "rewards_train/rejected": -7.9375, "sft_loss": 0.671875, "step": 2543 }, { "dpo_loss": 0.189453125, "epoch": 0.41, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 6.530106789158091e-07, "loss": 0.3419, "projector_lr": 1.9590320367474273e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.1875, "sft_loss": 0.8203125, "step": 2544 }, { "dpo_loss": 0.140625, "epoch": 0.41, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 6.527689529101705e-07, "loss": 0.1792, "projector_lr": 1.9583068587305118e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.40625, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.90625, "sft_loss": 0.9375, "step": 2545 }, { "dpo_loss": 0.41796875, "epoch": 0.41, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 6.525271875155638e-07, "loss": 0.2289, "projector_lr": 1.9575815625466915e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7109375, "rewards_train/margins": 2.4375, "rewards_train/rejected": -4.125, "sft_loss": 0.859375, "step": 2546 }, { "dpo_loss": 0.047607421875, "epoch": 0.41, "final_loss": 0.047607421875, "grad_norm": 0.0, "learning_rate": 6.522853827943239e-07, "loss": 0.1195, "projector_lr": 1.956856148382972e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 6.6875, "rewards_train/rejected": -7.8125, "sft_loss": 0.7109375, "step": 2547 }, { "dpo_loss": 0.15625, "epoch": 0.41, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 6.520435388087962e-07, "loss": 0.2019, "projector_lr": 1.956130616426389e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.609375, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.875, "sft_loss": 0.609375, "step": 2548 }, { "dpo_loss": 0.44140625, "epoch": 0.41, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 6.518016556213364e-07, "loss": 0.3419, "projector_lr": 1.955404966864009e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.9375, "rewards_train/margins": 3.546875, "rewards_train/rejected": -5.46875, "sft_loss": 0.91796875, "step": 2549 }, { "dpo_loss": 0.0150146484375, "epoch": 0.41, "final_loss": 0.0150146484375, "grad_norm": 0.0, "learning_rate": 6.5155973329431e-07, "loss": 0.0263, "projector_lr": 1.9546791998829303e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.84375, "sft_loss": 0.65234375, "step": 2550 }, { "dpo_loss": 0.05078125, "epoch": 0.41, "final_loss": 0.05078125, "grad_norm": 0.0, "learning_rate": 6.513177718900927e-07, "loss": 0.198, "projector_lr": 1.9539533156702784e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.28125, "sft_loss": 0.765625, "step": 2551 }, { "dpo_loss": 0.17578125, "epoch": 0.41, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 6.510757714710704e-07, "loss": 0.1958, "projector_lr": 1.9532273144132114e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.5625, "sft_loss": 0.65625, "step": 2552 }, { "dpo_loss": 0.384765625, "epoch": 0.41, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 6.508337320996387e-07, "loss": 0.287, "projector_lr": 1.9525011962989163e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4375, "rewards_train/margins": 3.796875, "rewards_train/rejected": -5.25, "sft_loss": 0.87890625, "step": 2553 }, { "dpo_loss": 0.30078125, "epoch": 0.41, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 6.505916538382034e-07, "loss": 0.1658, "projector_lr": 1.9517749615146104e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.484375, "rewards_train/margins": 3.71875, "rewards_train/rejected": -5.21875, "sft_loss": 0.69140625, "step": 2554 }, { "dpo_loss": 0.50390625, "epoch": 0.41, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 6.50349536749181e-07, "loss": 0.3454, "projector_lr": 1.9510486102475432e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.484375, "rewards_train/margins": 0.68359375, "rewards_train/rejected": -3.171875, "sft_loss": 1.09375, "step": 2555 }, { "dpo_loss": 0.09033203125, "epoch": 0.41, "final_loss": 0.09033203125, "grad_norm": 0.0, "learning_rate": 6.501073808949968e-07, "loss": 0.0502, "projector_lr": 1.9503221426849907e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.21875, "sft_loss": 0.9140625, "step": 2556 }, { "dpo_loss": 0.0478515625, "epoch": 0.41, "final_loss": 0.0478515625, "grad_norm": 0.0, "learning_rate": 6.498651863380871e-07, "loss": 0.0331, "projector_lr": 1.9495955590142615e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.84375, "sft_loss": 0.81640625, "step": 2557 }, { "dpo_loss": 0.146484375, "epoch": 0.41, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 6.496229531408975e-07, "loss": 0.196, "projector_lr": 1.9488688594226926e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 3.640625, "rewards_train/rejected": -5.0625, "sft_loss": 0.6484375, "step": 2558 }, { "dpo_loss": 0.365234375, "epoch": 0.41, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 6.493806813658841e-07, "loss": 0.3306, "projector_lr": 1.9481420440976524e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 4.5625, "rewards_train/rejected": -6.25, "sft_loss": 0.82421875, "step": 2559 }, { "dpo_loss": 0.1328125, "epoch": 0.41, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 6.491383710755125e-07, "loss": 0.3342, "projector_lr": 1.9474151132265375e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4375, "rewards_train/margins": 3.890625, "rewards_train/rejected": -5.34375, "sft_loss": 0.9375, "step": 2560 }, { "dpo_loss": 0.039306640625, "epoch": 0.41, "final_loss": 0.039306640625, "grad_norm": 0.0, "learning_rate": 6.488960223322587e-07, "loss": 0.0252, "projector_lr": 1.946688066996776e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 5.375, "rewards_train/rejected": -5.9375, "sft_loss": 0.72265625, "step": 2561 }, { "dpo_loss": 0.06298828125, "epoch": 0.41, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 6.48653635198608e-07, "loss": 0.3337, "projector_lr": 1.945960905595824e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.28125, "sft_loss": 1.1015625, "step": 2562 }, { "dpo_loss": 0.115234375, "epoch": 0.41, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 6.484112097370563e-07, "loss": 0.4321, "projector_lr": 1.945233629211169e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.90625, "sft_loss": 0.8359375, "step": 2563 }, { "dpo_loss": 0.392578125, "epoch": 0.41, "final_loss": 0.392578125, "grad_norm": 0.0, "learning_rate": 6.481687460101088e-07, "loss": 0.3447, "projector_lr": 1.9445062380303265e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.65625, "rewards_train/rejected": -4.6875, "sft_loss": 0.875, "step": 2564 }, { "dpo_loss": 0.240234375, "epoch": 0.41, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 6.479262440802808e-07, "loss": 0.2408, "projector_lr": 1.9437787322408427e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.65625, "sft_loss": 0.796875, "step": 2565 }, { "dpo_loss": 0.126953125, "epoch": 0.41, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 6.476837040100976e-07, "loss": 0.234, "projector_lr": 1.943051112030293e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 5.75, "rewards_train/rejected": -7.15625, "sft_loss": 1.8359375, "step": 2566 }, { "dpo_loss": 0.061767578125, "epoch": 0.41, "final_loss": 0.061767578125, "grad_norm": 0.0, "learning_rate": 6.47441125862094e-07, "loss": 0.0873, "projector_lr": 1.942323377586282e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.96875, "rewards_train/margins": 6.1875, "rewards_train/rejected": -8.125, "sft_loss": 1.0234375, "step": 2567 }, { "dpo_loss": 0.33203125, "epoch": 0.41, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 6.471985096988151e-07, "loss": 0.2606, "projector_lr": 1.9415955290964452e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.625, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.5625, "sft_loss": 0.94921875, "step": 2568 }, { "dpo_loss": 0.0732421875, "epoch": 0.41, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 6.46955855582815e-07, "loss": 0.298, "projector_lr": 1.940867566748445e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 5.46875, "rewards_train/rejected": -5.71875, "sft_loss": 0.546875, "step": 2569 }, { "dpo_loss": 0.12109375, "epoch": 0.41, "final_loss": 0.12109375, "grad_norm": 0.0, "learning_rate": 6.467131635766584e-07, "loss": 0.2103, "projector_lr": 1.9401394907299753e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.453125, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.21875, "sft_loss": 0.953125, "step": 2570 }, { "dpo_loss": 0.08349609375, "epoch": 0.41, "final_loss": 0.08349609375, "grad_norm": 0.0, "learning_rate": 6.464704337429194e-07, "loss": 0.1476, "projector_lr": 1.9394113012287584e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48828125, "rewards_train/margins": 5.375, "rewards_train/rejected": -5.875, "sft_loss": 0.99609375, "step": 2571 }, { "dpo_loss": 0.314453125, "epoch": 0.41, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 6.462276661441819e-07, "loss": 0.2543, "projector_lr": 1.938682998432546e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5234375, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.78125, "sft_loss": 0.75390625, "step": 2572 }, { "dpo_loss": 0.2275390625, "epoch": 0.41, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 6.459848608430396e-07, "loss": 0.2218, "projector_lr": 1.937954582529119e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.84375, "rewards_train/rejected": -5.34375, "sft_loss": 0.8046875, "step": 2573 }, { "dpo_loss": 0.46875, "epoch": 0.41, "final_loss": 0.46875, "grad_norm": 0.0, "learning_rate": 6.457420179020958e-07, "loss": 0.2993, "projector_lr": 1.9372260537062876e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.5, "sft_loss": 0.79296875, "step": 2574 }, { "dpo_loss": 0.2421875, "epoch": 0.41, "final_loss": 0.2421875, "grad_norm": 0.0, "learning_rate": 6.454991373839637e-07, "loss": 0.2618, "projector_lr": 1.936497412151891e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 2.578125, "rewards_train/rejected": -3.734375, "sft_loss": 0.75390625, "step": 2575 }, { "dpo_loss": 0.36328125, "epoch": 0.41, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 6.452562193512657e-07, "loss": 0.2247, "projector_lr": 1.9357686580537974e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.4375, "sft_loss": 0.80859375, "step": 2576 }, { "dpo_loss": 0.21484375, "epoch": 0.41, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 6.450132638666348e-07, "loss": 0.1255, "projector_lr": 1.9350397915999044e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.34375, "sft_loss": 1.0078125, "step": 2577 }, { "dpo_loss": 0.07958984375, "epoch": 0.41, "final_loss": 0.07958984375, "grad_norm": 0.0, "learning_rate": 6.447702709927126e-07, "loss": 0.2722, "projector_lr": 1.9343108129781377e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.875, "sft_loss": 0.58203125, "step": 2578 }, { "dpo_loss": 0.22265625, "epoch": 0.41, "final_loss": 0.22265625, "grad_norm": 0.0, "learning_rate": 6.445272407921508e-07, "loss": 0.1559, "projector_lr": 1.9335817223764528e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.546875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.46875, "sft_loss": 0.65234375, "step": 2579 }, { "dpo_loss": 0.53125, "epoch": 0.41, "final_loss": 0.53125, "grad_norm": 0.0, "learning_rate": 6.442841733276112e-07, "loss": 0.293, "projector_lr": 1.932852519982834e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 2.109375, "rewards_train/rejected": -2.9375, "sft_loss": 0.890625, "step": 2580 }, { "dpo_loss": 0.046875, "epoch": 0.41, "final_loss": 0.046875, "grad_norm": 0.0, "learning_rate": 6.440410686617643e-07, "loss": 0.1852, "projector_lr": 1.932123205985293e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9921875, "rewards_train/margins": 4.59375, "rewards_train/rejected": -6.5625, "sft_loss": 1.2109375, "step": 2581 }, { "dpo_loss": 0.328125, "epoch": 0.41, "final_loss": 0.328125, "grad_norm": 0.0, "learning_rate": 6.437979268572909e-07, "loss": 0.2202, "projector_lr": 1.931393780571873e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0625, "rewards_train/margins": 3.53125, "rewards_train/rejected": -4.59375, "sft_loss": 0.7890625, "step": 2582 }, { "dpo_loss": 0.4375, "epoch": 0.41, "final_loss": 0.4375, "grad_norm": 0.0, "learning_rate": 6.435547479768811e-07, "loss": 0.2656, "projector_lr": 1.9306642439306436e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.6875, "sft_loss": 0.67578125, "step": 2583 }, { "dpo_loss": 0.275390625, "epoch": 0.41, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 6.433115320832346e-07, "loss": 0.2134, "projector_lr": 1.929934596249704e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.21875, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.8125, "sft_loss": 0.90234375, "step": 2584 }, { "dpo_loss": 0.435546875, "epoch": 0.41, "final_loss": 0.435546875, "grad_norm": 0.0, "learning_rate": 6.430682792390605e-07, "loss": 0.3215, "projector_lr": 1.9292048377171817e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.65625, "sft_loss": 0.70703125, "step": 2585 }, { "dpo_loss": 0.123046875, "epoch": 0.41, "final_loss": 0.123046875, "grad_norm": 0.0, "learning_rate": 6.428249895070778e-07, "loss": 0.2727, "projector_lr": 1.9284749685212332e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.0, "sft_loss": 0.98046875, "step": 2586 }, { "dpo_loss": 0.2060546875, "epoch": 0.41, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 6.425816629500143e-07, "loss": 0.1625, "projector_lr": 1.9277449888500432e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 3.875, "rewards_train/rejected": -5.1875, "sft_loss": 1.1015625, "step": 2587 }, { "dpo_loss": 0.1494140625, "epoch": 0.41, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 6.423382996306083e-07, "loss": 0.1767, "projector_lr": 1.927014898891825e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 3.5625, "rewards_train/rejected": -4.6875, "sft_loss": 0.96875, "step": 2588 }, { "dpo_loss": 0.337890625, "epoch": 0.41, "final_loss": 0.337890625, "grad_norm": 0.0, "learning_rate": 6.420948996116068e-07, "loss": 0.1924, "projector_lr": 1.9262846988348207e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.71875, "sft_loss": 0.70703125, "step": 2589 }, { "dpo_loss": 0.36328125, "epoch": 0.41, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 6.418514629557664e-07, "loss": 0.1932, "projector_lr": 1.9255543888672994e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.96875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -6.125, "sft_loss": 0.8671875, "step": 2590 }, { "dpo_loss": 0.0157470703125, "epoch": 0.41, "final_loss": 0.0157470703125, "grad_norm": 0.0, "learning_rate": 6.416079897258536e-07, "loss": 0.2597, "projector_lr": 1.9248239691775608e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.90625, "sft_loss": 0.921875, "step": 2591 }, { "dpo_loss": 0.146484375, "epoch": 0.41, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 6.413644799846434e-07, "loss": 0.3472, "projector_lr": 1.92409343995393e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.71875, "sft_loss": 0.8125, "step": 2592 }, { "dpo_loss": 0.046630859375, "epoch": 0.41, "final_loss": 0.046630859375, "grad_norm": 0.0, "learning_rate": 6.411209337949213e-07, "loss": 0.1915, "projector_lr": 1.923362801384764e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.427734375, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.375, "sft_loss": 0.5390625, "step": 2593 }, { "dpo_loss": 0.2431640625, "epoch": 0.42, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 6.408773512194814e-07, "loss": 0.1627, "projector_lr": 1.922632053658444e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.875, "sft_loss": 0.66796875, "step": 2594 }, { "dpo_loss": 0.62890625, "epoch": 0.42, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 6.406337323211276e-07, "loss": 0.5732, "projector_lr": 1.921901196963383e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.53125, "rewards_train/margins": 3.625, "rewards_train/rejected": -5.15625, "sft_loss": 0.546875, "step": 2595 }, { "dpo_loss": 0.1611328125, "epoch": 0.42, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 6.403900771626731e-07, "loss": 0.2362, "projector_lr": 1.9211702314880193e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.65625, "sft_loss": 0.7578125, "step": 2596 }, { "dpo_loss": 0.0634765625, "epoch": 0.42, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 6.401463858069404e-07, "loss": 0.2211, "projector_lr": 1.920439157420821e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.21875, "sft_loss": 0.9921875, "step": 2597 }, { "dpo_loss": 0.0849609375, "epoch": 0.42, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 6.39902658316761e-07, "loss": 0.1818, "projector_lr": 1.9197079749502832e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 4.5625, "rewards_train/rejected": -6.21875, "sft_loss": 0.94921875, "step": 2598 }, { "dpo_loss": 0.045166015625, "epoch": 0.42, "final_loss": 0.045166015625, "grad_norm": 0.0, "learning_rate": 6.396588947549764e-07, "loss": 0.146, "projector_lr": 1.9189766842649293e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7265625, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.53125, "sft_loss": 0.7890625, "step": 2599 }, { "dpo_loss": 0.2158203125, "epoch": 0.42, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 6.394150951844369e-07, "loss": 0.2815, "projector_lr": 1.9182452855533107e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.296875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.46875, "sft_loss": 0.6171875, "step": 2600 }, { "dpo_loss": 0.08935546875, "epoch": 0.42, "final_loss": 0.08935546875, "grad_norm": 0.0, "learning_rate": 6.391712596680021e-07, "loss": 0.6542, "projector_lr": 1.9175137790040064e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2890625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.0, "sft_loss": 0.65234375, "step": 2601 }, { "dpo_loss": 0.2734375, "epoch": 0.42, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 6.389273882685413e-07, "loss": 0.2669, "projector_lr": 1.916782164805624e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.466796875, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.71875, "sft_loss": 0.6796875, "step": 2602 }, { "dpo_loss": 0.255859375, "epoch": 0.42, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 6.386834810489324e-07, "loss": 0.3096, "projector_lr": 1.9160504431467972e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.84375, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.84375, "sft_loss": 0.84765625, "step": 2603 }, { "dpo_loss": 0.275390625, "epoch": 0.42, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 6.384395380720631e-07, "loss": 0.1839, "projector_lr": 1.9153186142161896e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.40625, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.875, "sft_loss": 0.90234375, "step": 2604 }, { "dpo_loss": 0.271484375, "epoch": 0.42, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 6.381955594008299e-07, "loss": 0.1532, "projector_lr": 1.91458667820249e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.78125, "sft_loss": 0.7734375, "step": 2605 }, { "dpo_loss": 0.42578125, "epoch": 0.42, "final_loss": 0.42578125, "grad_norm": 0.0, "learning_rate": 6.379515450981388e-07, "loss": 0.4961, "projector_lr": 1.9138546352944162e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 3.203125, "rewards_train/rejected": -4.09375, "sft_loss": 0.7265625, "step": 2606 }, { "dpo_loss": 0.1923828125, "epoch": 0.42, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 6.37707495226905e-07, "loss": 0.1551, "projector_lr": 1.913122485680715e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.78125, "sft_loss": 0.85546875, "step": 2607 }, { "dpo_loss": 0.2265625, "epoch": 0.42, "final_loss": 0.2265625, "grad_norm": 0.0, "learning_rate": 6.374634098500527e-07, "loss": 0.1236, "projector_lr": 1.912390229550158e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 7.25, "rewards_train/rejected": -8.6875, "sft_loss": 0.80078125, "step": 2608 }, { "dpo_loss": 0.1044921875, "epoch": 0.42, "final_loss": 0.1044921875, "grad_norm": 0.0, "learning_rate": 6.372192890305151e-07, "loss": 0.2473, "projector_lr": 1.9116578670915456e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.125, "sft_loss": 0.48046875, "step": 2609 }, { "dpo_loss": 0.72265625, "epoch": 0.42, "final_loss": 0.72265625, "grad_norm": 0.0, "learning_rate": 6.369751328312349e-07, "loss": 0.5127, "projector_lr": 1.910925398493705e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 3.140625, "rewards_train/rejected": -5.0, "sft_loss": 0.88671875, "step": 2610 }, { "dpo_loss": 0.00186920166015625, "epoch": 0.42, "final_loss": 0.00186920166015625, "grad_norm": 0.0, "learning_rate": 6.36730941315164e-07, "loss": 0.0374, "projector_lr": 1.9101928239454923e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 8.125, "rewards_train/rejected": -9.4375, "sft_loss": 0.63671875, "step": 2611 }, { "dpo_loss": 0.57421875, "epoch": 0.42, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 6.364867145452628e-07, "loss": 0.3428, "projector_lr": 1.9094601436357885e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4375, "rewards_train/margins": 1.453125, "rewards_train/rejected": -2.890625, "sft_loss": 0.8359375, "step": 2612 }, { "dpo_loss": 0.057373046875, "epoch": 0.42, "final_loss": 0.057373046875, "grad_norm": 0.0, "learning_rate": 6.362424525845014e-07, "loss": 0.1309, "projector_lr": 1.908727357753504e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 3.8125, "rewards_train/rejected": -4.71875, "sft_loss": 0.6953125, "step": 2613 }, { "dpo_loss": 0.07275390625, "epoch": 0.42, "final_loss": 0.07275390625, "grad_norm": 0.0, "learning_rate": 6.359981554958587e-07, "loss": 0.0536, "projector_lr": 1.907994466487576e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.25, "sft_loss": 0.6953125, "step": 2614 }, { "dpo_loss": 0.1650390625, "epoch": 0.42, "final_loss": 0.1650390625, "grad_norm": 0.0, "learning_rate": 6.357538233423224e-07, "loss": 0.1964, "projector_lr": 1.9072614700269673e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.625, "sft_loss": 0.76171875, "step": 2615 }, { "dpo_loss": 0.02880859375, "epoch": 0.42, "final_loss": 0.02880859375, "grad_norm": 0.0, "learning_rate": 6.355094561868901e-07, "loss": 0.1978, "projector_lr": 1.9065283685606705e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.71875, "rewards_train/margins": 5.71875, "rewards_train/rejected": -7.46875, "sft_loss": 0.6796875, "step": 2616 }, { "dpo_loss": 0.1865234375, "epoch": 0.42, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 6.352650540925674e-07, "loss": 0.1372, "projector_lr": 1.9057951622777021e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.515625, "rewards_train/margins": 3.859375, "rewards_train/rejected": -4.375, "sft_loss": 0.8046875, "step": 2617 }, { "dpo_loss": 0.09716796875, "epoch": 0.42, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 6.350206171223696e-07, "loss": 0.077, "projector_lr": 1.9050618513671087e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59765625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.9375, "sft_loss": 0.70703125, "step": 2618 }, { "dpo_loss": 0.1572265625, "epoch": 0.42, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 6.347761453393206e-07, "loss": 0.1296, "projector_lr": 1.9043284360179617e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.15625, "rewards_train/margins": 3.015625, "rewards_train/rejected": -4.1875, "sft_loss": 0.75, "step": 2619 }, { "dpo_loss": 0.00970458984375, "epoch": 0.42, "final_loss": 0.00970458984375, "grad_norm": 0.0, "learning_rate": 6.345316388064536e-07, "loss": 0.2147, "projector_lr": 1.9035949164193612e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 7.9375, "rewards_train/rejected": -8.75, "sft_loss": 0.6171875, "step": 2620 }, { "dpo_loss": 0.166015625, "epoch": 0.42, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 6.342870975868106e-07, "loss": 0.1301, "projector_lr": 1.9028612927604317e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 3.09375, "rewards_train/rejected": -4.28125, "sft_loss": 1.046875, "step": 2621 }, { "dpo_loss": 0.019287109375, "epoch": 0.42, "final_loss": 0.019287109375, "grad_norm": 0.0, "learning_rate": 6.340425217434424e-07, "loss": 0.3409, "projector_lr": 1.9021275652303274e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.765625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.46875, "sft_loss": 0.67578125, "step": 2622 }, { "dpo_loss": 0.1865234375, "epoch": 0.42, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 6.337979113394089e-07, "loss": 0.2676, "projector_lr": 1.9013937340182267e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.1875, "sft_loss": 1.15625, "step": 2623 }, { "dpo_loss": 0.13671875, "epoch": 0.42, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 6.335532664377789e-07, "loss": 0.0801, "projector_lr": 1.9006597993133366e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.90234375, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.53125, "sft_loss": 0.8203125, "step": 2624 }, { "dpo_loss": 0.0751953125, "epoch": 0.42, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 6.333085871016301e-07, "loss": 0.1808, "projector_lr": 1.8999257613048905e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.34375, "rewards_train/margins": 3.171875, "rewards_train/rejected": -4.5, "sft_loss": 0.84375, "step": 2625 }, { "dpo_loss": 0.5546875, "epoch": 0.42, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 6.330638733940491e-07, "loss": 0.301, "projector_lr": 1.8991916201821475e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.421875, "rewards_train/margins": 3.875, "rewards_train/rejected": -5.28125, "sft_loss": 0.81640625, "step": 2626 }, { "dpo_loss": 0.1279296875, "epoch": 0.42, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 6.328191253781313e-07, "loss": 0.0763, "projector_lr": 1.8984573761343941e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.65625, "sft_loss": 0.953125, "step": 2627 }, { "dpo_loss": 0.4296875, "epoch": 0.42, "final_loss": 0.4296875, "grad_norm": 0.0, "learning_rate": 6.325743431169807e-07, "loss": 0.3327, "projector_lr": 1.8977230293509425e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.6875, "sft_loss": 1.0, "step": 2628 }, { "dpo_loss": 0.072265625, "epoch": 0.42, "final_loss": 0.072265625, "grad_norm": 0.0, "learning_rate": 6.32329526673711e-07, "loss": 0.2073, "projector_lr": 1.8969885800211331e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69140625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.15625, "sft_loss": 0.80859375, "step": 2629 }, { "dpo_loss": 0.08056640625, "epoch": 0.42, "final_loss": 0.08056640625, "grad_norm": 0.0, "learning_rate": 6.320846761114434e-07, "loss": 0.0854, "projector_lr": 1.8962540283343305e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.15625, "sft_loss": 0.80859375, "step": 2630 }, { "dpo_loss": 0.33984375, "epoch": 0.42, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 6.318397914933091e-07, "loss": 0.2517, "projector_lr": 1.8955193744799275e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 3.859375, "rewards_train/rejected": -5.03125, "sft_loss": 0.7578125, "step": 2631 }, { "dpo_loss": 0.58984375, "epoch": 0.42, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 6.315948728824472e-07, "loss": 0.4083, "projector_lr": 1.894784618647342e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.7109375, "rewards_train/margins": 3.875, "rewards_train/rejected": -5.59375, "sft_loss": 0.84765625, "step": 2632 }, { "dpo_loss": 0.291015625, "epoch": 0.42, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 6.313499203420063e-07, "loss": 0.2072, "projector_lr": 1.894049761026019e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.486328125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.6875, "sft_loss": 0.53125, "step": 2633 }, { "dpo_loss": 0.6953125, "epoch": 0.42, "final_loss": 0.6953125, "grad_norm": 0.0, "learning_rate": 6.311049339351433e-07, "loss": 0.5979, "projector_lr": 1.89331480180543e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.25, "sft_loss": 0.703125, "step": 2634 }, { "dpo_loss": 0.18359375, "epoch": 0.42, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 6.308599137250237e-07, "loss": 0.3582, "projector_lr": 1.8925797411750711e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.875, "sft_loss": 0.703125, "step": 2635 }, { "dpo_loss": 0.365234375, "epoch": 0.42, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 6.306148597748222e-07, "loss": 0.3301, "projector_lr": 1.8918445793244668e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.421875, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.90625, "sft_loss": 1.0078125, "step": 2636 }, { "dpo_loss": 0.09130859375, "epoch": 0.42, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 6.303697721477218e-07, "loss": 0.1894, "projector_lr": 1.8911093164431656e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.46875, "sft_loss": 1.03125, "step": 2637 }, { "dpo_loss": 0.0390625, "epoch": 0.42, "final_loss": 0.0390625, "grad_norm": 0.0, "learning_rate": 6.301246509069143e-07, "loss": 0.1287, "projector_lr": 1.8903739527207432e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 5.59375, "rewards_train/rejected": -7.0625, "sft_loss": 0.86328125, "step": 2638 }, { "dpo_loss": 0.263671875, "epoch": 0.42, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 6.298794961156004e-07, "loss": 0.173, "projector_lr": 1.889638488346801e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.03125, "rewards_train/margins": 3.765625, "rewards_train/rejected": -5.78125, "sft_loss": 1.0546875, "step": 2639 }, { "dpo_loss": 0.703125, "epoch": 0.42, "final_loss": 0.703125, "grad_norm": 0.0, "learning_rate": 6.296343078369891e-07, "loss": 0.4676, "projector_lr": 1.888902923510967e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.578125, "rewards_train/margins": 2.828125, "rewards_train/rejected": -4.40625, "sft_loss": 0.7421875, "step": 2640 }, { "dpo_loss": 0.228515625, "epoch": 0.42, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 6.293890861342981e-07, "loss": 0.3573, "projector_lr": 1.8881672584028944e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.484375, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.9375, "sft_loss": 0.71875, "step": 2641 }, { "dpo_loss": 0.16796875, "epoch": 0.42, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 6.291438310707538e-07, "loss": 0.159, "projector_lr": 1.8874314932122617e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.84375, "sft_loss": 0.92578125, "step": 2642 }, { "dpo_loss": 0.1396484375, "epoch": 0.42, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 6.288985427095914e-07, "loss": 0.1531, "projector_lr": 1.8866956281287743e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.625, "sft_loss": 0.66796875, "step": 2643 }, { "dpo_loss": 0.11083984375, "epoch": 0.42, "final_loss": 0.11083984375, "grad_norm": 0.0, "learning_rate": 6.286532211140543e-07, "loss": 0.1698, "projector_lr": 1.885959663342163e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.734375, "rewards_train/margins": 3.59375, "rewards_train/rejected": -5.34375, "sft_loss": 1.140625, "step": 2644 }, { "dpo_loss": 0.10009765625, "epoch": 0.42, "final_loss": 0.10009765625, "grad_norm": 0.0, "learning_rate": 6.284078663473948e-07, "loss": 0.0928, "projector_lr": 1.8852235990421847e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.0625, "sft_loss": 0.765625, "step": 2645 }, { "dpo_loss": 0.13671875, "epoch": 0.42, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 6.281624784728734e-07, "loss": 0.1666, "projector_lr": 1.8844874354186204e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6171875, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.9375, "sft_loss": 0.5234375, "step": 2646 }, { "dpo_loss": 0.310546875, "epoch": 0.42, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 6.279170575537595e-07, "loss": 0.4855, "projector_lr": 1.8837511726612788e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 2.109375, "rewards_train/rejected": -3.453125, "sft_loss": 0.671875, "step": 2647 }, { "dpo_loss": 0.04833984375, "epoch": 0.42, "final_loss": 0.04833984375, "grad_norm": 0.0, "learning_rate": 6.276716036533308e-07, "loss": 0.1134, "projector_lr": 1.8830148109599924e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8203125, "rewards_train/margins": 5.65625, "rewards_train/rejected": -7.46875, "sft_loss": 0.82421875, "step": 2648 }, { "dpo_loss": 0.1845703125, "epoch": 0.42, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 6.274261168348735e-07, "loss": 0.1321, "projector_lr": 1.8822783505046205e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.375, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.40625, "sft_loss": 0.63671875, "step": 2649 }, { "dpo_loss": 0.2119140625, "epoch": 0.42, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 6.271805971616824e-07, "loss": 0.3246, "projector_lr": 1.8815417914850473e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.0, "sft_loss": 0.58203125, "step": 2650 }, { "dpo_loss": 0.400390625, "epoch": 0.42, "final_loss": 0.400390625, "grad_norm": 0.0, "learning_rate": 6.269350446970606e-07, "loss": 0.4094, "projector_lr": 1.8808051340911819e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.8203125, "rewards_train/margins": 2.78125, "rewards_train/rejected": -4.59375, "sft_loss": 0.9609375, "step": 2651 }, { "dpo_loss": 0.494140625, "epoch": 0.42, "final_loss": 0.494140625, "grad_norm": 0.0, "learning_rate": 6.266894595043201e-07, "loss": 0.3958, "projector_lr": 1.8800683785129603e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 1.7734375, "rewards_train/rejected": -3.171875, "sft_loss": 1.0234375, "step": 2652 }, { "dpo_loss": 0.306640625, "epoch": 0.42, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 6.264438416467806e-07, "loss": 0.3205, "projector_lr": 1.879331524940342e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.859375, "rewards_train/margins": 2.15625, "rewards_train/rejected": -3.015625, "sft_loss": 0.69140625, "step": 2653 }, { "dpo_loss": 0.019287109375, "epoch": 0.42, "final_loss": 0.019287109375, "grad_norm": 0.0, "learning_rate": 6.261981911877711e-07, "loss": 0.0347, "projector_lr": 1.8785945735633136e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.609375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.9375, "sft_loss": 1.2109375, "step": 2654 }, { "dpo_loss": 0.400390625, "epoch": 0.42, "final_loss": 0.400390625, "grad_norm": 0.0, "learning_rate": 6.259525081906282e-07, "loss": 0.4351, "projector_lr": 1.8778575245718846e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 2.265625, "rewards_train/rejected": -3.53125, "sft_loss": 0.78125, "step": 2655 }, { "dpo_loss": 0.318359375, "epoch": 0.42, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 6.257067927186973e-07, "loss": 0.1826, "projector_lr": 1.877120378156092e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 3.453125, "rewards_train/rejected": -5.125, "sft_loss": 1.3046875, "step": 2656 }, { "dpo_loss": 0.52734375, "epoch": 0.43, "final_loss": 0.52734375, "grad_norm": 0.0, "learning_rate": 6.25461044835332e-07, "loss": 0.3724, "projector_lr": 1.8763831345059963e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.8828125, "rewards_train/margins": 3.1875, "rewards_train/rejected": -5.0625, "sft_loss": 0.97265625, "step": 2657 }, { "dpo_loss": 0.1640625, "epoch": 0.43, "final_loss": 0.1640625, "grad_norm": 0.0, "learning_rate": 6.252152646038947e-07, "loss": 0.14, "projector_lr": 1.8756457938116842e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84765625, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.65625, "sft_loss": 0.7265625, "step": 2658 }, { "dpo_loss": 0.1533203125, "epoch": 0.43, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 6.249694520877555e-07, "loss": 0.1143, "projector_lr": 1.8749083562632664e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.5625, "sft_loss": 0.61328125, "step": 2659 }, { "dpo_loss": 0.036376953125, "epoch": 0.43, "final_loss": 0.036376953125, "grad_norm": 0.0, "learning_rate": 6.24723607350293e-07, "loss": 0.2987, "projector_lr": 1.8741708220508794e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.34375, "sft_loss": 0.83984375, "step": 2660 }, { "dpo_loss": 0.1728515625, "epoch": 0.43, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 6.244777304548947e-07, "loss": 0.3388, "projector_lr": 1.8734331913646843e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.8125, "sft_loss": 0.6796875, "step": 2661 }, { "dpo_loss": 0.08349609375, "epoch": 0.43, "final_loss": 0.08349609375, "grad_norm": 0.0, "learning_rate": 6.242318214649556e-07, "loss": 0.178, "projector_lr": 1.8726954643948668e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84375, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.21875, "sft_loss": 0.75390625, "step": 2662 }, { "dpo_loss": 0.107421875, "epoch": 0.43, "final_loss": 0.107421875, "grad_norm": 0.0, "learning_rate": 6.239858804438794e-07, "loss": 0.2191, "projector_lr": 1.8719576413316382e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.125, "sft_loss": 0.74609375, "step": 2663 }, { "dpo_loss": 0.263671875, "epoch": 0.43, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 6.237399074550777e-07, "loss": 0.1427, "projector_lr": 1.8712197223652333e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.65625, "rewards_train/margins": 3.171875, "rewards_train/rejected": -4.84375, "sft_loss": 1.0390625, "step": 2664 }, { "dpo_loss": 0.2314453125, "epoch": 0.43, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 6.23493902561971e-07, "loss": 0.3363, "projector_lr": 1.8704817076859135e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.6875, "sft_loss": 0.890625, "step": 2665 }, { "dpo_loss": 0.265625, "epoch": 0.43, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 6.232478658279875e-07, "loss": 0.4201, "projector_lr": 1.8697435974839626e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.125, "sft_loss": 0.64453125, "step": 2666 }, { "dpo_loss": 0.1611328125, "epoch": 0.43, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 6.230017973165637e-07, "loss": 0.2183, "projector_lr": 1.8690053919496912e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.78125, "sft_loss": 0.6328125, "step": 2667 }, { "dpo_loss": 0.15625, "epoch": 0.43, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 6.227556970911441e-07, "loss": 0.1848, "projector_lr": 1.8682670912734327e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.40625, "sft_loss": 0.71875, "step": 2668 }, { "dpo_loss": 0.15234375, "epoch": 0.43, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 6.22509565215182e-07, "loss": 0.1736, "projector_lr": 1.867528695645546e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.78125, "sft_loss": 0.62890625, "step": 2669 }, { "dpo_loss": 0.0625, "epoch": 0.43, "final_loss": 0.0625, "grad_norm": 0.0, "learning_rate": 6.222634017521382e-07, "loss": 0.2706, "projector_lr": 1.8667902052564147e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.25, "sft_loss": 0.875, "step": 2670 }, { "dpo_loss": 0.123046875, "epoch": 0.43, "final_loss": 0.123046875, "grad_norm": 0.0, "learning_rate": 6.22017206765482e-07, "loss": 0.121, "projector_lr": 1.8660516202964461e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.96875, "sft_loss": 0.875, "step": 2671 }, { "dpo_loss": 0.119140625, "epoch": 0.43, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 6.217709803186908e-07, "loss": 0.2316, "projector_lr": 1.8653129409560724e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.78125, "sft_loss": 0.76171875, "step": 2672 }, { "dpo_loss": 0.06591796875, "epoch": 0.43, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 6.215247224752499e-07, "loss": 0.1651, "projector_lr": 1.8645741674257498e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.6875, "sft_loss": 0.72265625, "step": 2673 }, { "dpo_loss": 0.5078125, "epoch": 0.43, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 6.212784332986529e-07, "loss": 0.3515, "projector_lr": 1.863835299895959e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.109375, "rewards_train/margins": 1.90625, "rewards_train/rejected": -4.03125, "sft_loss": 0.8125, "step": 2674 }, { "dpo_loss": 0.046630859375, "epoch": 0.43, "final_loss": 0.046630859375, "grad_norm": 0.0, "learning_rate": 6.210321128524015e-07, "loss": 0.0849, "projector_lr": 1.8630963385572048e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.87890625, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.90625, "sft_loss": 1.0703125, "step": 2675 }, { "dpo_loss": 0.10693359375, "epoch": 0.43, "final_loss": 0.10693359375, "grad_norm": 0.0, "learning_rate": 6.207857612000054e-07, "loss": 0.1992, "projector_lr": 1.8623572836000162e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.375, "sft_loss": 0.79296875, "step": 2676 }, { "dpo_loss": 0.1904296875, "epoch": 0.43, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 6.205393784049822e-07, "loss": 0.2853, "projector_lr": 1.8616181352149466e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.9375, "sft_loss": 0.953125, "step": 2677 }, { "dpo_loss": 0.41015625, "epoch": 0.43, "final_loss": 0.41015625, "grad_norm": 0.0, "learning_rate": 6.202929645308577e-07, "loss": 0.3741, "projector_lr": 1.860878893592573e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.578125, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.53125, "sft_loss": 1.203125, "step": 2678 }, { "dpo_loss": 0.271484375, "epoch": 0.43, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 6.200465196411657e-07, "loss": 0.2426, "projector_lr": 1.8601395589234973e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87890625, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.96875, "sft_loss": 0.7890625, "step": 2679 }, { "dpo_loss": 0.07080078125, "epoch": 0.43, "final_loss": 0.07080078125, "grad_norm": 0.0, "learning_rate": 6.19800043799448e-07, "loss": 0.0717, "projector_lr": 1.8594001313983442e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.890625, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.75, "sft_loss": 0.68359375, "step": 2680 }, { "dpo_loss": 0.06103515625, "epoch": 0.43, "final_loss": 0.06103515625, "grad_norm": 0.0, "learning_rate": 6.195535370692543e-07, "loss": 0.0461, "projector_lr": 1.8586606112077632e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.65625, "sft_loss": 0.703125, "step": 2681 }, { "dpo_loss": 0.33203125, "epoch": 0.43, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 6.193069995141425e-07, "loss": 0.2132, "projector_lr": 1.8579209985424274e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.46875, "rewards_train/margins": 3.921875, "rewards_train/rejected": -5.375, "sft_loss": 0.5859375, "step": 2682 }, { "dpo_loss": 0.2099609375, "epoch": 0.43, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 6.19060431197678e-07, "loss": 0.1306, "projector_lr": 1.8571812935930338e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.412109375, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.15625, "sft_loss": 0.71484375, "step": 2683 }, { "dpo_loss": 0.37109375, "epoch": 0.43, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 6.188138321834344e-07, "loss": 0.3905, "projector_lr": 1.8564414965503034e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 2.796875, "rewards_train/rejected": -3.59375, "sft_loss": 0.875, "step": 2684 }, { "dpo_loss": 0.310546875, "epoch": 0.43, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 6.185672025349936e-07, "loss": 0.2265, "projector_lr": 1.8557016076049807e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.59375, "rewards_train/margins": 3.046875, "rewards_train/rejected": -3.65625, "sft_loss": 0.6796875, "step": 2685 }, { "dpo_loss": 0.0203857421875, "epoch": 0.43, "final_loss": 0.0203857421875, "grad_norm": 0.0, "learning_rate": 6.183205423159444e-07, "loss": 0.0355, "projector_lr": 1.8549616269478334e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.625, "sft_loss": 0.78125, "step": 2686 }, { "dpo_loss": 0.294921875, "epoch": 0.43, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 6.180738515898845e-07, "loss": 0.3381, "projector_lr": 1.8542215547696538e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.6875, "sft_loss": 0.80078125, "step": 2687 }, { "dpo_loss": 0.057373046875, "epoch": 0.43, "final_loss": 0.057373046875, "grad_norm": 0.0, "learning_rate": 6.178271304204191e-07, "loss": 0.1706, "projector_lr": 1.8534813912612574e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.9375, "sft_loss": 0.765625, "step": 2688 }, { "dpo_loss": 0.1796875, "epoch": 0.43, "final_loss": 0.1796875, "grad_norm": 0.0, "learning_rate": 6.17580378871161e-07, "loss": 0.1767, "projector_lr": 1.852741136613483e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.609375, "rewards_train/margins": 3.90625, "rewards_train/rejected": -5.53125, "sft_loss": 0.7890625, "step": 2689 }, { "dpo_loss": 0.380859375, "epoch": 0.43, "final_loss": 0.380859375, "grad_norm": 0.0, "learning_rate": 6.173335970057311e-07, "loss": 0.3422, "projector_lr": 1.8520007910171937e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 4.625, "rewards_train/rejected": -6.03125, "sft_loss": 0.77734375, "step": 2690 }, { "dpo_loss": 0.283203125, "epoch": 0.43, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 6.170867848877581e-07, "loss": 0.3284, "projector_lr": 1.8512603546632746e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.75, "sft_loss": 0.88671875, "step": 2691 }, { "dpo_loss": 0.53515625, "epoch": 0.43, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 6.168399425808786e-07, "loss": 0.3031, "projector_lr": 1.8505198277426359e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0, "rewards_train/margins": 2.046875, "rewards_train/rejected": -3.046875, "sft_loss": 0.671875, "step": 2692 }, { "dpo_loss": 0.0791015625, "epoch": 0.43, "final_loss": 0.0791015625, "grad_norm": 0.0, "learning_rate": 6.165930701487365e-07, "loss": 0.0663, "projector_lr": 1.8497792104462094e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.09375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.5, "sft_loss": 0.625, "step": 2693 }, { "dpo_loss": 0.1220703125, "epoch": 0.43, "final_loss": 0.1220703125, "grad_norm": 0.0, "learning_rate": 6.163461676549839e-07, "loss": 0.1043, "projector_lr": 1.849038502964952e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.875, "sft_loss": 0.53125, "step": 2694 }, { "dpo_loss": 0.140625, "epoch": 0.43, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 6.160992351632806e-07, "loss": 0.1644, "projector_lr": 1.848297705489842e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.296875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.8125, "sft_loss": 0.734375, "step": 2695 }, { "dpo_loss": 0.1806640625, "epoch": 0.43, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 6.158522727372943e-07, "loss": 0.1864, "projector_lr": 1.8475568182118831e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.625, "sft_loss": 0.70703125, "step": 2696 }, { "dpo_loss": 0.1083984375, "epoch": 0.43, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 6.156052804407e-07, "loss": 0.0918, "projector_lr": 1.8468158413221002e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.59375, "sft_loss": 0.87109375, "step": 2697 }, { "dpo_loss": 0.244140625, "epoch": 0.43, "final_loss": 0.244140625, "grad_norm": 0.0, "learning_rate": 6.153582583371806e-07, "loss": 0.3195, "projector_lr": 1.846074775011542e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9921875, "rewards_train/margins": 2.5, "rewards_train/rejected": -4.5, "sft_loss": 0.8203125, "step": 2698 }, { "dpo_loss": 0.10009765625, "epoch": 0.43, "final_loss": 0.10009765625, "grad_norm": 0.0, "learning_rate": 6.151112064904268e-07, "loss": 0.1099, "projector_lr": 1.8453336194712805e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.71875, "sft_loss": 0.77734375, "step": 2699 }, { "dpo_loss": 0.10986328125, "epoch": 0.43, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 6.148641249641368e-07, "loss": 0.2973, "projector_lr": 1.8445923748924104e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.125, "rewards_train/margins": 2.84375, "rewards_train/rejected": -3.96875, "sft_loss": 0.90234375, "step": 2700 }, { "dpo_loss": 0.10595703125, "epoch": 0.43, "final_loss": 0.10595703125, "grad_norm": 0.0, "learning_rate": 6.146170138220165e-07, "loss": 0.1536, "projector_lr": 1.8438510414660496e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.875, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.65625, "sft_loss": 0.640625, "step": 2701 }, { "dpo_loss": 0.1279296875, "epoch": 0.43, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 6.143698731277795e-07, "loss": 0.0775, "projector_lr": 1.8431096193833384e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.498046875, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.6875, "sft_loss": 0.5, "step": 2702 }, { "dpo_loss": 0.5390625, "epoch": 0.43, "final_loss": 0.5390625, "grad_norm": 0.0, "learning_rate": 6.14122702945147e-07, "loss": 0.3912, "projector_lr": 1.8423681088354407e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 2.53125, "rewards_train/rejected": -3.4375, "sft_loss": 0.73046875, "step": 2703 }, { "dpo_loss": 0.038330078125, "epoch": 0.43, "final_loss": 0.038330078125, "grad_norm": 0.0, "learning_rate": 6.138755033378476e-07, "loss": 0.107, "projector_lr": 1.8416265100135427e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.443359375, "rewards_train/margins": 6.65625, "rewards_train/rejected": -7.09375, "sft_loss": 0.671875, "step": 2704 }, { "dpo_loss": 0.1513671875, "epoch": 0.43, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 6.136282743696178e-07, "loss": 0.3594, "projector_lr": 1.8408848231088534e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.72265625, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.5625, "sft_loss": 0.7109375, "step": 2705 }, { "dpo_loss": 0.3671875, "epoch": 0.43, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 6.133810161042017e-07, "loss": 0.2471, "projector_lr": 1.8401430483126052e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.265625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.21875, "sft_loss": 0.73828125, "step": 2706 }, { "dpo_loss": 0.271484375, "epoch": 0.43, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 6.131337286053504e-07, "loss": 0.4639, "projector_lr": 1.8394011858160513e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.6875, "sft_loss": 0.6953125, "step": 2707 }, { "dpo_loss": 0.1103515625, "epoch": 0.43, "final_loss": 0.1103515625, "grad_norm": 0.0, "learning_rate": 6.128864119368233e-07, "loss": 0.1758, "projector_lr": 1.83865923581047e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.5, "sft_loss": 0.8671875, "step": 2708 }, { "dpo_loss": 0.1259765625, "epoch": 0.43, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 6.126390661623867e-07, "loss": 0.0822, "projector_lr": 1.8379171984871601e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 6.65625, "rewards_train/rejected": -8.0625, "sft_loss": 0.73046875, "step": 2709 }, { "dpo_loss": 0.39453125, "epoch": 0.43, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 6.123916913458148e-07, "loss": 0.306, "projector_lr": 1.8371750740374445e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 6.28125, "rewards_train/rejected": -7.5, "sft_loss": 0.8671875, "step": 2710 }, { "dpo_loss": 0.1259765625, "epoch": 0.43, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 6.12144287550889e-07, "loss": 0.0966, "projector_lr": 1.8364328626526671e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.84375, "sft_loss": 0.78125, "step": 2711 }, { "dpo_loss": 0.1474609375, "epoch": 0.43, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 6.118968548413985e-07, "loss": 0.1242, "projector_lr": 1.8356905645241958e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.4375, "sft_loss": 0.95703125, "step": 2712 }, { "dpo_loss": 0.166015625, "epoch": 0.43, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 6.116493932811395e-07, "loss": 0.2607, "projector_lr": 1.8349481798434186e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.4375, "sft_loss": 0.60546875, "step": 2713 }, { "dpo_loss": 0.142578125, "epoch": 0.43, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 6.114019029339162e-07, "loss": 0.2255, "projector_lr": 1.8342057088017486e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.1875, "sft_loss": 0.83203125, "step": 2714 }, { "dpo_loss": 0.07177734375, "epoch": 0.43, "final_loss": 0.07177734375, "grad_norm": 0.0, "learning_rate": 6.111543838635397e-07, "loss": 0.0471, "projector_lr": 1.833463151590619e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.09375, "rewards_train/margins": 3.625, "rewards_train/rejected": -4.71875, "sft_loss": 0.7578125, "step": 2715 }, { "dpo_loss": 0.1015625, "epoch": 0.43, "final_loss": 0.1015625, "grad_norm": 0.0, "learning_rate": 6.109068361338287e-07, "loss": 0.0753, "projector_lr": 1.8327205084014863e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.1875, "sft_loss": 0.921875, "step": 2716 }, { "dpo_loss": 0.0869140625, "epoch": 0.43, "final_loss": 0.0869140625, "grad_norm": 0.0, "learning_rate": 6.106592598086096e-07, "loss": 0.1091, "projector_lr": 1.8319777794258288e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 6.46875, "rewards_train/rejected": -7.3125, "sft_loss": 0.79296875, "step": 2717 }, { "dpo_loss": 0.02734375, "epoch": 0.43, "final_loss": 0.02734375, "grad_norm": 0.0, "learning_rate": 6.104116549517158e-07, "loss": 0.0611, "projector_lr": 1.8312349648551474e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.71875, "sft_loss": 0.95703125, "step": 2718 }, { "dpo_loss": 0.091796875, "epoch": 0.44, "final_loss": 0.091796875, "grad_norm": 0.0, "learning_rate": 6.101640216269879e-07, "loss": 0.1198, "projector_lr": 1.8304920648809637e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 2.609375, "rewards_train/rejected": -3.15625, "sft_loss": 0.68359375, "step": 2719 }, { "dpo_loss": 0.294921875, "epoch": 0.44, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 6.099163598982742e-07, "loss": 0.4385, "projector_lr": 1.829749079694823e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.28125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -6.5, "sft_loss": 0.69140625, "step": 2720 }, { "dpo_loss": 0.51171875, "epoch": 0.44, "final_loss": 0.51171875, "grad_norm": 0.0, "learning_rate": 6.096686698294306e-07, "loss": 0.299, "projector_lr": 1.8290060094882917e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.3125, "sft_loss": 0.78515625, "step": 2721 }, { "dpo_loss": 0.419921875, "epoch": 0.44, "final_loss": 0.419921875, "grad_norm": 0.0, "learning_rate": 6.094209514843193e-07, "loss": 0.391, "projector_lr": 1.828262854452958e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.109375, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.71875, "sft_loss": 0.703125, "step": 2722 }, { "dpo_loss": 0.2001953125, "epoch": 0.44, "final_loss": 0.2001953125, "grad_norm": 0.0, "learning_rate": 6.091732049268108e-07, "loss": 0.1083, "projector_lr": 1.8275196147804325e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8203125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.875, "sft_loss": 0.8125, "step": 2723 }, { "dpo_loss": 0.0159912109375, "epoch": 0.44, "final_loss": 0.0159912109375, "grad_norm": 0.0, "learning_rate": 6.089254302207825e-07, "loss": 0.0674, "projector_lr": 1.8267762906623476e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 7.09375, "rewards_train/rejected": -7.5625, "sft_loss": 0.625, "step": 2724 }, { "dpo_loss": 0.10888671875, "epoch": 0.44, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 6.086776274301188e-07, "loss": 0.2319, "projector_lr": 1.8260328822903568e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 3.625, "rewards_train/rejected": -5.03125, "sft_loss": 0.93359375, "step": 2725 }, { "dpo_loss": 0.04296875, "epoch": 0.44, "final_loss": 0.04296875, "grad_norm": 0.0, "learning_rate": 6.084297966187119e-07, "loss": 0.3803, "projector_lr": 1.8252893898561357e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 6.8125, "rewards_train/rejected": -7.375, "sft_loss": 0.88671875, "step": 2726 }, { "dpo_loss": 0.1494140625, "epoch": 0.44, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 6.081819378504605e-07, "loss": 0.0807, "projector_lr": 1.824545813551382e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.5625, "sft_loss": 0.74609375, "step": 2727 }, { "dpo_loss": 0.6328125, "epoch": 0.44, "final_loss": 0.6328125, "grad_norm": 0.0, "learning_rate": 6.079340511892714e-07, "loss": 0.3459, "projector_lr": 1.8238021535678143e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.421875, "rewards_train/margins": 2.96875, "rewards_train/rejected": -4.375, "sft_loss": 0.69921875, "step": 2728 }, { "dpo_loss": 0.04150390625, "epoch": 0.44, "final_loss": 0.04150390625, "grad_norm": 0.0, "learning_rate": 6.076861366990578e-07, "loss": 0.1967, "projector_lr": 1.8230584100971734e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.28125, "sft_loss": 0.8359375, "step": 2729 }, { "dpo_loss": 0.1748046875, "epoch": 0.44, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 6.074381944437403e-07, "loss": 0.2201, "projector_lr": 1.8223145833312212e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.875, "sft_loss": 0.609375, "step": 2730 }, { "dpo_loss": 0.1552734375, "epoch": 0.44, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 6.07190224487247e-07, "loss": 0.0939, "projector_lr": 1.821570673461741e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.203125, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.75, "sft_loss": 0.65234375, "step": 2731 }, { "dpo_loss": 0.3046875, "epoch": 0.44, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 6.069422268935128e-07, "loss": 0.3957, "projector_lr": 1.8208266806805383e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.34375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.40625, "sft_loss": 0.7265625, "step": 2732 }, { "dpo_loss": 0.024169921875, "epoch": 0.44, "final_loss": 0.024169921875, "grad_norm": 0.0, "learning_rate": 6.066942017264798e-07, "loss": 0.0367, "projector_lr": 1.8200826051794395e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.53125, "rewards_train/margins": 5.625, "rewards_train/rejected": -7.15625, "sft_loss": 0.80078125, "step": 2733 }, { "dpo_loss": 0.1474609375, "epoch": 0.44, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 6.064461490500971e-07, "loss": 0.1042, "projector_lr": 1.8193384471502913e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.671875, "rewards_train/margins": 3.03125, "rewards_train/rejected": -3.703125, "sft_loss": 0.546875, "step": 2734 }, { "dpo_loss": 0.66015625, "epoch": 0.44, "final_loss": 0.66015625, "grad_norm": 0.0, "learning_rate": 6.061980689283211e-07, "loss": 0.3529, "projector_lr": 1.8185942067849636e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.59375, "sft_loss": 0.80859375, "step": 2735 }, { "dpo_loss": 0.1708984375, "epoch": 0.44, "final_loss": 0.1708984375, "grad_norm": 0.0, "learning_rate": 6.059499614251153e-07, "loss": 0.1614, "projector_lr": 1.8178498842753462e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5234375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.84375, "sft_loss": 1.0078125, "step": 2736 }, { "dpo_loss": 0.1806640625, "epoch": 0.44, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 6.057018266044502e-07, "loss": 0.1145, "projector_lr": 1.8171054798133505e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 3.203125, "rewards_train/rejected": -4.8125, "sft_loss": 0.71875, "step": 2737 }, { "dpo_loss": 0.166015625, "epoch": 0.44, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 6.054536645303029e-07, "loss": 0.1543, "projector_lr": 1.8163609935909088e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.5625, "sft_loss": 0.9375, "step": 2738 }, { "dpo_loss": 0.1240234375, "epoch": 0.44, "final_loss": 0.1240234375, "grad_norm": 0.0, "learning_rate": 6.052054752666584e-07, "loss": 0.1224, "projector_lr": 1.8156164257999752e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.375, "sft_loss": 0.6484375, "step": 2739 }, { "dpo_loss": 0.09033203125, "epoch": 0.44, "final_loss": 0.09033203125, "grad_norm": 0.0, "learning_rate": 6.049572588775077e-07, "loss": 0.1306, "projector_lr": 1.8148717766325232e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.53125, "sft_loss": 0.69921875, "step": 2740 }, { "dpo_loss": 0.029296875, "epoch": 0.44, "final_loss": 0.029296875, "grad_norm": 0.0, "learning_rate": 6.047090154268498e-07, "loss": 0.3409, "projector_lr": 1.8141270462805494e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.125, "sft_loss": 0.8671875, "step": 2741 }, { "dpo_loss": 0.1533203125, "epoch": 0.44, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 6.0446074497869e-07, "loss": 0.0877, "projector_lr": 1.8133822349360703e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.53125, "sft_loss": 0.84765625, "step": 2742 }, { "dpo_loss": 0.1630859375, "epoch": 0.44, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 6.042124475970408e-07, "loss": 0.2702, "projector_lr": 1.8126373427911226e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.75, "sft_loss": 0.65625, "step": 2743 }, { "dpo_loss": 0.2080078125, "epoch": 0.44, "final_loss": 0.2080078125, "grad_norm": 0.0, "learning_rate": 6.039641233459216e-07, "loss": 0.1153, "projector_lr": 1.811892370037765e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.498046875, "rewards_train/margins": 6.625, "rewards_train/rejected": -7.125, "sft_loss": 0.74609375, "step": 2744 }, { "dpo_loss": 0.44140625, "epoch": 0.44, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 6.037157722893588e-07, "loss": 0.2491, "projector_lr": 1.8111473168680764e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.25, "sft_loss": 0.66796875, "step": 2745 }, { "dpo_loss": 0.294921875, "epoch": 0.44, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 6.034673944913855e-07, "loss": 0.2275, "projector_lr": 1.8104021834741566e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.453125, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.78125, "sft_loss": 0.65234375, "step": 2746 }, { "dpo_loss": 0.0703125, "epoch": 0.44, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 6.032189900160418e-07, "loss": 0.1205, "projector_lr": 1.8096569700481256e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.75, "sft_loss": 0.84375, "step": 2747 }, { "dpo_loss": 0.2734375, "epoch": 0.44, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 6.029705589273751e-07, "loss": 0.3593, "projector_lr": 1.8089116767821256e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -6.15625, "sft_loss": 0.80859375, "step": 2748 }, { "dpo_loss": 0.0751953125, "epoch": 0.44, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 6.027221012894388e-07, "loss": 0.1193, "projector_lr": 1.8081663038683165e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.119140625, "rewards_train/margins": 4.53125, "rewards_train/rejected": -4.40625, "sft_loss": 0.7734375, "step": 2749 }, { "dpo_loss": 0.26171875, "epoch": 0.44, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 6.024736171662938e-07, "loss": 0.171, "projector_lr": 1.8074208514988816e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.140625, "rewards_train/margins": 3.296875, "rewards_train/rejected": -5.4375, "sft_loss": 1.078125, "step": 2750 }, { "dpo_loss": 0.02685546875, "epoch": 0.44, "final_loss": 0.02685546875, "grad_norm": 0.0, "learning_rate": 6.02225106622008e-07, "loss": 0.0967, "projector_lr": 1.806675319866024e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.46875, "sft_loss": 0.53515625, "step": 2751 }, { "dpo_loss": 0.7734375, "epoch": 0.44, "final_loss": 0.7734375, "grad_norm": 0.0, "learning_rate": 6.019765697206551e-07, "loss": 0.4412, "projector_lr": 1.8059297091619654e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.609375, "rewards_train/margins": 1.7578125, "rewards_train/rejected": -4.375, "sft_loss": 0.94140625, "step": 2752 }, { "dpo_loss": 0.29296875, "epoch": 0.44, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 6.01728006526317e-07, "loss": 0.1698, "projector_lr": 1.8051840195789509e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.921875, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.28125, "sft_loss": 1.03125, "step": 2753 }, { "dpo_loss": 0.71875, "epoch": 0.44, "final_loss": 0.71875, "grad_norm": 0.0, "learning_rate": 6.01479417103081e-07, "loss": 0.3989, "projector_lr": 1.8044382513092432e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.125, "rewards_train/margins": 3.078125, "rewards_train/rejected": -5.21875, "sft_loss": 0.9296875, "step": 2754 }, { "dpo_loss": 0.2197265625, "epoch": 0.44, "final_loss": 0.2197265625, "grad_norm": 0.0, "learning_rate": 6.01230801515042e-07, "loss": 0.4243, "projector_lr": 1.8036924045451262e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.294921875, "rewards_train/margins": 3.625, "rewards_train/rejected": -3.921875, "sft_loss": 0.62109375, "step": 2755 }, { "dpo_loss": 0.2431640625, "epoch": 0.44, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 6.009821598263018e-07, "loss": 0.1857, "projector_lr": 1.8029464794789057e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.34375, "rewards_train/margins": 3.828125, "rewards_train/rejected": -5.1875, "sft_loss": 0.84375, "step": 2756 }, { "dpo_loss": 0.171875, "epoch": 0.44, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 6.007334921009681e-07, "loss": 0.1914, "projector_lr": 1.8022004763029046e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5390625, "rewards_train/margins": 3.0625, "rewards_train/rejected": -4.59375, "sft_loss": 0.8984375, "step": 2757 }, { "dpo_loss": 0.07958984375, "epoch": 0.44, "final_loss": 0.07958984375, "grad_norm": 0.0, "learning_rate": 6.004847984031561e-07, "loss": 0.0566, "projector_lr": 1.8014543952094682e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23046875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.65625, "sft_loss": 0.71875, "step": 2758 }, { "dpo_loss": 0.322265625, "epoch": 0.44, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 6.002360787969873e-07, "loss": 0.1889, "projector_lr": 1.8007082363909618e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 2.546875, "rewards_train/rejected": -3.875, "sft_loss": 0.984375, "step": 2759 }, { "dpo_loss": 0.1181640625, "epoch": 0.44, "final_loss": 0.1181640625, "grad_norm": 0.0, "learning_rate": 5.999873333465898e-07, "loss": 0.4288, "projector_lr": 1.7999620000397696e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.46875, "sft_loss": 1.1015625, "step": 2760 }, { "dpo_loss": 0.05078125, "epoch": 0.44, "final_loss": 0.05078125, "grad_norm": 0.0, "learning_rate": 5.997385621160988e-07, "loss": 0.153, "projector_lr": 1.7992156863482964e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.466796875, "rewards_train/margins": 6.90625, "rewards_train/rejected": -7.375, "sft_loss": 0.58203125, "step": 2761 }, { "dpo_loss": 0.2734375, "epoch": 0.44, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 5.994897651696559e-07, "loss": 0.2234, "projector_lr": 1.7984692955089678e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.4375, "sft_loss": 0.6875, "step": 2762 }, { "dpo_loss": 0.40625, "epoch": 0.44, "final_loss": 0.40625, "grad_norm": 0.0, "learning_rate": 5.992409425714089e-07, "loss": 0.5871, "projector_lr": 1.7977228277142267e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.375, "rewards_train/margins": 4.5, "rewards_train/rejected": -6.875, "sft_loss": 0.91015625, "step": 2763 }, { "dpo_loss": 0.228515625, "epoch": 0.44, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 5.989920943855131e-07, "loss": 0.2486, "projector_lr": 1.7969762831565395e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.279296875, "rewards_train/margins": 6.875, "rewards_train/rejected": -7.15625, "sft_loss": 0.8203125, "step": 2764 }, { "dpo_loss": 0.087890625, "epoch": 0.44, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 5.987432206761297e-07, "loss": 0.2037, "projector_lr": 1.7962296620283892e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.578125, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.78125, "sft_loss": 1.125, "step": 2765 }, { "dpo_loss": 0.31640625, "epoch": 0.44, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 5.984943215074269e-07, "loss": 0.271, "projector_lr": 1.7954829645222808e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 5.46875, "rewards_train/rejected": -7.25, "sft_loss": 0.89453125, "step": 2766 }, { "dpo_loss": 0.34375, "epoch": 0.44, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 5.982453969435788e-07, "loss": 0.3969, "projector_lr": 1.7947361908307364e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.90625, "rewards_train/margins": 3.265625, "rewards_train/rejected": -5.1875, "sft_loss": 0.95703125, "step": 2767 }, { "dpo_loss": 0.09326171875, "epoch": 0.44, "final_loss": 0.09326171875, "grad_norm": 0.0, "learning_rate": 5.979964470487669e-07, "loss": 0.0877, "projector_lr": 1.7939893411463009e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.546875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.03125, "sft_loss": 0.65625, "step": 2768 }, { "dpo_loss": 0.12890625, "epoch": 0.44, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 5.977474718871789e-07, "loss": 0.1252, "projector_lr": 1.7932424156615368e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 3.09375, "rewards_train/rejected": -4.375, "sft_loss": 0.8671875, "step": 2769 }, { "dpo_loss": 0.31640625, "epoch": 0.44, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 5.974984715230087e-07, "loss": 0.2643, "projector_lr": 1.7924954145690262e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 3.234375, "rewards_train/rejected": -4.1875, "sft_loss": 0.734375, "step": 2770 }, { "dpo_loss": 0.20703125, "epoch": 0.44, "final_loss": 0.20703125, "grad_norm": 0.0, "learning_rate": 5.972494460204571e-07, "loss": 0.1587, "projector_lr": 1.7917483380613715e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.53515625, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.75, "sft_loss": 0.6953125, "step": 2771 }, { "dpo_loss": 0.1845703125, "epoch": 0.44, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 5.970003954437312e-07, "loss": 0.423, "projector_lr": 1.7910011863311939e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.25, "sft_loss": 0.84375, "step": 2772 }, { "dpo_loss": 0.11376953125, "epoch": 0.44, "final_loss": 0.11376953125, "grad_norm": 0.0, "learning_rate": 5.967513198570448e-07, "loss": 0.068, "projector_lr": 1.7902539595711346e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.65625, "rewards_train/margins": 4.5, "rewards_train/rejected": -6.15625, "sft_loss": 0.89453125, "step": 2773 }, { "dpo_loss": 0.125, "epoch": 0.44, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 5.965022193246177e-07, "loss": 0.1257, "projector_lr": 1.7895066579738531e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.75, "sft_loss": 0.83203125, "step": 2774 }, { "dpo_loss": 0.240234375, "epoch": 0.44, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 5.962530939106765e-07, "loss": 0.1705, "projector_lr": 1.7887592817320295e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.625, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.15625, "sft_loss": 0.61328125, "step": 2775 }, { "dpo_loss": 0.06396484375, "epoch": 0.44, "final_loss": 0.06396484375, "grad_norm": 0.0, "learning_rate": 5.960039436794538e-07, "loss": 0.1417, "projector_lr": 1.7880118310383616e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32421875, "rewards_train/margins": 6.34375, "rewards_train/rejected": -6.6875, "sft_loss": 0.94921875, "step": 2776 }, { "dpo_loss": 0.166015625, "epoch": 0.44, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 5.957547686951894e-07, "loss": 0.2283, "projector_lr": 1.7872643060855683e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.71875, "sft_loss": 0.8671875, "step": 2777 }, { "dpo_loss": 0.0224609375, "epoch": 0.44, "final_loss": 0.0224609375, "grad_norm": 0.0, "learning_rate": 5.955055690221288e-07, "loss": 0.201, "projector_lr": 1.7865167070663865e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.56640625, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.0625, "sft_loss": 0.4921875, "step": 2778 }, { "dpo_loss": 0.216796875, "epoch": 0.44, "final_loss": 0.216796875, "grad_norm": 0.0, "learning_rate": 5.952563447245238e-07, "loss": 0.2696, "projector_lr": 1.7857690341735715e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 2.359375, "rewards_train/rejected": -3.53125, "sft_loss": 0.90625, "step": 2779 }, { "dpo_loss": 0.201171875, "epoch": 0.44, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 5.950070958666332e-07, "loss": 0.1464, "projector_lr": 1.7850212875998997e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.0625, "sft_loss": 0.67578125, "step": 2780 }, { "dpo_loss": 0.15625, "epoch": 0.44, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 5.947578225127214e-07, "loss": 0.1762, "projector_lr": 1.7842734675381645e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 3.90625, "rewards_train/rejected": -5.71875, "sft_loss": 0.69140625, "step": 2781 }, { "dpo_loss": 0.166015625, "epoch": 0.45, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 5.945085247270597e-07, "loss": 0.339, "projector_lr": 1.7835255741811793e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.90625, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.15625, "sft_loss": 0.69921875, "step": 2782 }, { "dpo_loss": 0.01116943359375, "epoch": 0.45, "final_loss": 0.01116943359375, "grad_norm": 0.0, "learning_rate": 5.942592025739252e-07, "loss": 0.4942, "projector_lr": 1.7827776077217755e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 8.0625, "rewards_train/rejected": -9.5, "sft_loss": 0.55078125, "step": 2783 }, { "dpo_loss": 0.1083984375, "epoch": 0.45, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 5.940098561176016e-07, "loss": 0.3076, "projector_lr": 1.7820295683528053e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 6.125, "rewards_train/rejected": -7.1875, "sft_loss": 0.8359375, "step": 2784 }, { "dpo_loss": 0.123046875, "epoch": 0.45, "final_loss": 0.123046875, "grad_norm": 0.0, "learning_rate": 5.93760485422379e-07, "loss": 0.123, "projector_lr": 1.7812814562671371e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.125, "sft_loss": 0.6796875, "step": 2785 }, { "dpo_loss": 0.13671875, "epoch": 0.45, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 5.935110905525533e-07, "loss": 0.083, "projector_lr": 1.7805332716576597e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.9375, "sft_loss": 0.79296875, "step": 2786 }, { "dpo_loss": 0.2001953125, "epoch": 0.45, "final_loss": 0.2001953125, "grad_norm": 0.0, "learning_rate": 5.932616715724269e-07, "loss": 0.1906, "projector_lr": 1.7797850147172808e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.5625, "sft_loss": 1.0, "step": 2787 }, { "dpo_loss": 0.384765625, "epoch": 0.45, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 5.930122285463084e-07, "loss": 0.261, "projector_lr": 1.7790366856389252e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.375, "sft_loss": 0.6015625, "step": 2788 }, { "dpo_loss": 0.044677734375, "epoch": 0.45, "final_loss": 0.044677734375, "grad_norm": 0.0, "learning_rate": 5.927627615385128e-07, "loss": 0.0502, "projector_lr": 1.7782882846155385e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.341796875, "rewards_train/margins": 6.375, "rewards_train/rejected": -6.03125, "sft_loss": 0.8203125, "step": 2789 }, { "dpo_loss": 0.06298828125, "epoch": 0.45, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 5.925132706133608e-07, "loss": 0.0878, "projector_lr": 1.7775398118400824e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.65625, "sft_loss": 0.80859375, "step": 2790 }, { "dpo_loss": 0.0673828125, "epoch": 0.45, "final_loss": 0.0673828125, "grad_norm": 0.0, "learning_rate": 5.922637558351797e-07, "loss": 0.1053, "projector_lr": 1.7767912675055394e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71875, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.09375, "sft_loss": 0.69140625, "step": 2791 }, { "dpo_loss": 0.474609375, "epoch": 0.45, "final_loss": 0.474609375, "grad_norm": 0.0, "learning_rate": 5.920142172683027e-07, "loss": 0.255, "projector_lr": 1.7760426518049081e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.484375, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.65625, "sft_loss": 0.859375, "step": 2792 }, { "dpo_loss": 0.458984375, "epoch": 0.45, "final_loss": 0.458984375, "grad_norm": 0.0, "learning_rate": 5.917646549770694e-07, "loss": 0.2755, "projector_lr": 1.7752939649312084e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.28125, "sft_loss": 0.6640625, "step": 2793 }, { "dpo_loss": 0.234375, "epoch": 0.45, "final_loss": 0.234375, "grad_norm": 0.0, "learning_rate": 5.915150690258251e-07, "loss": 0.158, "projector_lr": 1.7745452070774752e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.96875, "sft_loss": 1.1171875, "step": 2794 }, { "dpo_loss": 0.004669189453125, "epoch": 0.45, "final_loss": 0.004669189453125, "grad_norm": 0.0, "learning_rate": 5.912654594789214e-07, "loss": 0.2645, "projector_lr": 1.7737963784367646e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 6.9375, "rewards_train/rejected": -8.375, "sft_loss": 0.9375, "step": 2795 }, { "dpo_loss": 0.05029296875, "epoch": 0.45, "final_loss": 0.05029296875, "grad_norm": 0.0, "learning_rate": 5.910158264007164e-07, "loss": 0.1165, "projector_lr": 1.7730474792021495e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.265625, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.8125, "sft_loss": 0.7265625, "step": 2796 }, { "dpo_loss": 0.07373046875, "epoch": 0.45, "final_loss": 0.07373046875, "grad_norm": 0.0, "learning_rate": 5.907661698555737e-07, "loss": 0.1497, "projector_lr": 1.7722985095667213e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.125, "sft_loss": 0.89453125, "step": 2797 }, { "dpo_loss": 0.10302734375, "epoch": 0.45, "final_loss": 0.10302734375, "grad_norm": 0.0, "learning_rate": 5.905164899078629e-07, "loss": 0.2175, "projector_lr": 1.771549469723589e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.96875, "sft_loss": 0.69140625, "step": 2798 }, { "dpo_loss": 0.46875, "epoch": 0.45, "final_loss": 0.46875, "grad_norm": 0.0, "learning_rate": 5.902667866219604e-07, "loss": 0.441, "projector_lr": 1.7708003598658813e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 2.8125, "rewards_train/rejected": -4.5625, "sft_loss": 0.84765625, "step": 2799 }, { "dpo_loss": 0.059326171875, "epoch": 0.45, "final_loss": 0.059326171875, "grad_norm": 0.0, "learning_rate": 5.900170600622476e-07, "loss": 0.1349, "projector_lr": 1.770051180186743e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.5, "sft_loss": 0.625, "step": 2800 }, { "dpo_loss": 0.197265625, "epoch": 0.45, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 5.897673102931126e-07, "loss": 0.1724, "projector_lr": 1.769301930879338e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.40625, "sft_loss": 0.765625, "step": 2801 }, { "dpo_loss": 0.1015625, "epoch": 0.45, "final_loss": 0.1015625, "grad_norm": 0.0, "learning_rate": 5.895175373789495e-07, "loss": 0.1901, "projector_lr": 1.7685526121368487e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.59375, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.71875, "sft_loss": 0.734375, "step": 2802 }, { "dpo_loss": 0.0076904296875, "epoch": 0.45, "final_loss": 0.0076904296875, "grad_norm": 0.0, "learning_rate": 5.892677413841577e-07, "loss": 0.0486, "projector_lr": 1.7678032241524732e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.32421875, "rewards_train/margins": 5.8125, "rewards_train/rejected": -5.5, "sft_loss": 0.65625, "step": 2803 }, { "dpo_loss": 0.0205078125, "epoch": 0.45, "final_loss": 0.0205078125, "grad_norm": 0.0, "learning_rate": 5.890179223731433e-07, "loss": 0.075, "projector_lr": 1.76705376711943e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1435546875, "rewards_train/margins": 6.59375, "rewards_train/rejected": -6.75, "sft_loss": 0.78125, "step": 2804 }, { "dpo_loss": 0.0771484375, "epoch": 0.45, "final_loss": 0.0771484375, "grad_norm": 0.0, "learning_rate": 5.88768080410318e-07, "loss": 0.3517, "projector_lr": 1.766304241230954e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.84375, "sft_loss": 0.8515625, "step": 2805 }, { "dpo_loss": 0.03662109375, "epoch": 0.45, "final_loss": 0.03662109375, "grad_norm": 0.0, "learning_rate": 5.885182155600993e-07, "loss": 0.0871, "projector_lr": 1.765554646680298e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.46875, "sft_loss": 0.5625, "step": 2806 }, { "dpo_loss": 0.11083984375, "epoch": 0.45, "final_loss": 0.11083984375, "grad_norm": 0.0, "learning_rate": 5.88268327886911e-07, "loss": 0.0877, "projector_lr": 1.764804983660733e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.5, "sft_loss": 0.51953125, "step": 2807 }, { "dpo_loss": 0.26171875, "epoch": 0.45, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 5.880184174551822e-07, "loss": 0.2579, "projector_lr": 1.7640552523655467e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.71875, "sft_loss": 0.73046875, "step": 2808 }, { "dpo_loss": 0.05859375, "epoch": 0.45, "final_loss": 0.05859375, "grad_norm": 0.0, "learning_rate": 5.877684843293487e-07, "loss": 0.0552, "projector_lr": 1.7633054529880461e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 6.4375, "rewards_train/rejected": -7.0, "sft_loss": 0.62890625, "step": 2809 }, { "dpo_loss": 0.1640625, "epoch": 0.45, "final_loss": 0.1640625, "grad_norm": 0.0, "learning_rate": 5.875185285738511e-07, "loss": 0.1772, "projector_lr": 1.7625555857215535e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.515625, "rewards_train/margins": 3.484375, "rewards_train/rejected": -5.0, "sft_loss": 0.75, "step": 2810 }, { "dpo_loss": 0.1171875, "epoch": 0.45, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 5.872685502531367e-07, "loss": 0.0884, "projector_lr": 1.7618056507594103e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71484375, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.46875, "sft_loss": 0.69921875, "step": 2811 }, { "dpo_loss": 0.10205078125, "epoch": 0.45, "final_loss": 0.10205078125, "grad_norm": 0.0, "learning_rate": 5.870185494316583e-07, "loss": 0.0906, "projector_lr": 1.761055648294975e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 6.15625, "rewards_train/rejected": -7.53125, "sft_loss": 0.88671875, "step": 2812 }, { "dpo_loss": 0.154296875, "epoch": 0.45, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 5.867685261738744e-07, "loss": 0.146, "projector_lr": 1.7603055785216232e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.109375, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.4375, "sft_loss": 0.62890625, "step": 2813 }, { "dpo_loss": 0.059326171875, "epoch": 0.45, "final_loss": 0.059326171875, "grad_norm": 0.0, "learning_rate": 5.865184805442496e-07, "loss": 0.1565, "projector_lr": 1.7595554416327487e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 6.4375, "rewards_train/rejected": -7.03125, "sft_loss": 0.64453125, "step": 2814 }, { "dpo_loss": 0.2431640625, "epoch": 0.45, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 5.862684126072538e-07, "loss": 0.1408, "projector_lr": 1.7588052378217616e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.21875, "rewards_train/margins": 2.390625, "rewards_train/rejected": -3.609375, "sft_loss": 0.84375, "step": 2815 }, { "dpo_loss": 0.045166015625, "epoch": 0.45, "final_loss": 0.045166015625, "grad_norm": 0.0, "learning_rate": 5.860183224273631e-07, "loss": 0.0418, "projector_lr": 1.7580549672820897e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.96875, "sft_loss": 0.8203125, "step": 2816 }, { "dpo_loss": 0.04931640625, "epoch": 0.45, "final_loss": 0.04931640625, "grad_norm": 0.0, "learning_rate": 5.857682100690592e-07, "loss": 0.0811, "projector_lr": 1.7573046302071776e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1435546875, "rewards_train/margins": 5.5, "rewards_train/rejected": -5.65625, "sft_loss": 0.70703125, "step": 2817 }, { "dpo_loss": 0.25, "epoch": 0.45, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 5.855180755968293e-07, "loss": 0.3704, "projector_lr": 1.756554226790488e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 6.46875, "rewards_train/rejected": -7.625, "sft_loss": 0.62890625, "step": 2818 }, { "dpo_loss": 0.234375, "epoch": 0.45, "final_loss": 0.234375, "grad_norm": 0.0, "learning_rate": 5.852679190751664e-07, "loss": 0.1278, "projector_lr": 1.7558037572254992e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.50390625, "rewards_train/margins": 3.546875, "rewards_train/rejected": -4.0625, "sft_loss": 0.59765625, "step": 2819 }, { "dpo_loss": 0.041259765625, "epoch": 0.45, "final_loss": 0.041259765625, "grad_norm": 0.0, "learning_rate": 5.850177405685696e-07, "loss": 0.0231, "projector_lr": 1.7550532217057089e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.443359375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.40625, "sft_loss": 0.8984375, "step": 2820 }, { "dpo_loss": 0.208984375, "epoch": 0.45, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 5.847675401415429e-07, "loss": 0.1133, "projector_lr": 1.754302620424629e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.71875, "sft_loss": 0.9609375, "step": 2821 }, { "dpo_loss": 0.3828125, "epoch": 0.45, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 5.845173178585968e-07, "loss": 0.2888, "projector_lr": 1.7535519535757903e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.84375, "rewards_train/margins": 6.59375, "rewards_train/rejected": -8.4375, "sft_loss": 0.55859375, "step": 2822 }, { "dpo_loss": 0.099609375, "epoch": 0.45, "final_loss": 0.099609375, "grad_norm": 0.0, "learning_rate": 5.842670737842467e-07, "loss": 0.1658, "projector_lr": 1.7528012213527402e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.03125, "sft_loss": 0.91015625, "step": 2823 }, { "dpo_loss": 0.79296875, "epoch": 0.45, "final_loss": 0.79296875, "grad_norm": 0.0, "learning_rate": 5.840168079830139e-07, "loss": 0.4933, "projector_lr": 1.7520504239490417e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.09375, "rewards_train/margins": 2.78125, "rewards_train/rejected": -4.875, "sft_loss": 0.984375, "step": 2824 }, { "dpo_loss": 0.0732421875, "epoch": 0.45, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 5.837665205194256e-07, "loss": 0.2266, "projector_lr": 1.7512995615582772e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.65625, "sft_loss": 0.890625, "step": 2825 }, { "dpo_loss": 0.455078125, "epoch": 0.45, "final_loss": 0.455078125, "grad_norm": 0.0, "learning_rate": 5.835162114580141e-07, "loss": 0.3775, "projector_lr": 1.7505486343740424e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 3.90625, "rewards_train/rejected": -4.71875, "sft_loss": 0.87109375, "step": 2826 }, { "dpo_loss": 0.1943359375, "epoch": 0.45, "final_loss": 0.1943359375, "grad_norm": 0.0, "learning_rate": 5.832658808633178e-07, "loss": 0.1566, "projector_lr": 1.7497976425899532e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.71875, "sft_loss": 0.67578125, "step": 2827 }, { "dpo_loss": 0.1435546875, "epoch": 0.45, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 5.830155287998799e-07, "loss": 0.1161, "projector_lr": 1.7490465863996396e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.5, "sft_loss": 0.79296875, "step": 2828 }, { "dpo_loss": 0.3984375, "epoch": 0.45, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 5.827651553322498e-07, "loss": 0.2623, "projector_lr": 1.7482954659967494e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.78125, "sft_loss": 0.72265625, "step": 2829 }, { "dpo_loss": 0.1611328125, "epoch": 0.45, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 5.82514760524982e-07, "loss": 0.2011, "projector_lr": 1.7475442815749463e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1875, "rewards_train/margins": 2.40625, "rewards_train/rejected": -3.59375, "sft_loss": 0.62109375, "step": 2830 }, { "dpo_loss": 0.1318359375, "epoch": 0.45, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 5.822643444426369e-07, "loss": 0.1184, "projector_lr": 1.746793033327911e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 3.359375, "rewards_train/rejected": -5.09375, "sft_loss": 0.88671875, "step": 2831 }, { "dpo_loss": 0.19921875, "epoch": 0.45, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 5.820139071497804e-07, "loss": 0.1586, "projector_lr": 1.7460417214493412e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4375, "rewards_train/margins": 5.75, "rewards_train/rejected": -7.1875, "sft_loss": 0.8984375, "step": 2832 }, { "dpo_loss": 0.033447265625, "epoch": 0.45, "final_loss": 0.033447265625, "grad_norm": 0.0, "learning_rate": 5.817634487109831e-07, "loss": 0.0376, "projector_lr": 1.7452903461329496e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.515625, "rewards_train/margins": 6.03125, "rewards_train/rejected": -8.5625, "sft_loss": 0.69921875, "step": 2833 }, { "dpo_loss": 0.384765625, "epoch": 0.45, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 5.815129691908221e-07, "loss": 0.3327, "projector_lr": 1.7445389075724666e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.3125, "rewards_train/margins": 2.703125, "rewards_train/rejected": -5.03125, "sft_loss": 1.0, "step": 2834 }, { "dpo_loss": 0.283203125, "epoch": 0.45, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 5.812624686538791e-07, "loss": 0.2226, "projector_lr": 1.7437874059616372e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.421875, "rewards_train/margins": 2.84375, "rewards_train/rejected": -4.28125, "sft_loss": 0.76953125, "step": 2835 }, { "dpo_loss": 0.27734375, "epoch": 0.45, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 5.810119471647416e-07, "loss": 0.1993, "projector_lr": 1.743035841494225e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.65625, "sft_loss": 0.78125, "step": 2836 }, { "dpo_loss": 0.042236328125, "epoch": 0.45, "final_loss": 0.042236328125, "grad_norm": 0.0, "learning_rate": 5.807614047880028e-07, "loss": 0.1736, "projector_lr": 1.7422842143640083e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71875, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.59375, "sft_loss": 0.66015625, "step": 2837 }, { "dpo_loss": 0.18359375, "epoch": 0.45, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 5.805108415882604e-07, "loss": 0.2159, "projector_lr": 1.7415325247647812e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.8125, "sft_loss": 0.83984375, "step": 2838 }, { "dpo_loss": 0.06884765625, "epoch": 0.45, "final_loss": 0.06884765625, "grad_norm": 0.0, "learning_rate": 5.802602576301183e-07, "loss": 0.0786, "projector_lr": 1.7407807728903551e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.03125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.0625, "sft_loss": 0.98828125, "step": 2839 }, { "dpo_loss": 0.055908203125, "epoch": 0.45, "final_loss": 0.055908203125, "grad_norm": 0.0, "learning_rate": 5.800096529781856e-07, "loss": 0.154, "projector_lr": 1.740028958934557e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 6.28125, "rewards_train/rejected": -7.0, "sft_loss": 0.734375, "step": 2840 }, { "dpo_loss": 0.46484375, "epoch": 0.45, "final_loss": 0.46484375, "grad_norm": 0.0, "learning_rate": 5.797590276970763e-07, "loss": 0.3019, "projector_lr": 1.739277083091229e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.0, "rewards_train/margins": 2.578125, "rewards_train/rejected": -4.59375, "sft_loss": 1.4765625, "step": 2841 }, { "dpo_loss": 0.306640625, "epoch": 0.45, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 5.795083818514101e-07, "loss": 0.4238, "projector_lr": 1.7385251455542306e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.421875, "rewards_train/margins": 3.0625, "rewards_train/rejected": -5.46875, "sft_loss": 0.8125, "step": 2842 }, { "dpo_loss": 0.1953125, "epoch": 0.45, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 5.792577155058121e-07, "loss": 0.1042, "projector_lr": 1.7377731465174367e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7890625, "rewards_train/margins": 2.0625, "rewards_train/rejected": -3.84375, "sft_loss": 0.83984375, "step": 2843 }, { "dpo_loss": 0.033203125, "epoch": 0.46, "final_loss": 0.033203125, "grad_norm": 0.0, "learning_rate": 5.790070287249124e-07, "loss": 0.0978, "projector_lr": 1.7370210861747373e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.53125, "sft_loss": 0.93359375, "step": 2844 }, { "dpo_loss": 0.01495361328125, "epoch": 0.46, "final_loss": 0.01495361328125, "grad_norm": 0.0, "learning_rate": 5.787563215733465e-07, "loss": 0.02, "projector_lr": 1.7362689647200393e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.46875, "sft_loss": 0.66796875, "step": 2845 }, { "dpo_loss": 0.52734375, "epoch": 0.46, "final_loss": 0.52734375, "grad_norm": 0.0, "learning_rate": 5.785055941157549e-07, "loss": 0.2708, "projector_lr": 1.735516782347265e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.53125, "rewards_train/margins": 4.8125, "rewards_train/rejected": -7.34375, "sft_loss": 0.78125, "step": 2846 }, { "dpo_loss": 0.322265625, "epoch": 0.46, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 5.782548464167839e-07, "loss": 0.2102, "projector_lr": 1.734764539250352e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.328125, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.8125, "sft_loss": 0.9921875, "step": 2847 }, { "dpo_loss": 0.455078125, "epoch": 0.46, "final_loss": 0.455078125, "grad_norm": 0.0, "learning_rate": 5.780040785410844e-07, "loss": 0.3524, "projector_lr": 1.7340122356232535e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.578125, "rewards_train/margins": 3.921875, "rewards_train/rejected": -6.5, "sft_loss": 0.90625, "step": 2848 }, { "dpo_loss": 0.0181884765625, "epoch": 0.46, "final_loss": 0.0181884765625, "grad_norm": 0.0, "learning_rate": 5.77753290553313e-07, "loss": 0.1911, "projector_lr": 1.7332598716599393e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.4375, "rewards_train/margins": 5.3125, "rewards_train/rejected": -7.75, "sft_loss": 1.0703125, "step": 2849 }, { "dpo_loss": 0.11669921875, "epoch": 0.46, "final_loss": 0.11669921875, "grad_norm": 0.0, "learning_rate": 5.775024825181312e-07, "loss": 0.1482, "projector_lr": 1.7325074475543936e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9609375, "rewards_train/margins": 5.6875, "rewards_train/rejected": -7.65625, "sft_loss": 1.015625, "step": 2850 }, { "dpo_loss": 0.427734375, "epoch": 0.46, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 5.772516545002056e-07, "loss": 0.411, "projector_lr": 1.731754963500617e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.109375, "rewards_train/margins": 5.21875, "rewards_train/rejected": -7.34375, "sft_loss": 0.87109375, "step": 2851 }, { "dpo_loss": 0.228515625, "epoch": 0.46, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 5.770008065642083e-07, "loss": 0.1369, "projector_lr": 1.7310024196926252e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.1875, "rewards_train/margins": 2.265625, "rewards_train/rejected": -4.4375, "sft_loss": 1.1328125, "step": 2852 }, { "dpo_loss": 0.1982421875, "epoch": 0.46, "final_loss": 0.1982421875, "grad_norm": 0.0, "learning_rate": 5.767499387748161e-07, "loss": 0.1543, "projector_lr": 1.7302498163244486e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.625, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.125, "sft_loss": 0.796875, "step": 2853 }, { "dpo_loss": 0.37890625, "epoch": 0.46, "final_loss": 0.37890625, "grad_norm": 0.0, "learning_rate": 5.764990511967115e-07, "loss": 0.4642, "projector_lr": 1.7294971535901347e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 2.234375, "rewards_train/rejected": -3.59375, "sft_loss": 0.74609375, "step": 2854 }, { "dpo_loss": 0.427734375, "epoch": 0.46, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 5.762481438945813e-07, "loss": 0.2567, "projector_lr": 1.728744431683744e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.75, "sft_loss": 0.76171875, "step": 2855 }, { "dpo_loss": 0.25390625, "epoch": 0.46, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 5.759972169331182e-07, "loss": 0.2891, "projector_lr": 1.7279916507993547e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 5.65625, "rewards_train/rejected": -7.25, "sft_loss": 0.70703125, "step": 2856 }, { "dpo_loss": 0.36328125, "epoch": 0.46, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 5.757462703770194e-07, "loss": 0.2309, "projector_lr": 1.7272388111310583e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.609375, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.65625, "sft_loss": 0.83203125, "step": 2857 }, { "dpo_loss": 0.59765625, "epoch": 0.46, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 5.754953042909873e-07, "loss": 0.4869, "projector_lr": 1.7264859128729622e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -0.609375, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.53125, "sft_loss": 1.234375, "step": 2858 }, { "dpo_loss": 0.052001953125, "epoch": 0.46, "final_loss": 0.052001953125, "grad_norm": 0.0, "learning_rate": 5.752443187397297e-07, "loss": 0.364, "projector_lr": 1.7257329562191892e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -6.3125, "sft_loss": 0.78515625, "step": 2859 }, { "dpo_loss": 0.058837890625, "epoch": 0.46, "final_loss": 0.058837890625, "grad_norm": 0.0, "learning_rate": 5.749933137879586e-07, "loss": 0.1497, "projector_lr": 1.7249799413638762e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.09375, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.625, "sft_loss": 0.91015625, "step": 2860 }, { "dpo_loss": 0.275390625, "epoch": 0.46, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 5.747422895003922e-07, "loss": 0.2807, "projector_lr": 1.7242268685011767e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.53125, "rewards_train/margins": 6.375, "rewards_train/rejected": -6.90625, "sft_loss": 0.578125, "step": 2861 }, { "dpo_loss": 0.83984375, "epoch": 0.46, "final_loss": 0.83984375, "grad_norm": 0.0, "learning_rate": 5.744912459417524e-07, "loss": 0.4885, "projector_lr": 1.7234737378252572e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.75, "rewards_train/margins": 3.203125, "rewards_train/rejected": -4.9375, "sft_loss": 1.0703125, "step": 2862 }, { "dpo_loss": 0.08056640625, "epoch": 0.46, "final_loss": 0.08056640625, "grad_norm": 0.0, "learning_rate": 5.742401831767668e-07, "loss": 0.1664, "projector_lr": 1.7227205495303006e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.703125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.25, "sft_loss": 0.94140625, "step": 2863 }, { "dpo_loss": 0.4296875, "epoch": 0.46, "final_loss": 0.4296875, "grad_norm": 0.0, "learning_rate": 5.739891012701678e-07, "loss": 0.2319, "projector_lr": 1.7219673038105037e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 3.3125, "rewards_train/rejected": -4.625, "sft_loss": 0.9375, "step": 2864 }, { "dpo_loss": 0.09423828125, "epoch": 0.46, "final_loss": 0.09423828125, "grad_norm": 0.0, "learning_rate": 5.737380002866931e-07, "loss": 0.2612, "projector_lr": 1.7212140008600797e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.875, "sft_loss": 0.5703125, "step": 2865 }, { "dpo_loss": 0.11962890625, "epoch": 0.46, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 5.734868802910846e-07, "loss": 0.1929, "projector_lr": 1.720460640873254e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.287109375, "rewards_train/margins": 3.6875, "rewards_train/rejected": -3.96875, "sft_loss": 0.58984375, "step": 2866 }, { "dpo_loss": 0.236328125, "epoch": 0.46, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 5.732357413480895e-07, "loss": 0.3488, "projector_lr": 1.7197072240442687e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.265625, "rewards_train/margins": 4.4375, "rewards_train/rejected": -6.71875, "sft_loss": 1.078125, "step": 2867 }, { "dpo_loss": 0.0546875, "epoch": 0.46, "final_loss": 0.0546875, "grad_norm": 0.0, "learning_rate": 5.729845835224599e-07, "loss": 0.061, "projector_lr": 1.7189537505673799e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.8125, "sft_loss": 0.8515625, "step": 2868 }, { "dpo_loss": 0.0732421875, "epoch": 0.46, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 5.727334068789528e-07, "loss": 0.2923, "projector_lr": 1.7182002206368587e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.65625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -6.0625, "sft_loss": 0.890625, "step": 2869 }, { "dpo_loss": 0.17578125, "epoch": 0.46, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 5.7248221148233e-07, "loss": 0.4318, "projector_lr": 1.7174466344469901e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.3125, "rewards_train/margins": 3.90625, "rewards_train/rejected": -6.21875, "sft_loss": 1.3203125, "step": 2870 }, { "dpo_loss": 0.359375, "epoch": 0.46, "final_loss": 0.359375, "grad_norm": 0.0, "learning_rate": 5.72230997397358e-07, "loss": 0.2059, "projector_lr": 1.7166929921920743e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 3.328125, "rewards_train/rejected": -5.125, "sft_loss": 0.63671875, "step": 2871 }, { "dpo_loss": 0.034423828125, "epoch": 0.46, "final_loss": 0.034423828125, "grad_norm": 0.0, "learning_rate": 5.719797646888083e-07, "loss": 0.1967, "projector_lr": 1.715939294066425e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.25, "sft_loss": 0.81640625, "step": 2872 }, { "dpo_loss": 0.232421875, "epoch": 0.46, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 5.71728513421457e-07, "loss": 0.6416, "projector_lr": 1.7151855402643712e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.390625, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.71875, "sft_loss": 0.9375, "step": 2873 }, { "dpo_loss": 0.095703125, "epoch": 0.46, "final_loss": 0.095703125, "grad_norm": 0.0, "learning_rate": 5.714772436600853e-07, "loss": 0.1191, "projector_lr": 1.7144317309802562e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4375, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.375, "sft_loss": 1.1640625, "step": 2874 }, { "dpo_loss": 0.314453125, "epoch": 0.46, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 5.71225955469479e-07, "loss": 0.2042, "projector_lr": 1.713677866408437e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 2.765625, "rewards_train/rejected": -3.859375, "sft_loss": 0.91796875, "step": 2875 }, { "dpo_loss": 0.1552734375, "epoch": 0.46, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 5.709746489144284e-07, "loss": 0.2138, "projector_lr": 1.7129239467432852e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.09375, "rewards_train/margins": 3.390625, "rewards_train/rejected": -5.5, "sft_loss": 0.7109375, "step": 2876 }, { "dpo_loss": 0.0634765625, "epoch": 0.46, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 5.707233240597289e-07, "loss": 0.0397, "projector_lr": 1.7121699721791869e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.75, "sft_loss": 0.7578125, "step": 2877 }, { "dpo_loss": 0.1474609375, "epoch": 0.46, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 5.704719809701806e-07, "loss": 0.0877, "projector_lr": 1.7114159429105418e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 6.15625, "rewards_train/rejected": -7.8125, "sft_loss": 0.97265625, "step": 2878 }, { "dpo_loss": 0.2890625, "epoch": 0.46, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 5.70220619710588e-07, "loss": 0.2311, "projector_lr": 1.710661859131764e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.46875, "sft_loss": 0.60546875, "step": 2879 }, { "dpo_loss": 0.09912109375, "epoch": 0.46, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 5.699692403457607e-07, "loss": 0.0805, "projector_lr": 1.709907721037282e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.90625, "sft_loss": 0.68359375, "step": 2880 }, { "dpo_loss": 0.353515625, "epoch": 0.46, "final_loss": 0.353515625, "grad_norm": 0.0, "learning_rate": 5.697178429405126e-07, "loss": 0.1876, "projector_lr": 1.709153528821538e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.515625, "rewards_train/margins": 2.796875, "rewards_train/rejected": -4.3125, "sft_loss": 0.765625, "step": 2881 }, { "dpo_loss": 0.53515625, "epoch": 0.46, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 5.694664275596624e-07, "loss": 0.302, "projector_lr": 1.708399282678987e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.015625, "rewards_train/margins": 3.09375, "rewards_train/rejected": -5.125, "sft_loss": 1.328125, "step": 2882 }, { "dpo_loss": 0.09423828125, "epoch": 0.46, "final_loss": 0.09423828125, "grad_norm": 0.0, "learning_rate": 5.692149942680335e-07, "loss": 0.1251, "projector_lr": 1.7076449828041003e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44140625, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.09375, "sft_loss": 0.74609375, "step": 2883 }, { "dpo_loss": 0.158203125, "epoch": 0.46, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 5.689635431304536e-07, "loss": 0.1134, "projector_lr": 1.7068906293913609e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.984375, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.15625, "sft_loss": 0.92578125, "step": 2884 }, { "dpo_loss": 0.44140625, "epoch": 0.46, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 5.687120742117556e-07, "loss": 0.2382, "projector_lr": 1.7061362226352668e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.09375, "sft_loss": 1.234375, "step": 2885 }, { "dpo_loss": 0.2080078125, "epoch": 0.46, "final_loss": 0.2080078125, "grad_norm": 0.0, "learning_rate": 5.684605875767765e-07, "loss": 0.2006, "projector_lr": 1.7053817627303297e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.875, "sft_loss": 0.67578125, "step": 2886 }, { "dpo_loss": 0.462890625, "epoch": 0.46, "final_loss": 0.462890625, "grad_norm": 0.0, "learning_rate": 5.68209083290358e-07, "loss": 0.3466, "projector_lr": 1.704627249871074e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.765625, "rewards_train/margins": 2.765625, "rewards_train/rejected": -4.53125, "sft_loss": 1.0234375, "step": 2887 }, { "dpo_loss": 0.2294921875, "epoch": 0.46, "final_loss": 0.2294921875, "grad_norm": 0.0, "learning_rate": 5.679575614173464e-07, "loss": 0.1432, "projector_lr": 1.7038726842520394e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 3.40625, "rewards_train/rejected": -4.8125, "sft_loss": 0.953125, "step": 2888 }, { "dpo_loss": 0.1435546875, "epoch": 0.46, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 5.677060220225923e-07, "loss": 0.2953, "projector_lr": 1.7031180660677772e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7421875, "rewards_train/margins": 5.53125, "rewards_train/rejected": -7.25, "sft_loss": 1.0078125, "step": 2889 }, { "dpo_loss": 0.1962890625, "epoch": 0.46, "final_loss": 0.1962890625, "grad_norm": 0.0, "learning_rate": 5.674544651709513e-07, "loss": 0.1494, "projector_lr": 1.702363395512854e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 2.40625, "rewards_train/rejected": -2.984375, "sft_loss": 0.94921875, "step": 2890 }, { "dpo_loss": 0.197265625, "epoch": 0.46, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 5.672028909272829e-07, "loss": 0.128, "projector_lr": 1.7016086727818487e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 3.125, "rewards_train/rejected": -4.6875, "sft_loss": 1.015625, "step": 2891 }, { "dpo_loss": 0.248046875, "epoch": 0.46, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 5.669512993564516e-07, "loss": 0.2596, "projector_lr": 1.7008538980693548e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -4.78125, "sft_loss": 0.8984375, "step": 2892 }, { "dpo_loss": 0.318359375, "epoch": 0.46, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 5.666996905233261e-07, "loss": 0.3839, "projector_lr": 1.7000990715699784e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.46875, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.5, "sft_loss": 0.75390625, "step": 2893 }, { "dpo_loss": 0.291015625, "epoch": 0.46, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 5.664480644927796e-07, "loss": 0.2737, "projector_lr": 1.6993441934783389e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.046875, "rewards_train/margins": 1.9375, "rewards_train/rejected": -3.984375, "sft_loss": 1.1015625, "step": 2894 }, { "dpo_loss": 0.2333984375, "epoch": 0.46, "final_loss": 0.2333984375, "grad_norm": 0.0, "learning_rate": 5.661964213296897e-07, "loss": 0.2217, "projector_lr": 1.6985892639890692e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.0625, "sft_loss": 0.5234375, "step": 2895 }, { "dpo_loss": 0.019287109375, "epoch": 0.46, "final_loss": 0.019287109375, "grad_norm": 0.0, "learning_rate": 5.659447610989386e-07, "loss": 0.0324, "projector_lr": 1.697834283296816e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.046875, "rewards_train/margins": 6.84375, "rewards_train/rejected": -8.875, "sft_loss": 1.03125, "step": 2896 }, { "dpo_loss": 0.05517578125, "epoch": 0.46, "final_loss": 0.05517578125, "grad_norm": 0.0, "learning_rate": 5.656930838654127e-07, "loss": 0.0826, "projector_lr": 1.6970792515962382e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -7.0625, "sft_loss": 0.8515625, "step": 2897 }, { "dpo_loss": 0.04638671875, "epoch": 0.46, "final_loss": 0.04638671875, "grad_norm": 0.0, "learning_rate": 5.654413896940027e-07, "loss": 0.2742, "projector_lr": 1.6963241690820081e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 5.4375, "rewards_train/rejected": -7.09375, "sft_loss": 0.7265625, "step": 2898 }, { "dpo_loss": 0.134765625, "epoch": 0.46, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 5.651896786496041e-07, "loss": 0.116, "projector_lr": 1.6955690359488123e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.375, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.84375, "sft_loss": 0.79296875, "step": 2899 }, { "dpo_loss": 0.41796875, "epoch": 0.46, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 5.649379507971161e-07, "loss": 0.2134, "projector_lr": 1.6948138523913486e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.21875, "sft_loss": 0.74609375, "step": 2900 }, { "dpo_loss": 0.049560546875, "epoch": 0.46, "final_loss": 0.049560546875, "grad_norm": 0.0, "learning_rate": 5.64686206201443e-07, "loss": 0.1932, "projector_lr": 1.6940586186043292e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.84375, "sft_loss": 0.74609375, "step": 2901 }, { "dpo_loss": 0.08056640625, "epoch": 0.46, "final_loss": 0.08056640625, "grad_norm": 0.0, "learning_rate": 5.644344449274927e-07, "loss": 0.0813, "projector_lr": 1.6933033347824782e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.8125, "sft_loss": 0.87109375, "step": 2902 }, { "dpo_loss": 0.11376953125, "epoch": 0.46, "final_loss": 0.11376953125, "grad_norm": 0.0, "learning_rate": 5.641826670401777e-07, "loss": 0.268, "projector_lr": 1.6925480011205333e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 3.578125, "rewards_train/rejected": -4.34375, "sft_loss": 0.6640625, "step": 2903 }, { "dpo_loss": 0.0498046875, "epoch": 0.46, "final_loss": 0.0498046875, "grad_norm": 0.0, "learning_rate": 5.639308726044152e-07, "loss": 0.0509, "projector_lr": 1.6917926178132457e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.71875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.625, "sft_loss": 1.0546875, "step": 2904 }, { "dpo_loss": 0.0498046875, "epoch": 0.46, "final_loss": 0.0498046875, "grad_norm": 0.0, "learning_rate": 5.636790616851258e-07, "loss": 0.0593, "projector_lr": 1.6910371850553776e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.21875, "sft_loss": 0.6953125, "step": 2905 }, { "dpo_loss": 0.06640625, "epoch": 0.46, "final_loss": 0.06640625, "grad_norm": 0.0, "learning_rate": 5.63427234347235e-07, "loss": 0.17, "projector_lr": 1.6902817030417053e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.15625, "sft_loss": 0.8515625, "step": 2906 }, { "dpo_loss": 0.6796875, "epoch": 0.47, "final_loss": 0.6796875, "grad_norm": 0.0, "learning_rate": 5.631753906556725e-07, "loss": 0.3709, "projector_lr": 1.6895261719670173e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.703125, "rewards_train/margins": 3.4375, "rewards_train/rejected": -5.15625, "sft_loss": 0.7421875, "step": 2907 }, { "dpo_loss": 0.14453125, "epoch": 0.47, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 5.629235306753718e-07, "loss": 0.2316, "projector_lr": 1.6887705920261156e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 3.109375, "rewards_train/rejected": -4.4375, "sft_loss": 0.59375, "step": 2908 }, { "dpo_loss": 0.146484375, "epoch": 0.47, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 5.626716544712708e-07, "loss": 0.3441, "projector_lr": 1.6880149634138126e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.765625, "rewards_train/margins": 3.03125, "rewards_train/rejected": -3.78125, "sft_loss": 0.7109375, "step": 2909 }, { "dpo_loss": 0.1484375, "epoch": 0.47, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 5.624197621083122e-07, "loss": 0.1506, "projector_lr": 1.6872592863249367e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 3.5625, "rewards_train/rejected": -4.375, "sft_loss": 2.03125, "step": 2910 }, { "dpo_loss": 0.10400390625, "epoch": 0.47, "final_loss": 0.10400390625, "grad_norm": 0.0, "learning_rate": 5.621678536514417e-07, "loss": 0.1173, "projector_lr": 1.6865035609543252e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.15625, "rewards_train/margins": 4.5, "rewards_train/rejected": -6.625, "sft_loss": 0.7578125, "step": 2911 }, { "dpo_loss": 0.40234375, "epoch": 0.47, "final_loss": 0.40234375, "grad_norm": 0.0, "learning_rate": 5.6191592916561e-07, "loss": 0.2128, "projector_lr": 1.6857477874968302e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7890625, "rewards_train/margins": 3.921875, "rewards_train/rejected": -5.71875, "sft_loss": 0.9609375, "step": 2912 }, { "dpo_loss": 0.026611328125, "epoch": 0.47, "final_loss": 0.026611328125, "grad_norm": 0.0, "learning_rate": 5.616639887157718e-07, "loss": 0.015, "projector_lr": 1.6849919661473156e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.40625, "sft_loss": 0.69921875, "step": 2913 }, { "dpo_loss": 0.640625, "epoch": 0.47, "final_loss": 0.640625, "grad_norm": 0.0, "learning_rate": 5.614120323668856e-07, "loss": 0.3774, "projector_lr": 1.684236097100657e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4375, "rewards_train/margins": 1.8828125, "rewards_train/rejected": -3.3125, "sft_loss": 0.76953125, "step": 2914 }, { "dpo_loss": 0.004852294921875, "epoch": 0.47, "final_loss": 0.004852294921875, "grad_norm": 0.0, "learning_rate": 5.611600601839143e-07, "loss": 0.2087, "projector_lr": 1.6834801805517432e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.21875, "sft_loss": 0.6640625, "step": 2915 }, { "dpo_loss": 0.07666015625, "epoch": 0.47, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 5.60908072231825e-07, "loss": 0.2601, "projector_lr": 1.6827242166954752e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5625, "rewards_train/margins": 6.03125, "rewards_train/rejected": -7.59375, "sft_loss": 1.796875, "step": 2916 }, { "dpo_loss": 0.1806640625, "epoch": 0.47, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 5.606560685755883e-07, "loss": 0.2137, "projector_lr": 1.6819682057267652e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.640625, "rewards_train/margins": 4.4375, "rewards_train/rejected": -6.09375, "sft_loss": 0.67578125, "step": 2917 }, { "dpo_loss": 0.1015625, "epoch": 0.47, "final_loss": 0.1015625, "grad_norm": 0.0, "learning_rate": 5.604040492801794e-07, "loss": 0.0735, "projector_lr": 1.6812121478405383e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9375, "rewards_train/margins": 7.34375, "rewards_train/rejected": -8.25, "sft_loss": 0.640625, "step": 2918 }, { "dpo_loss": 0.0225830078125, "epoch": 0.47, "final_loss": 0.0225830078125, "grad_norm": 0.0, "learning_rate": 5.601520144105775e-07, "loss": 0.0837, "projector_lr": 1.6804560432317325e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.46875, "sft_loss": 0.78125, "step": 2919 }, { "dpo_loss": 0.15625, "epoch": 0.47, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 5.598999640317652e-07, "loss": 0.3057, "projector_lr": 1.6796998920952957e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.015625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.6875, "sft_loss": 0.7890625, "step": 2920 }, { "dpo_loss": 0.05517578125, "epoch": 0.47, "final_loss": 0.05517578125, "grad_norm": 0.0, "learning_rate": 5.596478982087299e-07, "loss": 0.0703, "projector_lr": 1.6789436946261899e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.96875, "sft_loss": 0.8359375, "step": 2921 }, { "dpo_loss": 0.0810546875, "epoch": 0.47, "final_loss": 0.0810546875, "grad_norm": 0.0, "learning_rate": 5.593958170064627e-07, "loss": 0.059, "projector_lr": 1.6781874510193884e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.1875, "sft_loss": 0.7109375, "step": 2922 }, { "dpo_loss": 0.072265625, "epoch": 0.47, "final_loss": 0.072265625, "grad_norm": 0.0, "learning_rate": 5.591437204899585e-07, "loss": 0.2475, "projector_lr": 1.6774311614698757e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.53125, "sft_loss": 1.0078125, "step": 2923 }, { "dpo_loss": 0.1796875, "epoch": 0.47, "final_loss": 0.1796875, "grad_norm": 0.0, "learning_rate": 5.588916087242161e-07, "loss": 0.3758, "projector_lr": 1.6766748261726485e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.28125, "sft_loss": 0.96875, "step": 2924 }, { "dpo_loss": 0.291015625, "epoch": 0.47, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 5.586394817742386e-07, "loss": 0.1588, "projector_lr": 1.675918445322716e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 3.578125, "rewards_train/rejected": -4.8125, "sft_loss": 0.484375, "step": 2925 }, { "dpo_loss": 0.2578125, "epoch": 0.47, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 5.583873397050327e-07, "loss": 0.2333, "projector_lr": 1.6751620191150983e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2578125, "rewards_train/margins": 3.8125, "rewards_train/rejected": -3.5625, "sft_loss": 0.828125, "step": 2926 }, { "dpo_loss": 0.203125, "epoch": 0.47, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 5.581351825816091e-07, "loss": 0.3797, "projector_lr": 1.6744055477448273e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 3.734375, "rewards_train/rejected": -5.21875, "sft_loss": 0.671875, "step": 2927 }, { "dpo_loss": 0.1328125, "epoch": 0.47, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 5.578830104689824e-07, "loss": 0.0886, "projector_lr": 1.6736490314069475e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.140625, "rewards_train/margins": 5.46875, "rewards_train/rejected": -7.59375, "sft_loss": 0.85546875, "step": 2928 }, { "dpo_loss": 0.06396484375, "epoch": 0.47, "final_loss": 0.06396484375, "grad_norm": 0.0, "learning_rate": 5.576308234321709e-07, "loss": 0.0693, "projector_lr": 1.6728924702965129e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.9375, "sft_loss": 1.03125, "step": 2929 }, { "dpo_loss": 0.416015625, "epoch": 0.47, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 5.573786215361971e-07, "loss": 0.2895, "projector_lr": 1.6721358646085914e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.609375, "rewards_train/margins": 3.34375, "rewards_train/rejected": -4.96875, "sft_loss": 0.9140625, "step": 2930 }, { "dpo_loss": 0.07275390625, "epoch": 0.47, "final_loss": 0.07275390625, "grad_norm": 0.0, "learning_rate": 5.571264048460871e-07, "loss": 0.1051, "projector_lr": 1.6713792145382615e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.74609375, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.28125, "sft_loss": 0.90234375, "step": 2931 }, { "dpo_loss": 0.10107421875, "epoch": 0.47, "final_loss": 0.10107421875, "grad_norm": 0.0, "learning_rate": 5.568741734268705e-07, "loss": 0.1109, "projector_lr": 1.670622520280612e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.59375, "rewards_train/margins": 4.71875, "rewards_train/rejected": -6.3125, "sft_loss": 0.75390625, "step": 2932 }, { "dpo_loss": 0.10791015625, "epoch": 0.47, "final_loss": 0.10791015625, "grad_norm": 0.0, "learning_rate": 5.566219273435817e-07, "loss": 0.072, "projector_lr": 1.6698657820307452e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.96875, "sft_loss": 0.76953125, "step": 2933 }, { "dpo_loss": 0.259765625, "epoch": 0.47, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 5.563696666612576e-07, "loss": 0.1395, "projector_lr": 1.669108999983773e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8203125, "rewards_train/margins": 3.65625, "rewards_train/rejected": -5.46875, "sft_loss": 0.984375, "step": 2934 }, { "dpo_loss": 0.060546875, "epoch": 0.47, "final_loss": 0.060546875, "grad_norm": 0.0, "learning_rate": 5.561173914449398e-07, "loss": 0.334, "projector_lr": 1.6683521743348193e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.96875, "sft_loss": 0.69921875, "step": 2935 }, { "dpo_loss": 0.4609375, "epoch": 0.47, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 5.55865101759673e-07, "loss": 0.2697, "projector_lr": 1.6675953052790194e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9609375, "rewards_train/margins": 5.09375, "rewards_train/rejected": -7.0625, "sft_loss": 0.765625, "step": 2936 }, { "dpo_loss": 0.3125, "epoch": 0.47, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 5.556127976705064e-07, "loss": 0.4284, "projector_lr": 1.6668383930115193e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.546875, "rewards_train/margins": 3.0625, "rewards_train/rejected": -4.625, "sft_loss": 0.84375, "step": 2937 }, { "dpo_loss": 0.390625, "epoch": 0.47, "final_loss": 0.390625, "grad_norm": 0.0, "learning_rate": 5.553604792424922e-07, "loss": 0.4217, "projector_lr": 1.6660814377274767e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.0625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -6.46875, "sft_loss": 0.9375, "step": 2938 }, { "dpo_loss": 0.036376953125, "epoch": 0.47, "final_loss": 0.036376953125, "grad_norm": 0.0, "learning_rate": 5.551081465406865e-07, "loss": 0.095, "projector_lr": 1.6653244396220595e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.671875, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.3125, "sft_loss": 0.75390625, "step": 2939 }, { "dpo_loss": 0.205078125, "epoch": 0.47, "final_loss": 0.205078125, "grad_norm": 0.0, "learning_rate": 5.548557996301494e-07, "loss": 0.2038, "projector_lr": 1.6645673988904483e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.26171875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.28125, "sft_loss": 0.796875, "step": 2940 }, { "dpo_loss": 0.4375, "epoch": 0.47, "final_loss": 0.4375, "grad_norm": 0.0, "learning_rate": 5.546034385759441e-07, "loss": 0.5059, "projector_lr": 1.6638103157278326e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.75, "sft_loss": 0.66796875, "step": 2941 }, { "dpo_loss": 0.62109375, "epoch": 0.47, "final_loss": 0.62109375, "grad_norm": 0.0, "learning_rate": 5.543510634431382e-07, "loss": 0.4203, "projector_lr": 1.6630531903294147e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 2.15625, "rewards_train/rejected": -3.71875, "sft_loss": 0.859375, "step": 2942 }, { "dpo_loss": 0.453125, "epoch": 0.47, "final_loss": 0.453125, "grad_norm": 0.0, "learning_rate": 5.540986742968021e-07, "loss": 0.2659, "projector_lr": 1.6622960228904065e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.96875, "sft_loss": 0.73046875, "step": 2943 }, { "dpo_loss": 0.185546875, "epoch": 0.47, "final_loss": 0.185546875, "grad_norm": 0.0, "learning_rate": 5.538462712020103e-07, "loss": 0.28, "projector_lr": 1.661538813606031e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 3.859375, "rewards_train/rejected": -4.84375, "sft_loss": 0.55859375, "step": 2944 }, { "dpo_loss": 0.08740234375, "epoch": 0.47, "final_loss": 0.08740234375, "grad_norm": 0.0, "learning_rate": 5.535938542238409e-07, "loss": 0.0856, "projector_lr": 1.6607815626715227e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.8125, "sft_loss": 1.5546875, "step": 2945 }, { "dpo_loss": 0.0859375, "epoch": 0.47, "final_loss": 0.0859375, "grad_norm": 0.0, "learning_rate": 5.533414234273753e-07, "loss": 0.055, "projector_lr": 1.660024270282126e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.140625, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.125, "sft_loss": 0.6796875, "step": 2946 }, { "dpo_loss": 0.373046875, "epoch": 0.47, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 5.530889788776988e-07, "loss": 0.2449, "projector_lr": 1.6592669366330964e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.375, "rewards_train/margins": 2.3125, "rewards_train/rejected": -3.6875, "sft_loss": 1.0078125, "step": 2947 }, { "dpo_loss": 0.12255859375, "epoch": 0.47, "final_loss": 0.12255859375, "grad_norm": 0.0, "learning_rate": 5.528365206399e-07, "loss": 0.3586, "projector_lr": 1.6585095619196998e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.15625, "sft_loss": 0.83203125, "step": 2948 }, { "dpo_loss": 0.1728515625, "epoch": 0.47, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 5.52584048779071e-07, "loss": 0.2242, "projector_lr": 1.6577521463372132e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 3.359375, "rewards_train/rejected": -3.953125, "sft_loss": 0.578125, "step": 2949 }, { "dpo_loss": 0.19921875, "epoch": 0.47, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 5.523315633603078e-07, "loss": 0.1936, "projector_lr": 1.6569946900809236e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.703125, "rewards_train/margins": 3.953125, "rewards_train/rejected": -5.65625, "sft_loss": 1.7265625, "step": 2950 }, { "dpo_loss": 0.30078125, "epoch": 0.47, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 5.520790644487095e-07, "loss": 0.2892, "projector_lr": 1.6562371933461288e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7109375, "rewards_train/margins": 3.140625, "rewards_train/rejected": -4.84375, "sft_loss": 0.890625, "step": 2951 }, { "dpo_loss": 0.1689453125, "epoch": 0.47, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 5.518265521093788e-07, "loss": 0.1276, "projector_lr": 1.6554796563281365e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 7.5, "rewards_train/rejected": -8.875, "sft_loss": 0.6796875, "step": 2952 }, { "dpo_loss": 0.09521484375, "epoch": 0.47, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 5.51574026407422e-07, "loss": 0.1011, "projector_lr": 1.6547220792222658e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.53125, "sft_loss": 1.0390625, "step": 2953 }, { "dpo_loss": 0.2353515625, "epoch": 0.47, "final_loss": 0.2353515625, "grad_norm": 0.0, "learning_rate": 5.513214874079483e-07, "loss": 0.1251, "projector_lr": 1.653964462223845e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1689453125, "rewards_train/margins": 3.703125, "rewards_train/rejected": -3.875, "sft_loss": 0.6015625, "step": 2954 }, { "dpo_loss": 0.068359375, "epoch": 0.47, "final_loss": 0.068359375, "grad_norm": 0.0, "learning_rate": 5.51068935176071e-07, "loss": 0.0983, "projector_lr": 1.653206805528213e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.59375, "sft_loss": 0.68359375, "step": 2955 }, { "dpo_loss": 0.3515625, "epoch": 0.47, "final_loss": 0.3515625, "grad_norm": 0.0, "learning_rate": 5.508163697769067e-07, "loss": 0.2583, "projector_lr": 1.6524491093307202e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.09375, "sft_loss": 0.5859375, "step": 2956 }, { "dpo_loss": 0.0390625, "epoch": 0.47, "final_loss": 0.0390625, "grad_norm": 0.0, "learning_rate": 5.50563791275575e-07, "loss": 0.1085, "projector_lr": 1.651691373826725e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9296875, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.96875, "sft_loss": 0.77734375, "step": 2957 }, { "dpo_loss": 0.1806640625, "epoch": 0.47, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 5.503111997371992e-07, "loss": 0.1463, "projector_lr": 1.6509335992115977e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 3.90625, "rewards_train/rejected": -5.25, "sft_loss": 0.98046875, "step": 2958 }, { "dpo_loss": 0.271484375, "epoch": 0.47, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 5.50058595226906e-07, "loss": 0.3187, "projector_lr": 1.650175785680718e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1875, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.9375, "sft_loss": 0.84375, "step": 2959 }, { "dpo_loss": 0.1142578125, "epoch": 0.47, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 5.498059778098252e-07, "loss": 0.1929, "projector_lr": 1.6494179334294757e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.5, "sft_loss": 0.6640625, "step": 2960 }, { "dpo_loss": 0.15625, "epoch": 0.47, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 5.495533475510901e-07, "loss": 0.3096, "projector_lr": 1.6486600426532702e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.625, "rewards_train/margins": 3.5625, "rewards_train/rejected": -5.1875, "sft_loss": 0.8671875, "step": 2961 }, { "dpo_loss": 0.035888671875, "epoch": 0.47, "final_loss": 0.035888671875, "grad_norm": 0.0, "learning_rate": 5.493007045158372e-07, "loss": 0.1525, "projector_lr": 1.6479021135475118e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.125, "sft_loss": 0.859375, "step": 2962 }, { "dpo_loss": 0.10009765625, "epoch": 0.47, "final_loss": 0.10009765625, "grad_norm": 0.0, "learning_rate": 5.490480487692064e-07, "loss": 0.1836, "projector_lr": 1.6471441463076194e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5390625, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.53125, "sft_loss": 0.87109375, "step": 2963 }, { "dpo_loss": 0.0751953125, "epoch": 0.47, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 5.487953803763409e-07, "loss": 0.145, "projector_lr": 1.6463861411290226e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.59375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.9375, "sft_loss": 0.9296875, "step": 2964 }, { "dpo_loss": 0.056396484375, "epoch": 0.47, "final_loss": 0.056396484375, "grad_norm": 0.0, "learning_rate": 5.485426994023871e-07, "loss": 0.092, "projector_lr": 1.6456280982071616e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 7.125, "rewards_train/rejected": -8.375, "sft_loss": 0.69140625, "step": 2965 }, { "dpo_loss": 0.296875, "epoch": 0.47, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 5.482900059124947e-07, "loss": 0.273, "projector_lr": 1.644870017737484e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.34375, "rewards_train/margins": 2.453125, "rewards_train/rejected": -3.796875, "sft_loss": 0.9921875, "step": 2966 }, { "dpo_loss": 0.1162109375, "epoch": 0.47, "final_loss": 0.1162109375, "grad_norm": 0.0, "learning_rate": 5.480372999718163e-07, "loss": 0.1471, "projector_lr": 1.6441118999154492e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 3.140625, "rewards_train/rejected": -4.625, "sft_loss": 0.96484375, "step": 2967 }, { "dpo_loss": 0.37109375, "epoch": 0.47, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 5.477845816455082e-07, "loss": 0.2934, "projector_lr": 1.6433537449365249e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 2.3125, "rewards_train/rejected": -3.3125, "sft_loss": 0.7421875, "step": 2968 }, { "dpo_loss": 0.216796875, "epoch": 0.48, "final_loss": 0.216796875, "grad_norm": 0.0, "learning_rate": 5.475318509987297e-07, "loss": 0.1756, "projector_lr": 1.6425955529961894e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.59375, "sft_loss": 0.8046875, "step": 2969 }, { "dpo_loss": 0.0849609375, "epoch": 0.48, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 5.472791080966431e-07, "loss": 0.1532, "projector_lr": 1.6418373242899294e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.0, "sft_loss": 0.9296875, "step": 2970 }, { "dpo_loss": 0.04248046875, "epoch": 0.48, "final_loss": 0.04248046875, "grad_norm": 0.0, "learning_rate": 5.470263530044142e-07, "loss": 0.1046, "projector_lr": 1.641079059013243e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.875, "sft_loss": 0.80078125, "step": 2971 }, { "dpo_loss": 0.2236328125, "epoch": 0.48, "final_loss": 0.2236328125, "grad_norm": 0.0, "learning_rate": 5.467735857872116e-07, "loss": 0.2072, "projector_lr": 1.6403207573616348e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 3.53125, "rewards_train/rejected": -4.96875, "sft_loss": 0.86328125, "step": 2972 }, { "dpo_loss": 0.25390625, "epoch": 0.48, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 5.465208065102071e-07, "loss": 0.4269, "projector_lr": 1.6395624195306213e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 2.0625, "rewards_train/rejected": -3.046875, "sft_loss": 0.703125, "step": 2973 }, { "dpo_loss": 0.150390625, "epoch": 0.48, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 5.462680152385759e-07, "loss": 0.0776, "projector_lr": 1.6388040457157279e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.5625, "rewards_train/margins": 4.5625, "rewards_train/rejected": -7.125, "sft_loss": 1.171875, "step": 2974 }, { "dpo_loss": 0.36328125, "epoch": 0.48, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 5.46015212037496e-07, "loss": 0.3966, "projector_lr": 1.638045636112488e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.28125, "sft_loss": 0.921875, "step": 2975 }, { "dpo_loss": 0.041748046875, "epoch": 0.48, "final_loss": 0.041748046875, "grad_norm": 0.0, "learning_rate": 5.457623969721486e-07, "loss": 0.0403, "projector_lr": 1.6372871909164461e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96875, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.21875, "sft_loss": 0.61328125, "step": 2976 }, { "dpo_loss": 0.119140625, "epoch": 0.48, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 5.455095701077179e-07, "loss": 0.312, "projector_lr": 1.636528710323154e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6875, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.75, "sft_loss": 0.8515625, "step": 2977 }, { "dpo_loss": 0.0712890625, "epoch": 0.48, "final_loss": 0.0712890625, "grad_norm": 0.0, "learning_rate": 5.452567315093912e-07, "loss": 0.1175, "projector_lr": 1.6357701945281735e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.87890625, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.25, "sft_loss": 0.765625, "step": 2978 }, { "dpo_loss": 0.345703125, "epoch": 0.48, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 5.450038812423584e-07, "loss": 0.2261, "projector_lr": 1.6350116437270753e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.53125, "rewards_train/margins": 5.375, "rewards_train/rejected": -5.90625, "sft_loss": 0.65234375, "step": 2979 }, { "dpo_loss": 0.357421875, "epoch": 0.48, "final_loss": 0.357421875, "grad_norm": 0.0, "learning_rate": 5.447510193718134e-07, "loss": 0.2111, "projector_lr": 1.6342530581154404e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.109375, "rewards_train/margins": 5.9375, "rewards_train/rejected": -8.0625, "sft_loss": 0.8125, "step": 2980 }, { "dpo_loss": 0.10498046875, "epoch": 0.48, "final_loss": 0.10498046875, "grad_norm": 0.0, "learning_rate": 5.44498145962952e-07, "loss": 0.0981, "projector_lr": 1.6334944378888564e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.453125, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.96875, "sft_loss": 0.77734375, "step": 2981 }, { "dpo_loss": 0.30078125, "epoch": 0.48, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 5.442452610809738e-07, "loss": 0.2614, "projector_lr": 1.6327357832429216e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.40625, "rewards_train/margins": 4.125, "rewards_train/rejected": -6.53125, "sft_loss": 1.0859375, "step": 2982 }, { "dpo_loss": 0.025390625, "epoch": 0.48, "final_loss": 0.025390625, "grad_norm": 0.0, "learning_rate": 5.43992364791081e-07, "loss": 0.022, "projector_lr": 1.6319770943732429e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.25, "rewards_train/margins": 6.71875, "rewards_train/rejected": -8.9375, "sft_loss": 0.87890625, "step": 2983 }, { "dpo_loss": 0.15625, "epoch": 0.48, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 5.437394571584783e-07, "loss": 0.4425, "projector_lr": 1.6312183714754351e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.33203125, "rewards_train/margins": 6.375, "rewards_train/rejected": -6.6875, "sft_loss": 0.6484375, "step": 2984 }, { "dpo_loss": 0.0308837890625, "epoch": 0.48, "final_loss": 0.0308837890625, "grad_norm": 0.0, "learning_rate": 5.434865382483745e-07, "loss": 0.0984, "projector_lr": 1.6304596147451234e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 6.53125, "rewards_train/rejected": -7.96875, "sft_loss": 1.0, "step": 2985 }, { "dpo_loss": 0.12353515625, "epoch": 0.48, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 5.432336081259798e-07, "loss": 0.1389, "projector_lr": 1.6297008243779396e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.236328125, "rewards_train/margins": 3.625, "rewards_train/rejected": -3.859375, "sft_loss": 0.6953125, "step": 2986 }, { "dpo_loss": 0.296875, "epoch": 0.48, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 5.429806668565088e-07, "loss": 0.2548, "projector_lr": 1.6289420005695264e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.78125, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.15625, "sft_loss": 0.77734375, "step": 2987 }, { "dpo_loss": 0.06005859375, "epoch": 0.48, "final_loss": 0.06005859375, "grad_norm": 0.0, "learning_rate": 5.427277145051777e-07, "loss": 0.0592, "projector_lr": 1.6281831435155333e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30078125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.0, "sft_loss": 0.80859375, "step": 2988 }, { "dpo_loss": 0.5078125, "epoch": 0.48, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 5.424747511372066e-07, "loss": 0.3655, "projector_lr": 1.62742425341162e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5625, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.875, "sft_loss": 0.69921875, "step": 2989 }, { "dpo_loss": 0.1728515625, "epoch": 0.48, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 5.422217768178177e-07, "loss": 0.1868, "projector_lr": 1.6266653304534532e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.15625, "rewards_train/margins": 4.625, "rewards_train/rejected": -6.78125, "sft_loss": 0.59765625, "step": 2990 }, { "dpo_loss": 0.263671875, "epoch": 0.48, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 5.419687916122361e-07, "loss": 0.2681, "projector_lr": 1.6259063748367083e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 2.453125, "rewards_train/rejected": -4.21875, "sft_loss": 1.0703125, "step": 2991 }, { "dpo_loss": 0.0595703125, "epoch": 0.48, "final_loss": 0.0595703125, "grad_norm": 0.0, "learning_rate": 5.417157955856902e-07, "loss": 0.1128, "projector_lr": 1.625147386757071e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.96875, "sft_loss": 0.91796875, "step": 2992 }, { "dpo_loss": 0.10791015625, "epoch": 0.48, "final_loss": 0.10791015625, "grad_norm": 0.0, "learning_rate": 5.414627888034108e-07, "loss": 0.104, "projector_lr": 1.6243883664102325e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.75, "sft_loss": 0.73046875, "step": 2993 }, { "dpo_loss": 0.240234375, "epoch": 0.48, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 5.412097713306315e-07, "loss": 0.2032, "projector_lr": 1.6236293139918946e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.279296875, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.34375, "sft_loss": 0.609375, "step": 2994 }, { "dpo_loss": 0.0205078125, "epoch": 0.48, "final_loss": 0.0205078125, "grad_norm": 0.0, "learning_rate": 5.409567432325887e-07, "loss": 0.1136, "projector_lr": 1.6228702296977661e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.171875, "rewards_train/margins": 5.875, "rewards_train/rejected": -8.0625, "sft_loss": 1.203125, "step": 2995 }, { "dpo_loss": 0.083984375, "epoch": 0.48, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 5.407037045745214e-07, "loss": 0.1838, "projector_lr": 1.6221111137235644e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9921875, "rewards_train/margins": 5.3125, "rewards_train/rejected": -7.28125, "sft_loss": 0.93359375, "step": 2996 }, { "dpo_loss": 0.283203125, "epoch": 0.48, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 5.404506554216719e-07, "loss": 0.1656, "projector_lr": 1.6213519662650157e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.28125, "rewards_train/margins": 3.84375, "rewards_train/rejected": -5.125, "sft_loss": 0.73828125, "step": 2997 }, { "dpo_loss": 0.1533203125, "epoch": 0.48, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 5.401975958392843e-07, "loss": 0.211, "projector_lr": 1.6205927875178528e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.9375, "sft_loss": 0.88671875, "step": 2998 }, { "dpo_loss": 0.0211181640625, "epoch": 0.48, "final_loss": 0.0211181640625, "grad_norm": 0.0, "learning_rate": 5.399445258926061e-07, "loss": 0.0421, "projector_lr": 1.6198335776778183e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.50390625, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.78125, "sft_loss": 0.470703125, "step": 2999 }, { "dpo_loss": 0.251953125, "epoch": 0.48, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 5.396914456468872e-07, "loss": 0.1923, "projector_lr": 1.6190743369406616e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.875, "rewards_train/margins": 5.96875, "rewards_train/rejected": -7.84375, "sft_loss": 0.96875, "step": 3000 }, { "dpo_loss": 0.052734375, "epoch": 0.48, "final_loss": 0.052734375, "grad_norm": 0.0, "learning_rate": 5.394383551673801e-07, "loss": 0.1374, "projector_lr": 1.6183150655021406e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.3125, "sft_loss": 0.59765625, "step": 3001 }, { "dpo_loss": 0.6328125, "epoch": 0.48, "final_loss": 0.6328125, "grad_norm": 0.0, "learning_rate": 5.391852545193402e-07, "loss": 0.3847, "projector_lr": 1.6175557635580206e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 3.375, "rewards_train/rejected": -4.96875, "sft_loss": 0.83984375, "step": 3002 }, { "dpo_loss": 0.31640625, "epoch": 0.48, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 5.389321437680252e-07, "loss": 0.2337, "projector_lr": 1.6167964313040757e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.125, "rewards_train/margins": 3.59375, "rewards_train/rejected": -4.71875, "sft_loss": 0.83984375, "step": 3003 }, { "dpo_loss": 0.53515625, "epoch": 0.48, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 5.386790229786956e-07, "loss": 0.3427, "projector_lr": 1.6160370689360869e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -6.125, "sft_loss": 0.71875, "step": 3004 }, { "dpo_loss": 0.2373046875, "epoch": 0.48, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 5.384258922166146e-07, "loss": 0.2713, "projector_lr": 1.615277676649844e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6875, "rewards_train/margins": 6.96875, "rewards_train/rejected": -8.625, "sft_loss": 0.8125, "step": 3005 }, { "dpo_loss": 0.44140625, "epoch": 0.48, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 5.381727515470476e-07, "loss": 0.2341, "projector_lr": 1.6145182546411428e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.703125, "rewards_train/margins": 4.28125, "rewards_train/rejected": -6.0, "sft_loss": 0.80859375, "step": 3006 }, { "dpo_loss": 0.059326171875, "epoch": 0.48, "final_loss": 0.059326171875, "grad_norm": 0.0, "learning_rate": 5.379196010352627e-07, "loss": 0.121, "projector_lr": 1.6137588031057886e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.4375, "sft_loss": 0.72265625, "step": 3007 }, { "dpo_loss": 0.08203125, "epoch": 0.48, "final_loss": 0.08203125, "grad_norm": 0.0, "learning_rate": 5.376664407465309e-07, "loss": 0.0667, "projector_lr": 1.6129993222395927e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.84375, "sft_loss": 0.63671875, "step": 3008 }, { "dpo_loss": 0.138671875, "epoch": 0.48, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 5.374132707461252e-07, "loss": 0.1244, "projector_lr": 1.6122398122383756e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -4.84375, "sft_loss": 0.57421875, "step": 3009 }, { "dpo_loss": 0.0191650390625, "epoch": 0.48, "final_loss": 0.0191650390625, "grad_norm": 0.0, "learning_rate": 5.371600910993215e-07, "loss": 0.0863, "projector_lr": 1.6114802732979644e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.890625, "rewards_train/margins": 7.09375, "rewards_train/rejected": -8.0, "sft_loss": 0.4375, "step": 3010 }, { "dpo_loss": 0.177734375, "epoch": 0.48, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 5.369069018713979e-07, "loss": 0.1714, "projector_lr": 1.6107207056141937e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5078125, "rewards_train/margins": 3.84375, "rewards_train/rejected": -5.34375, "sft_loss": 0.87890625, "step": 3011 }, { "dpo_loss": 0.341796875, "epoch": 0.48, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 5.36653703127635e-07, "loss": 0.7391, "projector_lr": 1.6099611093829052e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.109375, "rewards_train/margins": 3.390625, "rewards_train/rejected": -4.5, "sft_loss": 0.984375, "step": 3012 }, { "dpo_loss": 0.6015625, "epoch": 0.48, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 5.364004949333162e-07, "loss": 0.3201, "projector_lr": 1.6092014847999486e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.33203125, "rewards_train/margins": 3.640625, "rewards_train/rejected": -3.953125, "sft_loss": 0.87890625, "step": 3013 }, { "dpo_loss": 0.5625, "epoch": 0.48, "final_loss": 0.5625, "grad_norm": 0.0, "learning_rate": 5.361472773537269e-07, "loss": 0.3538, "projector_lr": 1.6084418320611808e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.0625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -6.09375, "sft_loss": 0.98828125, "step": 3014 }, { "dpo_loss": 0.125, "epoch": 0.48, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 5.358940504541551e-07, "loss": 0.2297, "projector_lr": 1.6076821513624651e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.234375, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.15625, "sft_loss": 0.60546875, "step": 3015 }, { "dpo_loss": 0.5703125, "epoch": 0.48, "final_loss": 0.5703125, "grad_norm": 0.0, "learning_rate": 5.356408142998912e-07, "loss": 0.3154, "projector_lr": 1.6069224428996736e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8359375, "rewards_train/margins": 0.9140625, "rewards_train/rejected": -2.75, "sft_loss": 0.73828125, "step": 3016 }, { "dpo_loss": 0.279296875, "epoch": 0.48, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 5.35387568956228e-07, "loss": 0.4608, "projector_lr": 1.606162706868684e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.34375, "rewards_train/margins": 6.5, "rewards_train/rejected": -7.875, "sft_loss": 0.92578125, "step": 3017 }, { "dpo_loss": 0.0145263671875, "epoch": 0.48, "final_loss": 0.0145263671875, "grad_norm": 0.0, "learning_rate": 5.351343144884606e-07, "loss": 0.1172, "projector_lr": 1.6054029434653822e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.78125, "rewards_train/margins": 6.75, "rewards_train/rejected": -8.5, "sft_loss": 0.8984375, "step": 3018 }, { "dpo_loss": 0.50390625, "epoch": 0.48, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 5.348810509618868e-07, "loss": 0.4977, "projector_lr": 1.6046431528856607e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 1.796875, "rewards_train/rejected": -2.890625, "sft_loss": 0.71484375, "step": 3019 }, { "dpo_loss": 0.060302734375, "epoch": 0.48, "final_loss": 0.060302734375, "grad_norm": 0.0, "learning_rate": 5.34627778441806e-07, "loss": 0.1274, "projector_lr": 1.6038833353254182e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.734375, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.375, "sft_loss": 0.9765625, "step": 3020 }, { "dpo_loss": 0.1904296875, "epoch": 0.48, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 5.343744969935209e-07, "loss": 0.1963, "projector_lr": 1.6031234909805629e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.40625, "sft_loss": 0.859375, "step": 3021 }, { "dpo_loss": 0.3515625, "epoch": 0.48, "final_loss": 0.3515625, "grad_norm": 0.0, "learning_rate": 5.341212066823355e-07, "loss": 0.1996, "projector_lr": 1.6023636200470066e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.625, "rewards_train/margins": 3.0625, "rewards_train/rejected": -5.6875, "sft_loss": 0.921875, "step": 3022 }, { "dpo_loss": 0.0277099609375, "epoch": 0.48, "final_loss": 0.0277099609375, "grad_norm": 0.0, "learning_rate": 5.338679075735569e-07, "loss": 0.1757, "projector_lr": 1.6016037227206707e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.859375, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.3125, "sft_loss": 0.7265625, "step": 3023 }, { "dpo_loss": 0.119140625, "epoch": 0.48, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 5.336145997324935e-07, "loss": 0.0881, "projector_lr": 1.6008437991974808e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.3125, "sft_loss": 0.6640625, "step": 3024 }, { "dpo_loss": 0.13671875, "epoch": 0.48, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 5.333612832244574e-07, "loss": 0.1654, "projector_lr": 1.6000838496733722e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.859375, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.96875, "sft_loss": 0.71875, "step": 3025 }, { "dpo_loss": 0.099609375, "epoch": 0.48, "final_loss": 0.099609375, "grad_norm": 0.0, "learning_rate": 5.331079581147615e-07, "loss": 0.0833, "projector_lr": 1.5993238743442847e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.0625, "sft_loss": 0.8125, "step": 3026 }, { "dpo_loss": 0.08837890625, "epoch": 0.48, "final_loss": 0.08837890625, "grad_norm": 0.0, "learning_rate": 5.328546244687217e-07, "loss": 0.0835, "projector_lr": 1.5985638734061654e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.3125, "sft_loss": 0.92578125, "step": 3027 }, { "dpo_loss": 0.12451171875, "epoch": 0.48, "final_loss": 0.12451171875, "grad_norm": 0.0, "learning_rate": 5.326012823516562e-07, "loss": 0.1481, "projector_lr": 1.597803847054969e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 4.8125, "rewards_train/rejected": -6.21875, "sft_loss": 1.078125, "step": 3028 }, { "dpo_loss": 0.01031494140625, "epoch": 0.48, "final_loss": 0.01031494140625, "grad_norm": 0.0, "learning_rate": 5.323479318288847e-07, "loss": 0.2659, "projector_lr": 1.5970437954866542e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71875, "rewards_train/margins": 7.1875, "rewards_train/rejected": -7.90625, "sft_loss": 0.7421875, "step": 3029 }, { "dpo_loss": 0.1884765625, "epoch": 0.48, "final_loss": 0.1884765625, "grad_norm": 0.0, "learning_rate": 5.320945729657297e-07, "loss": 0.1625, "projector_lr": 1.5962837188971895e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.8125, "sft_loss": 0.88671875, "step": 3030 }, { "dpo_loss": 0.054931640625, "epoch": 0.48, "final_loss": 0.054931640625, "grad_norm": 0.0, "learning_rate": 5.318412058275157e-07, "loss": 0.1579, "projector_lr": 1.595523617482547e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.46875, "sft_loss": 0.6015625, "step": 3031 }, { "dpo_loss": 0.06396484375, "epoch": 0.49, "final_loss": 0.06396484375, "grad_norm": 0.0, "learning_rate": 5.31587830479569e-07, "loss": 0.3399, "projector_lr": 1.5947634914387071e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.34375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.40625, "sft_loss": 0.66796875, "step": 3032 }, { "dpo_loss": 0.314453125, "epoch": 0.49, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 5.313344469872184e-07, "loss": 0.1928, "projector_lr": 1.5940033409616553e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3984375, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.125, "sft_loss": 0.71484375, "step": 3033 }, { "dpo_loss": 0.189453125, "epoch": 0.49, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 5.310810554157946e-07, "loss": 0.2163, "projector_lr": 1.593243166247384e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.9375, "sft_loss": 0.640625, "step": 3034 }, { "dpo_loss": 0.099609375, "epoch": 0.49, "final_loss": 0.099609375, "grad_norm": 0.0, "learning_rate": 5.308276558306307e-07, "loss": 0.1629, "projector_lr": 1.5924829674918923e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.6875, "sft_loss": 0.703125, "step": 3035 }, { "dpo_loss": 0.453125, "epoch": 0.49, "final_loss": 0.453125, "grad_norm": 0.0, "learning_rate": 5.305742482970615e-07, "loss": 0.4339, "projector_lr": 1.5917227448911844e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2451171875, "rewards_train/margins": 4.625, "rewards_train/rejected": -4.875, "sft_loss": 0.796875, "step": 3036 }, { "dpo_loss": 0.09521484375, "epoch": 0.49, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 5.303208328804237e-07, "loss": 0.2582, "projector_lr": 1.5909624986412713e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.09375, "sft_loss": 0.65234375, "step": 3037 }, { "dpo_loss": 0.275390625, "epoch": 0.49, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 5.300674096460568e-07, "loss": 0.2628, "projector_lr": 1.5902022289381704e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.0625, "rewards_train/margins": 3.640625, "rewards_train/rejected": -5.6875, "sft_loss": 0.86328125, "step": 3038 }, { "dpo_loss": 0.271484375, "epoch": 0.49, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 5.298139786593014e-07, "loss": 0.1549, "projector_lr": 1.5894419359779044e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.0625, "sft_loss": 0.94921875, "step": 3039 }, { "dpo_loss": 0.1318359375, "epoch": 0.49, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 5.295605399855009e-07, "loss": 0.2264, "projector_lr": 1.5886816199565027e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 3.59375, "rewards_train/rejected": -4.71875, "sft_loss": 0.765625, "step": 3040 }, { "dpo_loss": 0.28515625, "epoch": 0.49, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 5.293070936900001e-07, "loss": 0.2417, "projector_lr": 1.5879212810700002e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.46875, "sft_loss": 0.96875, "step": 3041 }, { "dpo_loss": 0.03857421875, "epoch": 0.49, "final_loss": 0.03857421875, "grad_norm": 0.0, "learning_rate": 5.290536398381458e-07, "loss": 0.0823, "projector_lr": 1.5871609195144374e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71875, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.375, "sft_loss": 0.73828125, "step": 3042 }, { "dpo_loss": 0.140625, "epoch": 0.49, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 5.288001784952871e-07, "loss": 0.4457, "projector_lr": 1.5864005354858616e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 3.921875, "rewards_train/rejected": -5.21875, "sft_loss": 0.8984375, "step": 3043 }, { "dpo_loss": 0.111328125, "epoch": 0.49, "final_loss": 0.111328125, "grad_norm": 0.0, "learning_rate": 5.285467097267749e-07, "loss": 0.2035, "projector_lr": 1.585640129180325e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.546875, "rewards_train/margins": 6.3125, "rewards_train/rejected": -6.84375, "sft_loss": 0.67578125, "step": 3044 }, { "dpo_loss": 0.035888671875, "epoch": 0.49, "final_loss": 0.035888671875, "grad_norm": 0.0, "learning_rate": 5.28293233597962e-07, "loss": 0.134, "projector_lr": 1.584879700793886e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.59375, "sft_loss": 0.5859375, "step": 3045 }, { "dpo_loss": 0.447265625, "epoch": 0.49, "final_loss": 0.447265625, "grad_norm": 0.0, "learning_rate": 5.280397501742029e-07, "loss": 0.2449, "projector_lr": 1.5841192505226087e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9140625, "rewards_train/margins": 6.09375, "rewards_train/rejected": -8.0, "sft_loss": 0.6875, "step": 3046 }, { "dpo_loss": 0.1748046875, "epoch": 0.49, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 5.277862595208542e-07, "loss": 0.0914, "projector_lr": 1.5833587785625628e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.5, "sft_loss": 0.6640625, "step": 3047 }, { "dpo_loss": 0.20703125, "epoch": 0.49, "final_loss": 0.20703125, "grad_norm": 0.0, "learning_rate": 5.275327617032743e-07, "loss": 0.1577, "projector_lr": 1.582598285109823e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9921875, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.65625, "sft_loss": 1.1484375, "step": 3048 }, { "dpo_loss": 0.373046875, "epoch": 0.49, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 5.272792567868235e-07, "loss": 0.2629, "projector_lr": 1.5818377703604706e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.421875, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.25, "sft_loss": 0.9609375, "step": 3049 }, { "dpo_loss": 0.052001953125, "epoch": 0.49, "final_loss": 0.052001953125, "grad_norm": 0.0, "learning_rate": 5.270257448368639e-07, "loss": 0.1901, "projector_lr": 1.5810772345105919e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 7.03125, "rewards_train/rejected": -8.0625, "sft_loss": 0.69140625, "step": 3050 }, { "dpo_loss": 0.031494140625, "epoch": 0.49, "final_loss": 0.031494140625, "grad_norm": 0.0, "learning_rate": 5.267722259187593e-07, "loss": 0.0744, "projector_lr": 1.5803166777562778e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.34375, "sft_loss": 0.59375, "step": 3051 }, { "dpo_loss": 0.1220703125, "epoch": 0.49, "final_loss": 0.1220703125, "grad_norm": 0.0, "learning_rate": 5.265187000978754e-07, "loss": 0.1159, "projector_lr": 1.5795561002936266e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.78125, "rewards_train/margins": 3.953125, "rewards_train/rejected": -6.71875, "sft_loss": 0.609375, "step": 3052 }, { "dpo_loss": 0.050048828125, "epoch": 0.49, "final_loss": 0.050048828125, "grad_norm": 0.0, "learning_rate": 5.262651674395798e-07, "loss": 0.079, "projector_lr": 1.5787955023187396e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.234375, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.4375, "sft_loss": 0.9921875, "step": 3053 }, { "dpo_loss": 0.1240234375, "epoch": 0.49, "final_loss": 0.1240234375, "grad_norm": 0.0, "learning_rate": 5.260116280092416e-07, "loss": 0.3364, "projector_lr": 1.578034884027725e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.75, "sft_loss": 0.734375, "step": 3054 }, { "dpo_loss": 0.0145263671875, "epoch": 0.49, "final_loss": 0.0145263671875, "grad_norm": 0.0, "learning_rate": 5.25758081872232e-07, "loss": 0.1126, "projector_lr": 1.5772742456166959e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.349609375, "rewards_train/margins": 6.28125, "rewards_train/rejected": -6.625, "sft_loss": 0.68359375, "step": 3055 }, { "dpo_loss": 0.68359375, "epoch": 0.49, "final_loss": 0.68359375, "grad_norm": 0.0, "learning_rate": 5.255045290939233e-07, "loss": 0.6064, "projector_lr": 1.57651358728177e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -2.15625, "rewards_train/margins": 1.828125, "rewards_train/rejected": -3.984375, "sft_loss": 0.9140625, "step": 3056 }, { "dpo_loss": 0.0341796875, "epoch": 0.49, "final_loss": 0.0341796875, "grad_norm": 0.0, "learning_rate": 5.252509697396903e-07, "loss": 0.0999, "projector_lr": 1.575752909219071e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 6.28125, "rewards_train/rejected": -8.125, "sft_loss": 0.734375, "step": 3057 }, { "dpo_loss": 0.1376953125, "epoch": 0.49, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 5.24997403874909e-07, "loss": 0.2291, "projector_lr": 1.574992211624727e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 5.875, "rewards_train/rejected": -7.0625, "sft_loss": 0.75, "step": 3058 }, { "dpo_loss": 0.1962890625, "epoch": 0.49, "final_loss": 0.1962890625, "grad_norm": 0.0, "learning_rate": 5.247438315649572e-07, "loss": 0.4545, "projector_lr": 1.5742314946948715e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.15625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -6.5625, "sft_loss": 0.859375, "step": 3059 }, { "dpo_loss": 0.1728515625, "epoch": 0.49, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 5.244902528752143e-07, "loss": 0.1031, "projector_lr": 1.573470758625643e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.50390625, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.25, "sft_loss": 0.67578125, "step": 3060 }, { "dpo_loss": 0.421875, "epoch": 0.49, "final_loss": 0.421875, "grad_norm": 0.0, "learning_rate": 5.242366678710616e-07, "loss": 0.2395, "projector_lr": 1.572710003613185e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 2.828125, "rewards_train/rejected": -4.28125, "sft_loss": 0.6328125, "step": 3061 }, { "dpo_loss": 0.1748046875, "epoch": 0.49, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 5.239830766178815e-07, "loss": 0.2829, "projector_lr": 1.5719492298536448e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.046875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -6.5, "sft_loss": 0.84375, "step": 3062 }, { "dpo_loss": 0.453125, "epoch": 0.49, "final_loss": 0.453125, "grad_norm": 0.0, "learning_rate": 5.237294791810585e-07, "loss": 0.3073, "projector_lr": 1.571188437543176e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 2.828125, "rewards_train/rejected": -4.0, "sft_loss": 0.8046875, "step": 3063 }, { "dpo_loss": 0.06787109375, "epoch": 0.49, "final_loss": 0.06787109375, "grad_norm": 0.0, "learning_rate": 5.234758756259788e-07, "loss": 0.1124, "projector_lr": 1.5704276268779366e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96875, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.9375, "sft_loss": 0.796875, "step": 3064 }, { "dpo_loss": 0.6640625, "epoch": 0.49, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 5.232222660180295e-07, "loss": 0.4144, "projector_lr": 1.5696667980540888e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9609375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -6.28125, "sft_loss": 0.68359375, "step": 3065 }, { "dpo_loss": 0.10791015625, "epoch": 0.49, "final_loss": 0.10791015625, "grad_norm": 0.0, "learning_rate": 5.229686504226001e-07, "loss": 0.0883, "projector_lr": 1.5689059512678003e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.546875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.78125, "sft_loss": 0.74609375, "step": 3066 }, { "dpo_loss": 0.380859375, "epoch": 0.49, "final_loss": 0.380859375, "grad_norm": 0.0, "learning_rate": 5.227150289050808e-07, "loss": 0.214, "projector_lr": 1.5681450867152424e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3125, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.5, "sft_loss": 0.9140625, "step": 3067 }, { "dpo_loss": 0.294921875, "epoch": 0.49, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 5.224614015308641e-07, "loss": 0.2855, "projector_lr": 1.5673842045925922e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.125, "sft_loss": 0.77734375, "step": 3068 }, { "dpo_loss": 0.193359375, "epoch": 0.49, "final_loss": 0.193359375, "grad_norm": 0.0, "learning_rate": 5.222077683653433e-07, "loss": 0.1505, "projector_lr": 1.56662330509603e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.78125, "sft_loss": 0.9921875, "step": 3069 }, { "dpo_loss": 0.478515625, "epoch": 0.49, "final_loss": 0.478515625, "grad_norm": 0.0, "learning_rate": 5.21954129473914e-07, "loss": 0.3216, "projector_lr": 1.565862388421742e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 2.484375, "rewards_train/rejected": -3.6875, "sft_loss": 1.0546875, "step": 3070 }, { "dpo_loss": 0.06689453125, "epoch": 0.49, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 5.217004849219723e-07, "loss": 0.1622, "projector_lr": 1.5651014547659169e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.21875, "sft_loss": 0.8515625, "step": 3071 }, { "dpo_loss": 0.162109375, "epoch": 0.49, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 5.214468347749166e-07, "loss": 0.2205, "projector_lr": 1.5643405043247499e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9453125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -6.25, "sft_loss": 0.984375, "step": 3072 }, { "dpo_loss": 0.002197265625, "epoch": 0.49, "final_loss": 0.002197265625, "grad_norm": 0.0, "learning_rate": 5.211931790981466e-07, "loss": 0.0296, "projector_lr": 1.5635795372944396e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.87890625, "rewards_train/margins": 7.25, "rewards_train/rejected": -8.125, "sft_loss": 0.60546875, "step": 3073 }, { "dpo_loss": 0.302734375, "epoch": 0.49, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 5.209395179570627e-07, "loss": 0.1843, "projector_lr": 1.5628185538711882e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.25, "sft_loss": 0.75390625, "step": 3074 }, { "dpo_loss": 0.447265625, "epoch": 0.49, "final_loss": 0.447265625, "grad_norm": 0.0, "learning_rate": 5.206858514170678e-07, "loss": 0.2449, "projector_lr": 1.5620575542512035e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.53125, "rewards_train/margins": 3.203125, "rewards_train/rejected": -4.71875, "sft_loss": 0.82421875, "step": 3075 }, { "dpo_loss": 0.1474609375, "epoch": 0.49, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 5.204321795435656e-07, "loss": 0.4429, "projector_lr": 1.5612965386306968e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.21875, "sft_loss": 0.6640625, "step": 3076 }, { "dpo_loss": 0.1279296875, "epoch": 0.49, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 5.201785024019608e-07, "loss": 0.3404, "projector_lr": 1.5605355072058827e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.71875, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.53125, "sft_loss": 0.65234375, "step": 3077 }, { "dpo_loss": 0.1689453125, "epoch": 0.49, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 5.199248200576603e-07, "loss": 0.1249, "projector_lr": 1.5597744601729813e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 3.984375, "rewards_train/rejected": -5.34375, "sft_loss": 0.73046875, "step": 3078 }, { "dpo_loss": 0.1611328125, "epoch": 0.49, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 5.19671132576072e-07, "loss": 0.1108, "projector_lr": 1.559013397728216e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.625, "sft_loss": 0.890625, "step": 3079 }, { "dpo_loss": 0.126953125, "epoch": 0.49, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 5.194174400226047e-07, "loss": 0.2748, "projector_lr": 1.5582523200678141e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8125, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.8125, "sft_loss": 0.70703125, "step": 3080 }, { "dpo_loss": 0.185546875, "epoch": 0.49, "final_loss": 0.185546875, "grad_norm": 0.0, "learning_rate": 5.19163742462669e-07, "loss": 0.1174, "projector_lr": 1.5574912273880071e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.203125, "rewards_train/margins": 3.40625, "rewards_train/rejected": -4.625, "sft_loss": 0.86328125, "step": 3081 }, { "dpo_loss": 0.036865234375, "epoch": 0.49, "final_loss": 0.036865234375, "grad_norm": 0.0, "learning_rate": 5.189100399616769e-07, "loss": 0.0715, "projector_lr": 1.5567301198850306e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.068359375, "rewards_train/margins": 5.5625, "rewards_train/rejected": -5.46875, "sft_loss": 0.7578125, "step": 3082 }, { "dpo_loss": 0.0380859375, "epoch": 0.49, "final_loss": 0.0380859375, "grad_norm": 0.0, "learning_rate": 5.186563325850409e-07, "loss": 0.0832, "projector_lr": 1.555968997755123e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.25, "sft_loss": 0.74609375, "step": 3083 }, { "dpo_loss": 0.7734375, "epoch": 0.49, "final_loss": 0.7734375, "grad_norm": 0.0, "learning_rate": 5.184026203981759e-07, "loss": 0.421, "projector_lr": 1.5552078611945277e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 1.609375, "rewards_train/rejected": -3.078125, "sft_loss": 1.015625, "step": 3084 }, { "dpo_loss": 0.06005859375, "epoch": 0.49, "final_loss": 0.06005859375, "grad_norm": 0.0, "learning_rate": 5.181489034664968e-07, "loss": 0.046, "projector_lr": 1.5544467103994907e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.234375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.21875, "sft_loss": 0.7421875, "step": 3085 }, { "dpo_loss": 0.07958984375, "epoch": 0.49, "final_loss": 0.07958984375, "grad_norm": 0.0, "learning_rate": 5.17895181855421e-07, "loss": 0.0577, "projector_lr": 1.5536855455662631e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.15625, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.53125, "sft_loss": 0.82421875, "step": 3086 }, { "dpo_loss": 0.5390625, "epoch": 0.49, "final_loss": 0.5390625, "grad_norm": 0.0, "learning_rate": 5.17641455630366e-07, "loss": 0.4106, "projector_lr": 1.552924366891098e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.0, "rewards_train/margins": 2.625, "rewards_train/rejected": -4.625, "sft_loss": 1.0546875, "step": 3087 }, { "dpo_loss": 0.1201171875, "epoch": 0.49, "final_loss": 0.1201171875, "grad_norm": 0.0, "learning_rate": 5.173877248567512e-07, "loss": 0.1087, "projector_lr": 1.5521631745702537e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 3.71875, "rewards_train/rejected": -5.3125, "sft_loss": 0.65625, "step": 3088 }, { "dpo_loss": 0.09814453125, "epoch": 0.49, "final_loss": 0.09814453125, "grad_norm": 0.0, "learning_rate": 5.171339895999966e-07, "loss": 0.2823, "projector_lr": 1.5514019687999898e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.03125, "rewards_train/margins": 6.8125, "rewards_train/rejected": -7.84375, "sft_loss": 0.76171875, "step": 3089 }, { "dpo_loss": 0.08740234375, "epoch": 0.49, "final_loss": 0.08740234375, "grad_norm": 0.0, "learning_rate": 5.168802499255238e-07, "loss": 0.1355, "projector_lr": 1.5506407497765716e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.546875, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.9375, "sft_loss": 0.69140625, "step": 3090 }, { "dpo_loss": 0.0206298828125, "epoch": 0.49, "final_loss": 0.0206298828125, "grad_norm": 0.0, "learning_rate": 5.166265058987558e-07, "loss": 0.0396, "projector_lr": 1.5498795176962675e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.90625, "sft_loss": 0.64453125, "step": 3091 }, { "dpo_loss": 0.06982421875, "epoch": 0.49, "final_loss": 0.06982421875, "grad_norm": 0.0, "learning_rate": 5.163727575851156e-07, "loss": 0.0539, "projector_lr": 1.549118272755347e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 6.625, "rewards_train/rejected": -7.71875, "sft_loss": 0.859375, "step": 3092 }, { "dpo_loss": 0.0213623046875, "epoch": 0.49, "final_loss": 0.0213623046875, "grad_norm": 0.0, "learning_rate": 5.161190050500288e-07, "loss": 0.0242, "projector_lr": 1.5483570151500865e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.40625, "rewards_train/margins": 4.8125, "rewards_train/rejected": -6.21875, "sft_loss": 0.71875, "step": 3093 }, { "dpo_loss": 0.189453125, "epoch": 0.5, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 5.158652483589208e-07, "loss": 0.4006, "projector_lr": 1.5475957450767627e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.375, "sft_loss": 0.6875, "step": 3094 }, { "dpo_loss": 0.05126953125, "epoch": 0.5, "final_loss": 0.05126953125, "grad_norm": 0.0, "learning_rate": 5.156114875772189e-07, "loss": 0.252, "projector_lr": 1.546834462731657e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.375, "sft_loss": 1.0390625, "step": 3095 }, { "dpo_loss": 0.169921875, "epoch": 0.5, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 5.15357722770351e-07, "loss": 0.1068, "projector_lr": 1.546073168311053e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.5, "sft_loss": 0.65234375, "step": 3096 }, { "dpo_loss": 0.11376953125, "epoch": 0.5, "final_loss": 0.11376953125, "grad_norm": 0.0, "learning_rate": 5.151039540037463e-07, "loss": 0.223, "projector_lr": 1.545311862011239e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.4296875, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.3125, "sft_loss": 0.8359375, "step": 3097 }, { "dpo_loss": 0.384765625, "epoch": 0.5, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 5.148501813428346e-07, "loss": 0.2039, "projector_lr": 1.544550544028504e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 5.375, "rewards_train/rejected": -7.03125, "sft_loss": 0.76171875, "step": 3098 }, { "dpo_loss": 0.3203125, "epoch": 0.5, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 5.145964048530474e-07, "loss": 0.2297, "projector_lr": 1.5437892145591425e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8203125, "rewards_train/margins": 3.265625, "rewards_train/rejected": -5.09375, "sft_loss": 0.890625, "step": 3099 }, { "dpo_loss": 0.11181640625, "epoch": 0.5, "final_loss": 0.11181640625, "grad_norm": 0.0, "learning_rate": 5.143426245998167e-07, "loss": 0.14, "projector_lr": 1.5430278737994501e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.6875, "sft_loss": 0.7421875, "step": 3100 }, { "dpo_loss": 0.44140625, "epoch": 0.5, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 5.140888406485755e-07, "loss": 0.3033, "projector_lr": 1.5422665219457265e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.078125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -6.1875, "sft_loss": 0.94140625, "step": 3101 }, { "dpo_loss": 0.059814453125, "epoch": 0.5, "final_loss": 0.059814453125, "grad_norm": 0.0, "learning_rate": 5.13835053064758e-07, "loss": 0.1157, "projector_lr": 1.541505159194274e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.05224609375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.4375, "sft_loss": 0.8125, "step": 3102 }, { "dpo_loss": 0.1533203125, "epoch": 0.5, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 5.135812619137988e-07, "loss": 0.146, "projector_lr": 1.5407437857413965e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.125, "rewards_train/margins": 5.59375, "rewards_train/rejected": -7.71875, "sft_loss": 0.8828125, "step": 3103 }, { "dpo_loss": 0.1181640625, "epoch": 0.5, "final_loss": 0.1181640625, "grad_norm": 0.0, "learning_rate": 5.133274672611341e-07, "loss": 0.0641, "projector_lr": 1.5399824017834026e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 5.65625, "rewards_train/rejected": -7.03125, "sft_loss": 0.57421875, "step": 3104 }, { "dpo_loss": 0.2236328125, "epoch": 0.5, "final_loss": 0.2236328125, "grad_norm": 0.0, "learning_rate": 5.130736691722006e-07, "loss": 0.1708, "projector_lr": 1.539221007516602e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.375, "sft_loss": 0.93359375, "step": 3105 }, { "dpo_loss": 0.25390625, "epoch": 0.5, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 5.128198677124361e-07, "loss": 0.1345, "projector_lr": 1.5384596031373086e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.625, "sft_loss": 0.77734375, "step": 3106 }, { "dpo_loss": 0.028564453125, "epoch": 0.5, "final_loss": 0.028564453125, "grad_norm": 0.0, "learning_rate": 5.125660629472789e-07, "loss": 0.3151, "projector_lr": 1.5376981888418367e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.8125, "sft_loss": 0.55859375, "step": 3107 }, { "dpo_loss": 0.2294921875, "epoch": 0.5, "final_loss": 0.2294921875, "grad_norm": 0.0, "learning_rate": 5.123122549421684e-07, "loss": 0.5498, "projector_lr": 1.5369367648265055e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.03125, "sft_loss": 1.015625, "step": 3108 }, { "dpo_loss": 0.091796875, "epoch": 0.5, "final_loss": 0.091796875, "grad_norm": 0.0, "learning_rate": 5.120584437625452e-07, "loss": 0.2187, "projector_lr": 1.5361753312876356e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.046875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -7.3125, "sft_loss": 1.0078125, "step": 3109 }, { "dpo_loss": 0.166015625, "epoch": 0.5, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 5.118046294738499e-07, "loss": 0.1344, "projector_lr": 1.53541388842155e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.48046875, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.1875, "sft_loss": 0.5546875, "step": 3110 }, { "dpo_loss": 0.408203125, "epoch": 0.5, "final_loss": 0.408203125, "grad_norm": 0.0, "learning_rate": 5.115508121415248e-07, "loss": 0.2712, "projector_lr": 1.5346524364245744e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8359375, "rewards_train/margins": 3.828125, "rewards_train/rejected": -5.65625, "sft_loss": 0.76953125, "step": 3111 }, { "dpo_loss": 0.435546875, "epoch": 0.5, "final_loss": 0.435546875, "grad_norm": 0.0, "learning_rate": 5.112969918310119e-07, "loss": 0.4469, "projector_lr": 1.5338909754930359e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.5625, "rewards_train/margins": 3.953125, "rewards_train/rejected": -6.53125, "sft_loss": 0.71875, "step": 3112 }, { "dpo_loss": 0.2119140625, "epoch": 0.5, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 5.110431686077553e-07, "loss": 0.1501, "projector_lr": 1.533129505823266e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.46875, "sft_loss": 1.7734375, "step": 3113 }, { "dpo_loss": 0.388671875, "epoch": 0.5, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 5.107893425371986e-07, "loss": 0.2559, "projector_lr": 1.532368027611596e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.734375, "rewards_train/margins": 4.375, "rewards_train/rejected": -7.125, "sft_loss": 0.83203125, "step": 3114 }, { "dpo_loss": 0.306640625, "epoch": 0.5, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 5.105355136847871e-07, "loss": 0.3822, "projector_lr": 1.5316065410543613e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.234375, "rewards_train/margins": 2.953125, "rewards_train/rejected": -5.1875, "sft_loss": 0.80078125, "step": 3115 }, { "dpo_loss": 0.1708984375, "epoch": 0.5, "final_loss": 0.1708984375, "grad_norm": 0.0, "learning_rate": 5.102816821159663e-07, "loss": 0.195, "projector_lr": 1.5308450463478988e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.90625, "rewards_train/margins": 3.734375, "rewards_train/rejected": -5.65625, "sft_loss": 0.76171875, "step": 3116 }, { "dpo_loss": 0.265625, "epoch": 0.5, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 5.100278478961822e-07, "loss": 0.1417, "projector_lr": 1.5300835436885468e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 3.125, "rewards_train/rejected": -4.09375, "sft_loss": 0.65234375, "step": 3117 }, { "dpo_loss": 0.1474609375, "epoch": 0.5, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 5.097740110908821e-07, "loss": 0.1713, "projector_lr": 1.5293220332726465e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.875, "rewards_train/margins": 4.71875, "rewards_train/rejected": -6.59375, "sft_loss": 0.57421875, "step": 3118 }, { "dpo_loss": 0.421875, "epoch": 0.5, "final_loss": 0.421875, "grad_norm": 0.0, "learning_rate": 5.095201717655136e-07, "loss": 0.2594, "projector_lr": 1.528560515296541e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.734375, "rewards_train/margins": 1.9609375, "rewards_train/rejected": -4.6875, "sft_loss": 1.0234375, "step": 3119 }, { "dpo_loss": 0.0537109375, "epoch": 0.5, "final_loss": 0.0537109375, "grad_norm": 0.0, "learning_rate": 5.092663299855251e-07, "loss": 0.1252, "projector_lr": 1.5277989899565753e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.625, "rewards_train/margins": 5.4375, "rewards_train/rejected": -7.0625, "sft_loss": 0.6953125, "step": 3120 }, { "dpo_loss": 0.2119140625, "epoch": 0.5, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 5.090124858163653e-07, "loss": 0.1355, "projector_lr": 1.5270374574490958e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.78125, "rewards_train/margins": 4.375, "rewards_train/rejected": -6.15625, "sft_loss": 0.78125, "step": 3121 }, { "dpo_loss": 0.036376953125, "epoch": 0.5, "final_loss": 0.036376953125, "grad_norm": 0.0, "learning_rate": 5.08758639323484e-07, "loss": 0.4422, "projector_lr": 1.5262759179704519e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.265625, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.625, "sft_loss": 0.734375, "step": 3122 }, { "dpo_loss": 0.2314453125, "epoch": 0.5, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 5.08504790572331e-07, "loss": 0.1505, "projector_lr": 1.5255143717169935e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.578125, "rewards_train/margins": 4.5, "rewards_train/rejected": -6.09375, "sft_loss": 0.55859375, "step": 3123 }, { "dpo_loss": 0.130859375, "epoch": 0.5, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 5.082509396283575e-07, "loss": 0.1534, "projector_lr": 1.5247528188850727e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.84375, "rewards_train/margins": 5.21875, "rewards_train/rejected": -8.0625, "sft_loss": 1.4296875, "step": 3124 }, { "dpo_loss": 0.02392578125, "epoch": 0.5, "final_loss": 0.02392578125, "grad_norm": 0.0, "learning_rate": 5.079970865570145e-07, "loss": 0.0526, "projector_lr": 1.5239912596710437e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.578125, "rewards_train/margins": 6.21875, "rewards_train/rejected": -7.78125, "sft_loss": 0.71484375, "step": 3125 }, { "dpo_loss": 0.029541015625, "epoch": 0.5, "final_loss": 0.029541015625, "grad_norm": 0.0, "learning_rate": 5.07743231423754e-07, "loss": 0.2423, "projector_lr": 1.5232296942712621e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 5.46875, "rewards_train/rejected": -7.03125, "sft_loss": 0.8203125, "step": 3126 }, { "dpo_loss": 0.1708984375, "epoch": 0.5, "final_loss": 0.1708984375, "grad_norm": 0.0, "learning_rate": 5.074893742940282e-07, "loss": 0.1235, "projector_lr": 1.5224681228820849e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.375, "rewards_train/margins": 3.0625, "rewards_train/rejected": -4.4375, "sft_loss": 0.89453125, "step": 3127 }, { "dpo_loss": 0.041748046875, "epoch": 0.5, "final_loss": 0.041748046875, "grad_norm": 0.0, "learning_rate": 5.072355152332901e-07, "loss": 0.0808, "projector_lr": 1.5217065456998704e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.5, "rewards_train/margins": 5.0, "rewards_train/rejected": -7.5, "sft_loss": 0.99609375, "step": 3128 }, { "dpo_loss": 0.53125, "epoch": 0.5, "final_loss": 0.53125, "grad_norm": 0.0, "learning_rate": 5.069816543069932e-07, "loss": 0.39, "projector_lr": 1.52094496292098e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.484375, "rewards_train/margins": 3.8125, "rewards_train/rejected": -4.3125, "sft_loss": 0.6875, "step": 3129 }, { "dpo_loss": 0.220703125, "epoch": 0.5, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 5.067277915805912e-07, "loss": 0.2078, "projector_lr": 1.5201833747417738e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.765625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.96875, "sft_loss": 0.671875, "step": 3130 }, { "dpo_loss": 0.2197265625, "epoch": 0.5, "final_loss": 0.2197265625, "grad_norm": 0.0, "learning_rate": 5.064739271195385e-07, "loss": 0.2104, "projector_lr": 1.5194217813586157e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.921875, "rewards_train/margins": 2.9375, "rewards_train/rejected": -4.84375, "sft_loss": 0.6015625, "step": 3131 }, { "dpo_loss": 0.240234375, "epoch": 0.5, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 5.0622006098929e-07, "loss": 0.2415, "projector_lr": 1.51866018296787e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.40625, "rewards_train/margins": 3.796875, "rewards_train/rejected": -6.21875, "sft_loss": 1.0078125, "step": 3132 }, { "dpo_loss": 0.1416015625, "epoch": 0.5, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 5.059661932553008e-07, "loss": 0.1083, "projector_lr": 1.5178985797659024e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.953125, "rewards_train/margins": 6.375, "rewards_train/rejected": -8.3125, "sft_loss": 1.171875, "step": 3133 }, { "dpo_loss": 0.08447265625, "epoch": 0.5, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 5.057123239830263e-07, "loss": 0.2968, "projector_lr": 1.517136971949079e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5390625, "rewards_train/margins": 6.78125, "rewards_train/rejected": -8.3125, "sft_loss": 1.359375, "step": 3134 }, { "dpo_loss": 0.25, "epoch": 0.5, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 5.054584532379229e-07, "loss": 0.2172, "projector_lr": 1.516375359713769e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -7.0, "sft_loss": 0.69140625, "step": 3135 }, { "dpo_loss": 0.07177734375, "epoch": 0.5, "final_loss": 0.07177734375, "grad_norm": 0.0, "learning_rate": 5.05204581085447e-07, "loss": 0.5131, "projector_lr": 1.5156137432563413e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.875, "sft_loss": 0.77734375, "step": 3136 }, { "dpo_loss": 0.349609375, "epoch": 0.5, "final_loss": 0.349609375, "grad_norm": 0.0, "learning_rate": 5.049507075910552e-07, "loss": 0.2195, "projector_lr": 1.5148521227731656e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 3.515625, "rewards_train/rejected": -4.9375, "sft_loss": 0.7265625, "step": 3137 }, { "dpo_loss": 0.1337890625, "epoch": 0.5, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 5.046968328202047e-07, "loss": 0.1038, "projector_lr": 1.5140904984606143e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.75, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.8125, "sft_loss": 0.7890625, "step": 3138 }, { "dpo_loss": 0.07666015625, "epoch": 0.5, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 5.044429568383528e-07, "loss": 0.3289, "projector_lr": 1.5133288705150586e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.40625, "sft_loss": 0.73828125, "step": 3139 }, { "dpo_loss": 0.0341796875, "epoch": 0.5, "final_loss": 0.0341796875, "grad_norm": 0.0, "learning_rate": 5.041890797109575e-07, "loss": 0.0383, "projector_lr": 1.5125672391328726e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.125, "sft_loss": 0.70703125, "step": 3140 }, { "dpo_loss": 0.80078125, "epoch": 0.5, "final_loss": 0.80078125, "grad_norm": 0.0, "learning_rate": 5.039352015034765e-07, "loss": 0.4522, "projector_lr": 1.5118056045104298e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8125, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.9375, "sft_loss": 0.82421875, "step": 3141 }, { "dpo_loss": 0.380859375, "epoch": 0.5, "final_loss": 0.380859375, "grad_norm": 0.0, "learning_rate": 5.036813222813686e-07, "loss": 0.663, "projector_lr": 1.5110439668441059e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 4.59375, "rewards_train/rejected": -6.375, "sft_loss": 0.81640625, "step": 3142 }, { "dpo_loss": 0.0625, "epoch": 0.5, "final_loss": 0.0625, "grad_norm": 0.0, "learning_rate": 5.034274421100919e-07, "loss": 0.2312, "projector_lr": 1.510282326330276e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.84375, "sft_loss": 0.875, "step": 3143 }, { "dpo_loss": 0.1923828125, "epoch": 0.5, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 5.031735610551057e-07, "loss": 0.2381, "projector_lr": 1.5095206831653172e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.734375, "rewards_train/margins": 6.4375, "rewards_train/rejected": -8.1875, "sft_loss": 0.79296875, "step": 3144 }, { "dpo_loss": 0.5234375, "epoch": 0.5, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 5.029196791818688e-07, "loss": 0.3832, "projector_lr": 1.5087590375456063e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 2.34375, "rewards_train/rejected": -4.15625, "sft_loss": 0.68359375, "step": 3145 }, { "dpo_loss": 0.1689453125, "epoch": 0.5, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 5.026657965558403e-07, "loss": 0.123, "projector_lr": 1.507997389667521e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7421875, "rewards_train/margins": 2.875, "rewards_train/rejected": -4.625, "sft_loss": 0.91796875, "step": 3146 }, { "dpo_loss": 0.361328125, "epoch": 0.5, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 5.024119132424801e-07, "loss": 0.2246, "projector_lr": 1.5072357397274405e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.484375, "rewards_train/margins": 3.578125, "rewards_train/rejected": -5.0625, "sft_loss": 0.7109375, "step": 3147 }, { "dpo_loss": 0.27734375, "epoch": 0.5, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 5.021580293072476e-07, "loss": 0.1411, "projector_lr": 1.506474087921743e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.265625, "rewards_train/margins": 1.9921875, "rewards_train/rejected": -3.265625, "sft_loss": 0.796875, "step": 3148 }, { "dpo_loss": 0.345703125, "epoch": 0.5, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 5.019041448156027e-07, "loss": 0.2359, "projector_lr": 1.5057124344468082e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.890625, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.5625, "sft_loss": 0.77734375, "step": 3149 }, { "dpo_loss": 0.107421875, "epoch": 0.5, "final_loss": 0.107421875, "grad_norm": 0.0, "learning_rate": 5.016502598330052e-07, "loss": 0.1595, "projector_lr": 1.5049507794990158e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 3.84375, "rewards_train/rejected": -4.96875, "sft_loss": 0.75390625, "step": 3150 }, { "dpo_loss": 0.06689453125, "epoch": 0.5, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 5.013963744249153e-07, "loss": 0.0383, "projector_lr": 1.5041891232747461e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.875, "sft_loss": 0.875, "step": 3151 }, { "dpo_loss": 0.2890625, "epoch": 0.5, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 5.011424886567932e-07, "loss": 0.2154, "projector_lr": 1.5034274659703797e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.03125, "rewards_train/margins": 5.0625, "rewards_train/rejected": -7.09375, "sft_loss": 0.8046875, "step": 3152 }, { "dpo_loss": 0.322265625, "epoch": 0.5, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 5.008886025940991e-07, "loss": 0.2648, "projector_lr": 1.5026658077822972e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 2.78125, "rewards_train/rejected": -4.65625, "sft_loss": 1.9375, "step": 3153 }, { "dpo_loss": 0.0673828125, "epoch": 0.5, "final_loss": 0.0673828125, "grad_norm": 0.0, "learning_rate": 5.006347163022935e-07, "loss": 0.0599, "projector_lr": 1.5019041489068805e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.46875, "sft_loss": 0.73828125, "step": 3154 }, { "dpo_loss": 0.1650390625, "epoch": 0.5, "final_loss": 0.1650390625, "grad_norm": 0.0, "learning_rate": 5.003808298468365e-07, "loss": 0.19, "projector_lr": 1.5011424895405095e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.8125, "sft_loss": 0.7890625, "step": 3155 }, { "dpo_loss": 0.50390625, "epoch": 0.5, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 5.001269432931889e-07, "loss": 0.2732, "projector_lr": 1.500380829879567e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.125, "rewards_train/margins": 3.765625, "rewards_train/rejected": -5.875, "sft_loss": 1.0390625, "step": 3156 }, { "dpo_loss": 0.271484375, "epoch": 0.51, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 4.998730567068112e-07, "loss": 0.2132, "projector_lr": 1.4996191701204336e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.171875, "rewards_train/margins": 3.546875, "rewards_train/rejected": -5.71875, "sft_loss": 0.8828125, "step": 3157 }, { "dpo_loss": 0.33984375, "epoch": 0.51, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 4.996191701531635e-07, "loss": 0.1974, "projector_lr": 1.4988575104594906e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 3.609375, "rewards_train/rejected": -5.0625, "sft_loss": 0.73046875, "step": 3158 }, { "dpo_loss": 0.24609375, "epoch": 0.51, "final_loss": 0.24609375, "grad_norm": 0.0, "learning_rate": 4.993652836977066e-07, "loss": 0.3405, "projector_lr": 1.49809585109312e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.4375, "sft_loss": 0.6640625, "step": 3159 }, { "dpo_loss": 0.04833984375, "epoch": 0.51, "final_loss": 0.04833984375, "grad_norm": 0.0, "learning_rate": 4.991113974059009e-07, "loss": 0.1175, "projector_lr": 1.497334192217703e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.9375, "sft_loss": 0.58984375, "step": 3160 }, { "dpo_loss": 0.203125, "epoch": 0.51, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 4.988575113432068e-07, "loss": 0.1585, "projector_lr": 1.4965725340296204e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.671875, "rewards_train/margins": 6.53125, "rewards_train/rejected": -8.1875, "sft_loss": 0.84375, "step": 3161 }, { "dpo_loss": 0.1279296875, "epoch": 0.51, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 4.986036255750847e-07, "loss": 0.1368, "projector_lr": 1.495810876725254e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.28125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.34375, "sft_loss": 0.9453125, "step": 3162 }, { "dpo_loss": 0.07080078125, "epoch": 0.51, "final_loss": 0.07080078125, "grad_norm": 0.0, "learning_rate": 4.983497401669948e-07, "loss": 0.4513, "projector_lr": 1.4950492205009845e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3125, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.78125, "sft_loss": 0.49609375, "step": 3163 }, { "dpo_loss": 0.1669921875, "epoch": 0.51, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 4.980958551843974e-07, "loss": 0.2303, "projector_lr": 1.4942875655531923e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.53125, "sft_loss": 0.78125, "step": 3164 }, { "dpo_loss": 0.006561279296875, "epoch": 0.51, "final_loss": 0.006561279296875, "grad_norm": 0.0, "learning_rate": 4.978419706927524e-07, "loss": 0.1117, "projector_lr": 1.4935259120782573e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 7.1875, "rewards_train/rejected": -8.5, "sft_loss": 0.921875, "step": 3165 }, { "dpo_loss": 0.27734375, "epoch": 0.51, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 4.975880867575199e-07, "loss": 0.2287, "projector_lr": 1.4927642602725596e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.9375, "sft_loss": 0.8828125, "step": 3166 }, { "dpo_loss": 0.65625, "epoch": 0.51, "final_loss": 0.65625, "grad_norm": 0.0, "learning_rate": 4.973342034441596e-07, "loss": 0.4144, "projector_lr": 1.4920026103324787e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.0, "rewards_train/margins": 3.46875, "rewards_train/rejected": -5.46875, "sft_loss": 0.8984375, "step": 3167 }, { "dpo_loss": 0.07861328125, "epoch": 0.51, "final_loss": 0.07861328125, "grad_norm": 0.0, "learning_rate": 4.970803208181314e-07, "loss": 0.1698, "projector_lr": 1.4912409624543944e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 5.53125, "rewards_train/rejected": -7.0, "sft_loss": 0.7578125, "step": 3168 }, { "dpo_loss": 0.048828125, "epoch": 0.51, "final_loss": 0.048828125, "grad_norm": 0.0, "learning_rate": 4.968264389448944e-07, "loss": 0.217, "projector_lr": 1.4904793168346833e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 6.125, "rewards_train/rejected": -7.375, "sft_loss": 0.86328125, "step": 3169 }, { "dpo_loss": 0.32421875, "epoch": 0.51, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 4.965725578899081e-07, "loss": 0.2999, "projector_lr": 1.4897176736697241e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 2.609375, "rewards_train/rejected": -3.53125, "sft_loss": 0.63671875, "step": 3170 }, { "dpo_loss": 0.1396484375, "epoch": 0.51, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 4.963186777186314e-07, "loss": 0.2006, "projector_lr": 1.4889560331558944e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.375, "sft_loss": 0.7734375, "step": 3171 }, { "dpo_loss": 0.2119140625, "epoch": 0.51, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 4.960647984965234e-07, "loss": 0.1835, "projector_lr": 1.4881943954895703e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 3.65625, "rewards_train/rejected": -5.0, "sft_loss": 0.65625, "step": 3172 }, { "dpo_loss": 0.1328125, "epoch": 0.51, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 4.958109202890427e-07, "loss": 0.0883, "projector_lr": 1.4874327608671281e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.21875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.46875, "sft_loss": 0.58984375, "step": 3173 }, { "dpo_loss": 0.373046875, "epoch": 0.51, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 4.955570431616472e-07, "loss": 0.2392, "projector_lr": 1.486671129484942e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 2.265625, "rewards_train/rejected": -3.515625, "sft_loss": 0.98046875, "step": 3174 }, { "dpo_loss": 0.177734375, "epoch": 0.51, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 4.953031671797953e-07, "loss": 0.2283, "projector_lr": 1.4859095015393862e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9140625, "rewards_train/margins": 4.59375, "rewards_train/rejected": -6.5, "sft_loss": 0.7265625, "step": 3175 }, { "dpo_loss": 0.034423828125, "epoch": 0.51, "final_loss": 0.034423828125, "grad_norm": 0.0, "learning_rate": 4.950492924089448e-07, "loss": 0.0201, "projector_lr": 1.4851478772268343e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.625, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.96875, "sft_loss": 0.5859375, "step": 3176 }, { "dpo_loss": 0.0654296875, "epoch": 0.51, "final_loss": 0.0654296875, "grad_norm": 0.0, "learning_rate": 4.94795418914553e-07, "loss": 0.2184, "projector_lr": 1.4843862567436594e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.65625, "sft_loss": 0.64453125, "step": 3177 }, { "dpo_loss": 0.07373046875, "epoch": 0.51, "final_loss": 0.07373046875, "grad_norm": 0.0, "learning_rate": 4.945415467620771e-07, "loss": 0.1306, "projector_lr": 1.4836246402862312e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -5.65625, "sft_loss": 0.74609375, "step": 3178 }, { "dpo_loss": 0.1708984375, "epoch": 0.51, "final_loss": 0.1708984375, "grad_norm": 0.0, "learning_rate": 4.942876760169736e-07, "loss": 0.229, "projector_lr": 1.4828630280509212e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.109375, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.75, "sft_loss": 1.03125, "step": 3179 }, { "dpo_loss": 0.12060546875, "epoch": 0.51, "final_loss": 0.12060546875, "grad_norm": 0.0, "learning_rate": 4.940338067446993e-07, "loss": 0.0807, "projector_lr": 1.482101420234098e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.75, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.71875, "sft_loss": 0.71875, "step": 3180 }, { "dpo_loss": 0.05224609375, "epoch": 0.51, "final_loss": 0.05224609375, "grad_norm": 0.0, "learning_rate": 4.9377993901071e-07, "loss": 0.1002, "projector_lr": 1.48133981703213e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.59375, "sft_loss": 0.9453125, "step": 3181 }, { "dpo_loss": 0.05126953125, "epoch": 0.51, "final_loss": 0.05126953125, "grad_norm": 0.0, "learning_rate": 4.935260728804616e-07, "loss": 0.1274, "projector_lr": 1.4805782186413848e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 6.75, "rewards_train/rejected": -8.25, "sft_loss": 0.74609375, "step": 3182 }, { "dpo_loss": 0.03515625, "epoch": 0.51, "final_loss": 0.03515625, "grad_norm": 0.0, "learning_rate": 4.932722084194088e-07, "loss": 0.1065, "projector_lr": 1.4798166252582265e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.734375, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.40625, "sft_loss": 0.69140625, "step": 3183 }, { "dpo_loss": 0.212890625, "epoch": 0.51, "final_loss": 0.212890625, "grad_norm": 0.0, "learning_rate": 4.930183456930068e-07, "loss": 0.1545, "projector_lr": 1.4790550370790206e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.75, "sft_loss": 0.7734375, "step": 3184 }, { "dpo_loss": 0.177734375, "epoch": 0.51, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 4.927644847667098e-07, "loss": 0.2774, "projector_lr": 1.4782934543001295e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.5625, "sft_loss": 0.90234375, "step": 3185 }, { "dpo_loss": 0.79296875, "epoch": 0.51, "final_loss": 0.79296875, "grad_norm": 0.0, "learning_rate": 4.925106257059719e-07, "loss": 0.4126, "projector_lr": 1.4775318771179158e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.609375, "rewards_train/margins": 2.640625, "rewards_train/rejected": -5.25, "sft_loss": 0.71875, "step": 3186 }, { "dpo_loss": 0.020263671875, "epoch": 0.51, "final_loss": 0.020263671875, "grad_norm": 0.0, "learning_rate": 4.922567685762461e-07, "loss": 0.1522, "projector_lr": 1.4767703057287384e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.71875, "sft_loss": 0.73828125, "step": 3187 }, { "dpo_loss": 0.1845703125, "epoch": 0.51, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 4.920029134429855e-07, "loss": 0.1823, "projector_lr": 1.4760087403289565e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.71875, "sft_loss": 0.69921875, "step": 3188 }, { "dpo_loss": 0.1142578125, "epoch": 0.51, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 4.917490603716425e-07, "loss": 0.172, "projector_lr": 1.4752471811149274e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48046875, "rewards_train/margins": 3.0625, "rewards_train/rejected": -3.546875, "sft_loss": 0.875, "step": 3189 }, { "dpo_loss": 0.1025390625, "epoch": 0.51, "final_loss": 0.1025390625, "grad_norm": 0.0, "learning_rate": 4.914952094276688e-07, "loss": 0.0881, "projector_lr": 1.4744856282830066e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9765625, "rewards_train/margins": 5.375, "rewards_train/rejected": -7.34375, "sft_loss": 0.6484375, "step": 3190 }, { "dpo_loss": 0.255859375, "epoch": 0.51, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 4.912413606765161e-07, "loss": 0.268, "projector_lr": 1.4737240820295482e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.1875, "sft_loss": 0.734375, "step": 3191 }, { "dpo_loss": 0.21484375, "epoch": 0.51, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 4.909875141836348e-07, "loss": 0.1544, "projector_lr": 1.4729625425509045e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.140625, "rewards_train/margins": 3.640625, "rewards_train/rejected": -5.78125, "sft_loss": 0.8828125, "step": 3192 }, { "dpo_loss": 0.2490234375, "epoch": 0.51, "final_loss": 0.2490234375, "grad_norm": 0.0, "learning_rate": 4.907336700144749e-07, "loss": 0.1267, "projector_lr": 1.472201010043425e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.265625, "rewards_train/margins": 3.078125, "rewards_train/rejected": -4.34375, "sft_loss": 0.81640625, "step": 3193 }, { "dpo_loss": 0.271484375, "epoch": 0.51, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 4.904798282344863e-07, "loss": 0.3773, "projector_lr": 1.471439484703459e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 3.4375, "rewards_train/rejected": -4.09375, "sft_loss": 0.95703125, "step": 3194 }, { "dpo_loss": 0.041259765625, "epoch": 0.51, "final_loss": 0.041259765625, "grad_norm": 0.0, "learning_rate": 4.902259889091179e-07, "loss": 0.0733, "projector_lr": 1.4706779667273538e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 6.46875, "rewards_train/rejected": -7.53125, "sft_loss": 0.640625, "step": 3195 }, { "dpo_loss": 0.01470947265625, "epoch": 0.51, "final_loss": 0.01470947265625, "grad_norm": 0.0, "learning_rate": 4.899721521038178e-07, "loss": 0.0156, "projector_lr": 1.4699164563114535e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.3125, "sft_loss": 0.7109375, "step": 3196 }, { "dpo_loss": 0.451171875, "epoch": 0.51, "final_loss": 0.451171875, "grad_norm": 0.0, "learning_rate": 4.897183178840338e-07, "loss": 0.2678, "projector_lr": 1.4691549536521015e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 2.71875, "rewards_train/rejected": -4.15625, "sft_loss": 0.59765625, "step": 3197 }, { "dpo_loss": 0.2431640625, "epoch": 0.51, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 4.894644863152129e-07, "loss": 0.1755, "projector_lr": 1.4683934589456388e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.515625, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.6875, "sft_loss": 0.96875, "step": 3198 }, { "dpo_loss": 0.15625, "epoch": 0.51, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 4.892106574628014e-07, "loss": 0.0939, "projector_lr": 1.467631972388404e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.296875, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.90625, "sft_loss": 0.6328125, "step": 3199 }, { "dpo_loss": 0.1630859375, "epoch": 0.51, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 4.889568313922448e-07, "loss": 0.1005, "projector_lr": 1.4668704941767345e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.78125, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.125, "sft_loss": 0.8046875, "step": 3200 }, { "dpo_loss": 0.333984375, "epoch": 0.51, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 4.88703008168988e-07, "loss": 0.4635, "projector_lr": 1.4661090245069644e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 2.34375, "rewards_train/rejected": -4.0625, "sft_loss": 0.86328125, "step": 3201 }, { "dpo_loss": 0.11474609375, "epoch": 0.51, "final_loss": 0.11474609375, "grad_norm": 0.0, "learning_rate": 4.884491878584754e-07, "loss": 0.1222, "projector_lr": 1.4653475635754261e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.0625, "sft_loss": 0.81640625, "step": 3202 }, { "dpo_loss": 0.19921875, "epoch": 0.51, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 4.881953705261501e-07, "loss": 0.4591, "projector_lr": 1.46458611157845e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.265625, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.625, "sft_loss": 0.9296875, "step": 3203 }, { "dpo_loss": 0.07861328125, "epoch": 0.51, "final_loss": 0.07861328125, "grad_norm": 0.0, "learning_rate": 4.879415562374549e-07, "loss": 0.1708, "projector_lr": 1.4638246687123649e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84765625, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.5, "sft_loss": 0.65234375, "step": 3204 }, { "dpo_loss": 0.021484375, "epoch": 0.51, "final_loss": 0.021484375, "grad_norm": 0.0, "learning_rate": 4.876877450578315e-07, "loss": 0.288, "projector_lr": 1.4630632351734947e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 6.71875, "rewards_train/rejected": -8.125, "sft_loss": 0.68359375, "step": 3205 }, { "dpo_loss": 0.0213623046875, "epoch": 0.51, "final_loss": 0.0213623046875, "grad_norm": 0.0, "learning_rate": 4.874339370527212e-07, "loss": 0.0309, "projector_lr": 1.4623018111581636e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.84375, "sft_loss": 0.7734375, "step": 3206 }, { "dpo_loss": 0.41796875, "epoch": 0.51, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 4.87180132287564e-07, "loss": 0.507, "projector_lr": 1.461540396862692e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.15625, "sft_loss": 0.859375, "step": 3207 }, { "dpo_loss": 0.58984375, "epoch": 0.51, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 4.869263308277992e-07, "loss": 0.3317, "projector_lr": 1.460778992483398e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.390625, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.625, "sft_loss": 0.76953125, "step": 3208 }, { "dpo_loss": 0.16796875, "epoch": 0.51, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 4.86672532738866e-07, "loss": 0.1192, "projector_lr": 1.460017598216598e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.546875, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.9375, "sft_loss": 0.6484375, "step": 3209 }, { "dpo_loss": 0.30859375, "epoch": 0.51, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 4.864187380862013e-07, "loss": 0.4377, "projector_lr": 1.459256214258604e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.203125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.6875, "sft_loss": 0.671875, "step": 3210 }, { "dpo_loss": 0.2353515625, "epoch": 0.51, "final_loss": 0.2353515625, "grad_norm": 0.0, "learning_rate": 4.861649469352421e-07, "loss": 0.1578, "projector_lr": 1.4584948408057266e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.5625, "sft_loss": 0.69140625, "step": 3211 }, { "dpo_loss": 0.3125, "epoch": 0.51, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 4.859111593514245e-07, "loss": 0.1707, "projector_lr": 1.4577334780542734e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.84375, "sft_loss": 0.921875, "step": 3212 }, { "dpo_loss": 0.06103515625, "epoch": 0.51, "final_loss": 0.06103515625, "grad_norm": 0.0, "learning_rate": 4.856573754001834e-07, "loss": 0.0369, "projector_lr": 1.4569721262005504e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.96875, "sft_loss": 0.65625, "step": 3213 }, { "dpo_loss": 0.01275634765625, "epoch": 0.51, "final_loss": 0.01275634765625, "grad_norm": 0.0, "learning_rate": 4.854035951469527e-07, "loss": 0.2832, "projector_lr": 1.456210785440858e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.421875, "rewards_train/margins": 6.40625, "rewards_train/rejected": -6.8125, "sft_loss": 0.55078125, "step": 3214 }, { "dpo_loss": 0.03857421875, "epoch": 0.51, "final_loss": 0.03857421875, "grad_norm": 0.0, "learning_rate": 4.851498186571653e-07, "loss": 0.0399, "projector_lr": 1.4554494559714962e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.62109375, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.6875, "sft_loss": 0.6875, "step": 3215 }, { "dpo_loss": 0.224609375, "epoch": 0.51, "final_loss": 0.224609375, "grad_norm": 0.0, "learning_rate": 4.848960459962537e-07, "loss": 0.1457, "projector_lr": 1.4546881379887613e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.375, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.375, "sft_loss": 0.57421875, "step": 3216 }, { "dpo_loss": 0.59765625, "epoch": 0.51, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 4.846422772296489e-07, "loss": 0.5285, "projector_lr": 1.4539268316889467e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 2.125, "rewards_train/rejected": -3.671875, "sft_loss": 0.9140625, "step": 3217 }, { "dpo_loss": 0.0712890625, "epoch": 0.51, "final_loss": 0.0712890625, "grad_norm": 0.0, "learning_rate": 4.843885124227812e-07, "loss": 0.0967, "projector_lr": 1.4531655372683436e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 6.09375, "rewards_train/rejected": -7.5, "sft_loss": 0.6953125, "step": 3218 }, { "dpo_loss": 0.115234375, "epoch": 0.52, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 4.841347516410791e-07, "loss": 0.2129, "projector_lr": 1.4524042549232376e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.447265625, "rewards_train/margins": 6.03125, "rewards_train/rejected": -6.46875, "sft_loss": 0.53125, "step": 3219 }, { "dpo_loss": 0.1748046875, "epoch": 0.52, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 4.838809949499712e-07, "loss": 0.1258, "projector_lr": 1.4516429848499136e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75390625, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.5625, "sft_loss": 0.6796875, "step": 3220 }, { "dpo_loss": 0.046630859375, "epoch": 0.52, "final_loss": 0.046630859375, "grad_norm": 0.0, "learning_rate": 4.836272424148842e-07, "loss": 0.1911, "projector_lr": 1.4508817272446526e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.375, "sft_loss": 1.015625, "step": 3221 }, { "dpo_loss": 0.48046875, "epoch": 0.52, "final_loss": 0.48046875, "grad_norm": 0.0, "learning_rate": 4.833734941012444e-07, "loss": 0.2488, "projector_lr": 1.4501204823037334e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 3.0, "rewards_train/rejected": -4.46875, "sft_loss": 0.73046875, "step": 3222 }, { "dpo_loss": 0.216796875, "epoch": 0.52, "final_loss": 0.216796875, "grad_norm": 0.0, "learning_rate": 4.831197500744762e-07, "loss": 0.1648, "projector_lr": 1.4493592502234285e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 3.640625, "rewards_train/rejected": -4.9375, "sft_loss": 0.7265625, "step": 3223 }, { "dpo_loss": 0.1591796875, "epoch": 0.52, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 4.828660104000035e-07, "loss": 0.1305, "projector_lr": 1.4485980312000105e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.125, "rewards_train/margins": 3.96875, "rewards_train/rejected": -5.09375, "sft_loss": 0.9921875, "step": 3224 }, { "dpo_loss": 0.490234375, "epoch": 0.52, "final_loss": 0.490234375, "grad_norm": 0.0, "learning_rate": 4.826122751432488e-07, "loss": 0.3782, "projector_lr": 1.4478368254297466e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.40625, "rewards_train/margins": 3.421875, "rewards_train/rejected": -4.8125, "sft_loss": 0.75, "step": 3225 }, { "dpo_loss": 0.314453125, "epoch": 0.52, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 4.82358544369634e-07, "loss": 0.2085, "projector_lr": 1.4470756331089019e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 3.796875, "rewards_train/rejected": -4.8125, "sft_loss": 0.74609375, "step": 3226 }, { "dpo_loss": 0.314453125, "epoch": 0.52, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 4.821048181445791e-07, "loss": 0.4769, "projector_lr": 1.4463144544337374e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 2.890625, "rewards_train/rejected": -4.53125, "sft_loss": 1.09375, "step": 3227 }, { "dpo_loss": 0.09228515625, "epoch": 0.52, "final_loss": 0.09228515625, "grad_norm": 0.0, "learning_rate": 4.818510965335031e-07, "loss": 0.2721, "projector_lr": 1.4455532896005093e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.28125, "sft_loss": 0.578125, "step": 3228 }, { "dpo_loss": 0.11669921875, "epoch": 0.52, "final_loss": 0.11669921875, "grad_norm": 0.0, "learning_rate": 4.815973796018242e-07, "loss": 0.1433, "projector_lr": 1.4447921388054726e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.40625, "sft_loss": 0.796875, "step": 3229 }, { "dpo_loss": 0.053955078125, "epoch": 0.52, "final_loss": 0.053955078125, "grad_norm": 0.0, "learning_rate": 4.81343667414959e-07, "loss": 0.0489, "projector_lr": 1.444031002244877e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.21875, "rewards_train/margins": 6.09375, "rewards_train/rejected": -7.3125, "sft_loss": 0.5390625, "step": 3230 }, { "dpo_loss": 0.486328125, "epoch": 0.52, "final_loss": 0.486328125, "grad_norm": 0.0, "learning_rate": 4.810899600383232e-07, "loss": 0.2674, "projector_lr": 1.4432698801149697e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.28125, "sft_loss": 0.7421875, "step": 3231 }, { "dpo_loss": 0.32421875, "epoch": 0.52, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 4.808362575373311e-07, "loss": 0.247, "projector_lr": 1.4425087726119932e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 2.203125, "rewards_train/rejected": -4.0625, "sft_loss": 0.8046875, "step": 3232 }, { "dpo_loss": 0.271484375, "epoch": 0.52, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 4.805825599773953e-07, "loss": 0.1944, "projector_lr": 1.4417476799321862e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8359375, "rewards_train/margins": 2.078125, "rewards_train/rejected": -3.921875, "sft_loss": 0.98046875, "step": 3233 }, { "dpo_loss": 0.07373046875, "epoch": 0.52, "final_loss": 0.07373046875, "grad_norm": 0.0, "learning_rate": 4.80328867423928e-07, "loss": 0.2146, "projector_lr": 1.4409866022717842e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.41796875, "rewards_train/margins": 6.125, "rewards_train/rejected": -6.53125, "sft_loss": 0.83984375, "step": 3234 }, { "dpo_loss": 0.1845703125, "epoch": 0.52, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 4.800751799423395e-07, "loss": 0.1458, "projector_lr": 1.4402255398270186e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.21875, "rewards_train/margins": 3.421875, "rewards_train/rejected": -3.625, "sft_loss": 0.84375, "step": 3235 }, { "dpo_loss": 0.0093994140625, "epoch": 0.52, "final_loss": 0.0093994140625, "grad_norm": 0.0, "learning_rate": 4.798214975980392e-07, "loss": 0.0333, "projector_lr": 1.4394644927941176e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 6.5, "rewards_train/rejected": -7.71875, "sft_loss": 0.69921875, "step": 3236 }, { "dpo_loss": 0.0181884765625, "epoch": 0.52, "final_loss": 0.0181884765625, "grad_norm": 0.0, "learning_rate": 4.795678204564345e-07, "loss": 0.0473, "projector_lr": 1.4387034613693037e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.65625, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.71875, "sft_loss": 1.03125, "step": 3237 }, { "dpo_loss": 0.056884765625, "epoch": 0.52, "final_loss": 0.056884765625, "grad_norm": 0.0, "learning_rate": 4.793141485829322e-07, "loss": 0.5081, "projector_lr": 1.4379424457487968e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.640625, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.6875, "sft_loss": 0.67578125, "step": 3238 }, { "dpo_loss": 0.1376953125, "epoch": 0.52, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 4.790604820429373e-07, "loss": 0.2715, "projector_lr": 1.437181446128812e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 3.8125, "rewards_train/rejected": -4.78125, "sft_loss": 0.90234375, "step": 3239 }, { "dpo_loss": 0.10888671875, "epoch": 0.52, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 4.788068209018536e-07, "loss": 0.1269, "projector_lr": 1.4364204627055611e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 3.375, "rewards_train/rejected": -3.953125, "sft_loss": 0.72265625, "step": 3240 }, { "dpo_loss": 0.703125, "epoch": 0.52, "final_loss": 0.703125, "grad_norm": 0.0, "learning_rate": 4.785531652250834e-07, "loss": 0.4047, "projector_lr": 1.4356594956752504e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.40625, "rewards_train/margins": 3.015625, "rewards_train/rejected": -5.40625, "sft_loss": 0.74609375, "step": 3241 }, { "dpo_loss": 0.13671875, "epoch": 0.52, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 4.782995150780278e-07, "loss": 0.1361, "projector_lr": 1.4348985452340834e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.28125, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.78125, "sft_loss": 1.2734375, "step": 3242 }, { "dpo_loss": 0.197265625, "epoch": 0.52, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 4.780458705260861e-07, "loss": 0.1359, "projector_lr": 1.4341376115782585e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.390625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.40625, "sft_loss": 0.89453125, "step": 3243 }, { "dpo_loss": 0.0791015625, "epoch": 0.52, "final_loss": 0.0791015625, "grad_norm": 0.0, "learning_rate": 4.777922316346566e-07, "loss": 0.1658, "projector_lr": 1.43337669490397e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.59375, "sft_loss": 0.875, "step": 3244 }, { "dpo_loss": 0.02783203125, "epoch": 0.52, "final_loss": 0.02783203125, "grad_norm": 0.0, "learning_rate": 4.775385984691361e-07, "loss": 0.1357, "projector_lr": 1.4326157954074083e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 7.375, "rewards_train/rejected": -8.25, "sft_loss": 0.82421875, "step": 3245 }, { "dpo_loss": 0.091796875, "epoch": 0.52, "final_loss": 0.091796875, "grad_norm": 0.0, "learning_rate": 4.772849710949192e-07, "loss": 0.083, "projector_lr": 1.4318549132847577e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 4.8125, "rewards_train/rejected": -6.25, "sft_loss": 0.765625, "step": 3246 }, { "dpo_loss": 0.057861328125, "epoch": 0.52, "final_loss": 0.057861328125, "grad_norm": 0.0, "learning_rate": 4.770313495774e-07, "loss": 0.0584, "projector_lr": 1.4310940487321998e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 6.21875, "rewards_train/rejected": -6.9375, "sft_loss": 0.77734375, "step": 3247 }, { "dpo_loss": 0.33203125, "epoch": 0.52, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 4.767777339819703e-07, "loss": 0.6106, "projector_lr": 1.430333201945911e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 2.453125, "rewards_train/rejected": -3.453125, "sft_loss": 0.75, "step": 3248 }, { "dpo_loss": 0.11279296875, "epoch": 0.52, "final_loss": 0.11279296875, "grad_norm": 0.0, "learning_rate": 4.765241243740213e-07, "loss": 0.0583, "projector_lr": 1.4295723731220639e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.038330078125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.21875, "sft_loss": 0.6015625, "step": 3249 }, { "dpo_loss": 0.0849609375, "epoch": 0.52, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 4.762705208189414e-07, "loss": 0.2257, "projector_lr": 1.4288115624568243e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.40625, "sft_loss": 0.73046875, "step": 3250 }, { "dpo_loss": 0.1923828125, "epoch": 0.52, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 4.7601692338211854e-07, "loss": 0.1705, "projector_lr": 1.4280507701463557e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6875, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.65625, "sft_loss": 0.70703125, "step": 3251 }, { "dpo_loss": 0.00445556640625, "epoch": 0.52, "final_loss": 0.00445556640625, "grad_norm": 0.0, "learning_rate": 4.7576333212893846e-07, "loss": 0.0549, "projector_lr": 1.4272899963868156e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 7.46875, "rewards_train/rejected": -8.1875, "sft_loss": 0.90234375, "step": 3252 }, { "dpo_loss": 0.48828125, "epoch": 0.52, "final_loss": 0.48828125, "grad_norm": 0.0, "learning_rate": 4.7550974712478557e-07, "loss": 0.2539, "projector_lr": 1.426529241374357e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.90625, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.4375, "sft_loss": 0.88671875, "step": 3253 }, { "dpo_loss": 0.01470947265625, "epoch": 0.52, "final_loss": 0.01470947265625, "grad_norm": 0.0, "learning_rate": 4.752561684350429e-07, "loss": 0.0415, "projector_lr": 1.4257685053051288e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7265625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.65625, "sft_loss": 0.78515625, "step": 3254 }, { "dpo_loss": 0.027099609375, "epoch": 0.52, "final_loss": 0.027099609375, "grad_norm": 0.0, "learning_rate": 4.7500259612509104e-07, "loss": 0.2951, "projector_lr": 1.4250077883752733e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.01416015625, "rewards_train/margins": 6.15625, "rewards_train/rejected": -6.15625, "sft_loss": 0.7265625, "step": 3255 }, { "dpo_loss": 0.1083984375, "epoch": 0.52, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 4.747490302603097e-07, "loss": 0.0889, "projector_lr": 1.4242470907809292e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.29296875, "rewards_train/margins": 4.40625, "rewards_train/rejected": -4.71875, "sft_loss": 0.76171875, "step": 3256 }, { "dpo_loss": 0.28515625, "epoch": 0.52, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 4.744954709060766e-07, "loss": 0.3634, "projector_lr": 1.42348641271823e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.15625, "sft_loss": 0.87109375, "step": 3257 }, { "dpo_loss": 0.4609375, "epoch": 0.52, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 4.742419181277682e-07, "loss": 0.2475, "projector_lr": 1.4227257543833046e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 2.328125, "rewards_train/rejected": -4.03125, "sft_loss": 0.9375, "step": 3258 }, { "dpo_loss": 0.11767578125, "epoch": 0.52, "final_loss": 0.11767578125, "grad_norm": 0.0, "learning_rate": 4.7398837199075844e-07, "loss": 0.0638, "projector_lr": 1.4219651159722753e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 6.8125, "rewards_train/rejected": -7.75, "sft_loss": 0.66796875, "step": 3259 }, { "dpo_loss": 0.05908203125, "epoch": 0.52, "final_loss": 0.05908203125, "grad_norm": 0.0, "learning_rate": 4.7373483256042024e-07, "loss": 0.0429, "projector_lr": 1.4212044976812607e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.046875, "rewards_train/margins": 5.0625, "rewards_train/rejected": -7.09375, "sft_loss": 0.859375, "step": 3260 }, { "dpo_loss": 0.052978515625, "epoch": 0.52, "final_loss": 0.052978515625, "grad_norm": 0.0, "learning_rate": 4.7348129990212453e-07, "loss": 0.1358, "projector_lr": 1.4204438997063737e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.68359375, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.34375, "sft_loss": 0.859375, "step": 3261 }, { "dpo_loss": 0.048095703125, "epoch": 0.52, "final_loss": 0.048095703125, "grad_norm": 0.0, "learning_rate": 4.7322777408124066e-07, "loss": 0.1638, "projector_lr": 1.419683322243722e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.265625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.96875, "sft_loss": 0.703125, "step": 3262 }, { "dpo_loss": 0.10302734375, "epoch": 0.52, "final_loss": 0.10302734375, "grad_norm": 0.0, "learning_rate": 4.729742551631362e-07, "loss": 0.1959, "projector_lr": 1.4189227654894088e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.96875, "sft_loss": 0.58203125, "step": 3263 }, { "dpo_loss": 0.10205078125, "epoch": 0.52, "final_loss": 0.10205078125, "grad_norm": 0.0, "learning_rate": 4.727207432131765e-07, "loss": 0.075, "projector_lr": 1.4181622296395297e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.40625, "sft_loss": 0.69140625, "step": 3264 }, { "dpo_loss": 0.1796875, "epoch": 0.52, "final_loss": 0.1796875, "grad_norm": 0.0, "learning_rate": 4.724672382967257e-07, "loss": 0.1227, "projector_lr": 1.4174017148901773e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 6.28125, "rewards_train/rejected": -6.9375, "sft_loss": 0.63671875, "step": 3265 }, { "dpo_loss": 0.279296875, "epoch": 0.52, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 4.7221374047914577e-07, "loss": 0.1851, "projector_lr": 1.4166412214374373e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.59375, "rewards_train/margins": 3.828125, "rewards_train/rejected": -4.4375, "sft_loss": 0.5859375, "step": 3266 }, { "dpo_loss": 0.08154296875, "epoch": 0.52, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 4.7196024982579725e-07, "loss": 0.3149, "projector_lr": 1.4158807494773918e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.78125, "sft_loss": 0.67578125, "step": 3267 }, { "dpo_loss": 0.287109375, "epoch": 0.52, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 4.717067664020381e-07, "loss": 0.2601, "projector_lr": 1.4151202992061143e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 3.578125, "rewards_train/rejected": -4.78125, "sft_loss": 1.1484375, "step": 3268 }, { "dpo_loss": 0.0634765625, "epoch": 0.52, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 4.7145329027322506e-07, "loss": 0.0392, "projector_lr": 1.4143598708196753e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.25, "sft_loss": 0.8515625, "step": 3269 }, { "dpo_loss": 0.064453125, "epoch": 0.52, "final_loss": 0.064453125, "grad_norm": 0.0, "learning_rate": 4.711998215047128e-07, "loss": 0.1012, "projector_lr": 1.4135994645141385e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.34375, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.8125, "sft_loss": 0.7421875, "step": 3270 }, { "dpo_loss": 0.01507568359375, "epoch": 0.52, "final_loss": 0.01507568359375, "grad_norm": 0.0, "learning_rate": 4.709463601618542e-07, "loss": 0.1189, "projector_lr": 1.4128390804855626e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 6.4375, "rewards_train/rejected": -6.6875, "sft_loss": 0.734375, "step": 3271 }, { "dpo_loss": 0.0225830078125, "epoch": 0.52, "final_loss": 0.0225830078125, "grad_norm": 0.0, "learning_rate": 4.7069290631e-07, "loss": 0.1029, "projector_lr": 1.4120787189300001e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.244140625, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.0625, "sft_loss": 0.640625, "step": 3272 }, { "dpo_loss": 0.53515625, "epoch": 0.52, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 4.704394600144992e-07, "loss": 0.3275, "projector_lr": 1.4113183800434976e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.390625, "rewards_train/margins": 1.859375, "rewards_train/rejected": -3.25, "sft_loss": 1.0, "step": 3273 }, { "dpo_loss": 0.32421875, "epoch": 0.52, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 4.701860213406986e-07, "loss": 0.1726, "projector_lr": 1.4105580640220957e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7890625, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.6875, "sft_loss": 1.078125, "step": 3274 }, { "dpo_loss": 0.2216796875, "epoch": 0.52, "final_loss": 0.2216796875, "grad_norm": 0.0, "learning_rate": 4.6993259035394325e-07, "loss": 0.2925, "projector_lr": 1.4097977710618297e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.53125, "sft_loss": 1.015625, "step": 3275 }, { "dpo_loss": 0.203125, "epoch": 0.52, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 4.696791671195763e-07, "loss": 0.123, "projector_lr": 1.409037501358729e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75390625, "rewards_train/margins": 2.890625, "rewards_train/rejected": -3.625, "sft_loss": 0.9375, "step": 3276 }, { "dpo_loss": 0.08544921875, "epoch": 0.52, "final_loss": 0.08544921875, "grad_norm": 0.0, "learning_rate": 4.6942575170293865e-07, "loss": 0.0557, "projector_lr": 1.408277255108816e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 3.84375, "rewards_train/rejected": -4.90625, "sft_loss": 0.8984375, "step": 3277 }, { "dpo_loss": 0.416015625, "epoch": 0.52, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 4.691723441693693e-07, "loss": 0.2142, "projector_lr": 1.407517032508108e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 1.28125, "rewards_train/rejected": -2.515625, "sft_loss": 0.890625, "step": 3278 }, { "dpo_loss": 0.39453125, "epoch": 0.52, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 4.689189445842054e-07, "loss": 0.2141, "projector_lr": 1.4067568337526163e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.03125, "rewards_train/margins": 1.8828125, "rewards_train/rejected": -2.921875, "sft_loss": 0.81640625, "step": 3279 }, { "dpo_loss": 0.1220703125, "epoch": 0.52, "final_loss": 0.1220703125, "grad_norm": 0.0, "learning_rate": 4.6866555301278166e-07, "loss": 0.3538, "projector_lr": 1.405996659038345e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.34375, "sft_loss": 0.6015625, "step": 3280 }, { "dpo_loss": 0.7109375, "epoch": 0.52, "final_loss": 0.7109375, "grad_norm": 0.0, "learning_rate": 4.684121695204311e-07, "loss": 0.3766, "projector_lr": 1.4052365085612934e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8828125, "rewards_train/margins": 3.75, "rewards_train/rejected": -5.625, "sft_loss": 0.8828125, "step": 3281 }, { "dpo_loss": 0.05419921875, "epoch": 0.53, "final_loss": 0.05419921875, "grad_norm": 0.0, "learning_rate": 4.681587941724844e-07, "loss": 0.0601, "projector_lr": 1.4044763825174532e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.890625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.25, "sft_loss": 0.82421875, "step": 3282 }, { "dpo_loss": 0.291015625, "epoch": 0.53, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 4.679054270342702e-07, "loss": 0.2441, "projector_lr": 1.4037162811028108e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.234375, "rewards_train/margins": 6.59375, "rewards_train/rejected": -7.84375, "sft_loss": 0.8671875, "step": 3283 }, { "dpo_loss": 0.154296875, "epoch": 0.53, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 4.6765206817111513e-07, "loss": 0.1765, "projector_lr": 1.4029562045133455e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8125, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.3125, "sft_loss": 1.046875, "step": 3284 }, { "dpo_loss": 0.69140625, "epoch": 0.53, "final_loss": 0.69140625, "grad_norm": 0.0, "learning_rate": 4.6739871764834387e-07, "loss": 0.3597, "projector_lr": 1.4021961529450318e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 3.796875, "rewards_train/rejected": -4.65625, "sft_loss": 0.8203125, "step": 3285 }, { "dpo_loss": 0.287109375, "epoch": 0.53, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 4.671453755312782e-07, "loss": 0.2608, "projector_lr": 1.4014361265938347e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 5.4375, "rewards_train/rejected": -7.03125, "sft_loss": 0.765625, "step": 3286 }, { "dpo_loss": 0.173828125, "epoch": 0.53, "final_loss": 0.173828125, "grad_norm": 0.0, "learning_rate": 4.6689204188523843e-07, "loss": 0.1606, "projector_lr": 1.4006761256557154e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.703125, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.90625, "sft_loss": 0.79296875, "step": 3287 }, { "dpo_loss": 0.5390625, "epoch": 0.53, "final_loss": 0.5390625, "grad_norm": 0.0, "learning_rate": 4.6663871677554263e-07, "loss": 0.445, "projector_lr": 1.399916150326628e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 2.453125, "rewards_train/rejected": -3.578125, "sft_loss": 0.72265625, "step": 3288 }, { "dpo_loss": 0.1513671875, "epoch": 0.53, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 4.663854002675063e-07, "loss": 0.0828, "projector_lr": 1.399156200802519e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.05322265625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.125, "sft_loss": 0.921875, "step": 3289 }, { "dpo_loss": 0.0703125, "epoch": 0.53, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 4.6613209242644337e-07, "loss": 0.0817, "projector_lr": 1.3983962772793303e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.609375, "rewards_train/margins": 7.1875, "rewards_train/rejected": -7.8125, "sft_loss": 0.66015625, "step": 3290 }, { "dpo_loss": 0.0262451171875, "epoch": 0.53, "final_loss": 0.0262451171875, "grad_norm": 0.0, "learning_rate": 4.6587879331766457e-07, "loss": 0.0475, "projector_lr": 1.3976363799529937e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.65625, "rewards_train/margins": 5.59375, "rewards_train/rejected": -7.25, "sft_loss": 0.91796875, "step": 3291 }, { "dpo_loss": 0.18359375, "epoch": 0.53, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 4.656255030064791e-07, "loss": 0.1555, "projector_lr": 1.3968765090194372e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.80859375, "rewards_train/margins": 6.4375, "rewards_train/rejected": -7.25, "sft_loss": 0.703125, "step": 3292 }, { "dpo_loss": 0.10595703125, "epoch": 0.53, "final_loss": 0.10595703125, "grad_norm": 0.0, "learning_rate": 4.6537222155819376e-07, "loss": 0.1965, "projector_lr": 1.3961166646745814e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.25, "sft_loss": 0.6640625, "step": 3293 }, { "dpo_loss": 0.10107421875, "epoch": 0.53, "final_loss": 0.10107421875, "grad_norm": 0.0, "learning_rate": 4.6511894903811333e-07, "loss": 0.0806, "projector_lr": 1.39535684711434e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.5625, "sft_loss": 0.70703125, "step": 3294 }, { "dpo_loss": 0.1787109375, "epoch": 0.53, "final_loss": 0.1787109375, "grad_norm": 0.0, "learning_rate": 4.6486568551153937e-07, "loss": 0.0909, "projector_lr": 1.3945970565346183e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.8125, "sft_loss": 1.03125, "step": 3295 }, { "dpo_loss": 0.65625, "epoch": 0.53, "final_loss": 0.65625, "grad_norm": 0.0, "learning_rate": 4.64612431043772e-07, "loss": 0.3754, "projector_lr": 1.3938372931313161e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.046875, "rewards_train/margins": 1.25, "rewards_train/rejected": -3.296875, "sft_loss": 0.96484375, "step": 3296 }, { "dpo_loss": 0.0673828125, "epoch": 0.53, "final_loss": 0.0673828125, "grad_norm": 0.0, "learning_rate": 4.6435918570010885e-07, "loss": 0.1873, "projector_lr": 1.3930775571003267e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.875, "sft_loss": 0.828125, "step": 3297 }, { "dpo_loss": 0.1826171875, "epoch": 0.53, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 4.6410594954584493e-07, "loss": 0.2365, "projector_lr": 1.392317848637535e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.625, "sft_loss": 1.6640625, "step": 3298 }, { "dpo_loss": 0.421875, "epoch": 0.53, "final_loss": 0.421875, "grad_norm": 0.0, "learning_rate": 4.6385272264627326e-07, "loss": 0.3041, "projector_lr": 1.39155816793882e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 3.671875, "rewards_train/rejected": -4.5625, "sft_loss": 0.75390625, "step": 3299 }, { "dpo_loss": 0.62890625, "epoch": 0.53, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 4.635995050666839e-07, "loss": 0.4019, "projector_lr": 1.3907985152000517e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.9296875, "rewards_train/margins": 2.171875, "rewards_train/rejected": -4.125, "sft_loss": 0.83984375, "step": 3300 }, { "dpo_loss": 0.039794921875, "epoch": 0.53, "final_loss": 0.039794921875, "grad_norm": 0.0, "learning_rate": 4.6334629687236494e-07, "loss": 0.061, "projector_lr": 1.390038890617095e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.474609375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.875, "sft_loss": 0.78515625, "step": 3301 }, { "dpo_loss": 0.033447265625, "epoch": 0.53, "final_loss": 0.033447265625, "grad_norm": 0.0, "learning_rate": 4.630930981286021e-07, "loss": 0.1692, "projector_lr": 1.3892792943858064e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.21875, "sft_loss": 0.60546875, "step": 3302 }, { "dpo_loss": 0.373046875, "epoch": 0.53, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 4.628399089006786e-07, "loss": 0.3285, "projector_lr": 1.3885197267020358e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.03125, "sft_loss": 0.546875, "step": 3303 }, { "dpo_loss": 0.349609375, "epoch": 0.53, "final_loss": 0.349609375, "grad_norm": 0.0, "learning_rate": 4.6258672925387484e-07, "loss": 0.1966, "projector_lr": 1.3877601877616245e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 3.515625, "rewards_train/rejected": -4.625, "sft_loss": 0.8359375, "step": 3304 }, { "dpo_loss": 0.2109375, "epoch": 0.53, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 4.623335592534691e-07, "loss": 0.1494, "projector_lr": 1.3870006777604074e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6875, "rewards_train/margins": 3.671875, "rewards_train/rejected": -4.34375, "sft_loss": 0.765625, "step": 3305 }, { "dpo_loss": 0.017822265625, "epoch": 0.53, "final_loss": 0.017822265625, "grad_norm": 0.0, "learning_rate": 4.620803989647372e-07, "loss": 0.0155, "projector_lr": 1.3862411968942117e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.14453125, "rewards_train/margins": 7.59375, "rewards_train/rejected": -7.4375, "sft_loss": 0.78125, "step": 3306 }, { "dpo_loss": 0.12255859375, "epoch": 0.53, "final_loss": 0.12255859375, "grad_norm": 0.0, "learning_rate": 4.618272484529523e-07, "loss": 0.1249, "projector_lr": 1.3854817453588572e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.15625, "sft_loss": 0.6875, "step": 3307 }, { "dpo_loss": 0.23828125, "epoch": 0.53, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 4.615741077833855e-07, "loss": 0.303, "projector_lr": 1.3847223233501567e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.46875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.96875, "sft_loss": 0.86328125, "step": 3308 }, { "dpo_loss": 0.408203125, "epoch": 0.53, "final_loss": 0.408203125, "grad_norm": 0.0, "learning_rate": 4.6132097702130436e-07, "loss": 0.246, "projector_lr": 1.3839629310639132e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.859375, "rewards_train/margins": 2.703125, "rewards_train/rejected": -4.5625, "sft_loss": 0.84765625, "step": 3309 }, { "dpo_loss": 0.2255859375, "epoch": 0.53, "final_loss": 0.2255859375, "grad_norm": 0.0, "learning_rate": 4.6106785623197473e-07, "loss": 0.1721, "projector_lr": 1.3832035686959244e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.046875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -6.40625, "sft_loss": 0.74609375, "step": 3310 }, { "dpo_loss": 0.453125, "epoch": 0.53, "final_loss": 0.453125, "grad_norm": 0.0, "learning_rate": 4.608147454806597e-07, "loss": 0.3074, "projector_lr": 1.3824442364419793e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.078125, "rewards_train/margins": 2.28125, "rewards_train/rejected": -4.375, "sft_loss": 0.67578125, "step": 3311 }, { "dpo_loss": 0.341796875, "epoch": 0.53, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 4.605616448326199e-07, "loss": 0.1792, "projector_lr": 1.3816849344978597e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 3.234375, "rewards_train/rejected": -4.28125, "sft_loss": 0.875, "step": 3312 }, { "dpo_loss": 0.1767578125, "epoch": 0.53, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 4.603085543531128e-07, "loss": 0.2059, "projector_lr": 1.3809256630593387e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 6.21875, "rewards_train/rejected": -7.09375, "sft_loss": 0.9921875, "step": 3313 }, { "dpo_loss": 0.23046875, "epoch": 0.53, "final_loss": 0.23046875, "grad_norm": 0.0, "learning_rate": 4.60055474107394e-07, "loss": 0.2116, "projector_lr": 1.380166422322182e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.484375, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.9375, "sft_loss": 1.03125, "step": 3314 }, { "dpo_loss": 0.09716796875, "epoch": 0.53, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 4.5980240416071573e-07, "loss": 0.1215, "projector_lr": 1.3794072124821474e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.75, "sft_loss": 0.7734375, "step": 3315 }, { "dpo_loss": 0.208984375, "epoch": 0.53, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 4.5954934457832817e-07, "loss": 0.1517, "projector_lr": 1.3786480337349846e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5234375, "rewards_train/margins": 2.875, "rewards_train/rejected": -4.40625, "sft_loss": 0.77734375, "step": 3316 }, { "dpo_loss": 0.404296875, "epoch": 0.53, "final_loss": 0.404296875, "grad_norm": 0.0, "learning_rate": 4.592962954254785e-07, "loss": 0.3936, "projector_lr": 1.3778888862764357e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.296875, "rewards_train/margins": 3.734375, "rewards_train/rejected": -5.03125, "sft_loss": 0.61328125, "step": 3317 }, { "dpo_loss": 0.2333984375, "epoch": 0.53, "final_loss": 0.2333984375, "grad_norm": 0.0, "learning_rate": 4.5904325676741134e-07, "loss": 0.1654, "projector_lr": 1.3771297703022342e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.84375, "sft_loss": 0.9296875, "step": 3318 }, { "dpo_loss": 0.1640625, "epoch": 0.53, "final_loss": 0.1640625, "grad_norm": 0.0, "learning_rate": 4.5879022866936845e-07, "loss": 0.1871, "projector_lr": 1.3763706860081055e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.3125, "sft_loss": 0.90625, "step": 3319 }, { "dpo_loss": 0.1064453125, "epoch": 0.53, "final_loss": 0.1064453125, "grad_norm": 0.0, "learning_rate": 4.585372111965892e-07, "loss": 0.0695, "projector_lr": 1.3756116335897678e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.4375, "sft_loss": 0.8046875, "step": 3320 }, { "dpo_loss": 0.431640625, "epoch": 0.53, "final_loss": 0.431640625, "grad_norm": 0.0, "learning_rate": 4.582842044143098e-07, "loss": 0.3241, "projector_lr": 1.3748526132429296e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.75, "sft_loss": 0.71875, "step": 3321 }, { "dpo_loss": 0.123046875, "epoch": 0.53, "final_loss": 0.123046875, "grad_norm": 0.0, "learning_rate": 4.5803120838776387e-07, "loss": 0.1597, "projector_lr": 1.3740936251632918e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.28125, "rewards_train/margins": 3.765625, "rewards_train/rejected": -6.0625, "sft_loss": 0.65234375, "step": 3322 }, { "dpo_loss": 0.439453125, "epoch": 0.53, "final_loss": 0.439453125, "grad_norm": 0.0, "learning_rate": 4.577782231821824e-07, "loss": 0.2909, "projector_lr": 1.3733346695465473e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 3.765625, "rewards_train/rejected": -5.46875, "sft_loss": 0.87109375, "step": 3323 }, { "dpo_loss": 0.6875, "epoch": 0.53, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 4.575252488627934e-07, "loss": 0.4219, "projector_lr": 1.3725757465883801e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.203125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.71875, "sft_loss": 0.97265625, "step": 3324 }, { "dpo_loss": 0.0732421875, "epoch": 0.53, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 4.572722854948221e-07, "loss": 0.1868, "projector_lr": 1.3718168564844665e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9765625, "rewards_train/margins": 5.40625, "rewards_train/rejected": -7.375, "sft_loss": 1.125, "step": 3325 }, { "dpo_loss": 0.296875, "epoch": 0.53, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 4.570193331434913e-07, "loss": 0.1768, "projector_lr": 1.371057999430474e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.875, "sft_loss": 0.67578125, "step": 3326 }, { "dpo_loss": 0.040283203125, "epoch": 0.53, "final_loss": 0.040283203125, "grad_norm": 0.0, "learning_rate": 4.567663918740202e-07, "loss": 0.034, "projector_lr": 1.3702991756220607e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 7.28125, "rewards_train/rejected": -9.0, "sft_loss": 0.75390625, "step": 3327 }, { "dpo_loss": 0.10107421875, "epoch": 0.53, "final_loss": 0.10107421875, "grad_norm": 0.0, "learning_rate": 4.5651346175162566e-07, "loss": 0.0742, "projector_lr": 1.369540385254877e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.4375, "sft_loss": 0.77734375, "step": 3328 }, { "dpo_loss": 0.07568359375, "epoch": 0.53, "final_loss": 0.07568359375, "grad_norm": 0.0, "learning_rate": 4.562605428415216e-07, "loss": 0.0779, "projector_lr": 1.368781628524565e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.03125, "sft_loss": 0.7109375, "step": 3329 }, { "dpo_loss": 0.099609375, "epoch": 0.53, "final_loss": 0.099609375, "grad_norm": 0.0, "learning_rate": 4.560076352089192e-07, "loss": 0.15, "projector_lr": 1.3680229056267578e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.21875, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.875, "sft_loss": 0.63671875, "step": 3330 }, { "dpo_loss": 0.1767578125, "epoch": 0.53, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 4.557547389190262e-07, "loss": 0.4075, "projector_lr": 1.3672642167570787e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.171875, "rewards_train/margins": 6.03125, "rewards_train/rejected": -8.1875, "sft_loss": 0.78515625, "step": 3331 }, { "dpo_loss": 0.00439453125, "epoch": 0.53, "final_loss": 0.00439453125, "grad_norm": 0.0, "learning_rate": 4.555018540370479e-07, "loss": 0.0872, "projector_lr": 1.3665055621111439e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 6.53125, "rewards_train/rejected": -7.125, "sft_loss": 0.6875, "step": 3332 }, { "dpo_loss": 0.1630859375, "epoch": 0.53, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 4.5524898062818664e-07, "loss": 0.1127, "projector_lr": 1.36574694188456e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.875, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.59375, "sft_loss": 1.203125, "step": 3333 }, { "dpo_loss": 0.046142578125, "epoch": 0.53, "final_loss": 0.046142578125, "grad_norm": 0.0, "learning_rate": 4.549961187576414e-07, "loss": 0.0521, "projector_lr": 1.3649883562729244e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.40625, "sft_loss": 0.6875, "step": 3334 }, { "dpo_loss": 0.07861328125, "epoch": 0.53, "final_loss": 0.07861328125, "grad_norm": 0.0, "learning_rate": 4.54743268490609e-07, "loss": 0.2012, "projector_lr": 1.3642298054718272e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.53125, "sft_loss": 0.76953125, "step": 3335 }, { "dpo_loss": 0.0751953125, "epoch": 0.53, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 4.5449042989228215e-07, "loss": 0.2683, "projector_lr": 1.3634712896768465e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -5.78125, "sft_loss": 0.80078125, "step": 3336 }, { "dpo_loss": 0.369140625, "epoch": 0.53, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 4.5423760302785135e-07, "loss": 0.3329, "projector_lr": 1.3627128090835542e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 3.078125, "rewards_train/rejected": -4.15625, "sft_loss": 0.396484375, "step": 3337 }, { "dpo_loss": 0.34375, "epoch": 0.53, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 4.539847879625039e-07, "loss": 0.4011, "projector_lr": 1.3619543638875118e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8359375, "rewards_train/margins": 2.890625, "rewards_train/rejected": -4.71875, "sft_loss": 1.2890625, "step": 3338 }, { "dpo_loss": 0.1142578125, "epoch": 0.53, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 4.5373198476142415e-07, "loss": 0.0952, "projector_lr": 1.3611959542842726e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.03125, "sft_loss": 0.9921875, "step": 3339 }, { "dpo_loss": 0.134765625, "epoch": 0.53, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 4.5347919348979293e-07, "loss": 0.1067, "projector_lr": 1.3604375804693787e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9375, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.1875, "sft_loss": 0.83984375, "step": 3340 }, { "dpo_loss": 0.140625, "epoch": 0.53, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 4.532264142127885e-07, "loss": 0.1281, "projector_lr": 1.3596792426383657e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 6.21875, "rewards_train/rejected": -7.90625, "sft_loss": 0.69140625, "step": 3341 }, { "dpo_loss": 0.126953125, "epoch": 0.53, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 4.529736469955858e-07, "loss": 0.1929, "projector_lr": 1.3589209409867576e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.359375, "rewards_train/margins": 4.125, "rewards_train/rejected": -6.46875, "sft_loss": 1.0546875, "step": 3342 }, { "dpo_loss": 0.318359375, "epoch": 0.53, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 4.5272089190335677e-07, "loss": 0.2469, "projector_lr": 1.3581626757100705e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.59375, "sft_loss": 0.76953125, "step": 3343 }, { "dpo_loss": 0.1279296875, "epoch": 0.54, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 4.524681490012704e-07, "loss": 0.2158, "projector_lr": 1.3574044470038113e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.75, "sft_loss": 0.76171875, "step": 3344 }, { "dpo_loss": 0.11962890625, "epoch": 0.54, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 4.5221541835449177e-07, "loss": 0.0751, "projector_lr": 1.3566462550634754e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.28125, "sft_loss": 0.72265625, "step": 3345 }, { "dpo_loss": 0.1943359375, "epoch": 0.54, "final_loss": 0.1943359375, "grad_norm": 0.0, "learning_rate": 4.519627000281837e-07, "loss": 0.0986, "projector_lr": 1.355888100084551e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -6.15625, "sft_loss": 0.86328125, "step": 3346 }, { "dpo_loss": 0.1669921875, "epoch": 0.54, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 4.5170999408750527e-07, "loss": 0.0928, "projector_lr": 1.355129982262516e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.203125, "rewards_train/margins": 2.59375, "rewards_train/rejected": -4.8125, "sft_loss": 0.70703125, "step": 3347 }, { "dpo_loss": 0.004913330078125, "epoch": 0.54, "final_loss": 0.004913330078125, "grad_norm": 0.0, "learning_rate": 4.514573005976128e-07, "loss": 0.026, "projector_lr": 1.3543719017928387e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59375, "rewards_train/margins": 6.78125, "rewards_train/rejected": -7.375, "sft_loss": 0.734375, "step": 3348 }, { "dpo_loss": 0.6640625, "epoch": 0.54, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 4.5120461962365907e-07, "loss": 0.6226, "projector_lr": 1.3536138588709774e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 1.4375, "rewards_train/rejected": -3.09375, "sft_loss": 1.078125, "step": 3349 }, { "dpo_loss": 0.57421875, "epoch": 0.54, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 4.5095195123079354e-07, "loss": 0.4926, "projector_lr": 1.3528558536923807e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.3125, "rewards_train/margins": 3.59375, "rewards_train/rejected": -5.90625, "sft_loss": 0.92578125, "step": 3350 }, { "dpo_loss": 0.671875, "epoch": 0.54, "final_loss": 0.671875, "grad_norm": 0.0, "learning_rate": 4.506992954841628e-07, "loss": 0.3799, "projector_lr": 1.3520978864524885e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.640625, "rewards_train/margins": 3.265625, "rewards_train/rejected": -4.90625, "sft_loss": 0.89453125, "step": 3351 }, { "dpo_loss": 0.314453125, "epoch": 0.54, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 4.504466524489099e-07, "loss": 0.1766, "projector_lr": 1.3513399573467298e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 3.59375, "rewards_train/rejected": -4.75, "sft_loss": 0.69140625, "step": 3352 }, { "dpo_loss": 0.0064697265625, "epoch": 0.54, "final_loss": 0.0064697265625, "grad_norm": 0.0, "learning_rate": 4.5019402219017485e-07, "loss": 0.0543, "projector_lr": 1.3505820665705245e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 6.875, "rewards_train/rejected": -8.25, "sft_loss": 0.95703125, "step": 3353 }, { "dpo_loss": 0.5546875, "epoch": 0.54, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 4.49941404773094e-07, "loss": 0.3342, "projector_lr": 1.349824214319282e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.4375, "sft_loss": 0.703125, "step": 3354 }, { "dpo_loss": 0.359375, "epoch": 0.54, "final_loss": 0.359375, "grad_norm": 0.0, "learning_rate": 4.496888002628008e-07, "loss": 0.2935, "projector_lr": 1.3490664007884026e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6875, "rewards_train/margins": 3.65625, "rewards_train/rejected": -5.34375, "sft_loss": 0.84375, "step": 3355 }, { "dpo_loss": 0.0322265625, "epoch": 0.54, "final_loss": 0.0322265625, "grad_norm": 0.0, "learning_rate": 4.494362087244251e-07, "loss": 0.2135, "projector_lr": 1.3483086261732752e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.40625, "sft_loss": 0.984375, "step": 3356 }, { "dpo_loss": 0.2578125, "epoch": 0.54, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 4.491836302230935e-07, "loss": 0.2895, "projector_lr": 1.3475508906692805e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.59375, "sft_loss": 0.68359375, "step": 3357 }, { "dpo_loss": 0.0341796875, "epoch": 0.54, "final_loss": 0.0341796875, "grad_norm": 0.0, "learning_rate": 4.4893106482392904e-07, "loss": 0.036, "projector_lr": 1.3467931944717872e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 5.40625, "rewards_train/rejected": -7.09375, "sft_loss": 0.875, "step": 3358 }, { "dpo_loss": 0.2421875, "epoch": 0.54, "final_loss": 0.2421875, "grad_norm": 0.0, "learning_rate": 4.486785125920518e-07, "loss": 0.2509, "projector_lr": 1.3460355377761555e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.453125, "rewards_train/margins": 5.53125, "rewards_train/rejected": -8.0, "sft_loss": 1.1796875, "step": 3359 }, { "dpo_loss": 0.012451171875, "epoch": 0.54, "final_loss": 0.012451171875, "grad_norm": 0.0, "learning_rate": 4.484259735925781e-07, "loss": 0.036, "projector_lr": 1.3452779207777344e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 6.875, "rewards_train/rejected": -8.3125, "sft_loss": 0.5234375, "step": 3360 }, { "dpo_loss": 0.1923828125, "epoch": 0.54, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 4.4817344789062124e-07, "loss": 0.1083, "projector_lr": 1.3445203436718638e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 4.8125, "rewards_train/rejected": -6.1875, "sft_loss": 0.7109375, "step": 3361 }, { "dpo_loss": 0.06298828125, "epoch": 0.54, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 4.4792093555129055e-07, "loss": 0.129, "projector_lr": 1.3437628066538719e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.77734375, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.9375, "sft_loss": 0.69140625, "step": 3362 }, { "dpo_loss": 0.392578125, "epoch": 0.54, "final_loss": 0.392578125, "grad_norm": 0.0, "learning_rate": 4.476684366396922e-07, "loss": 0.208, "projector_lr": 1.3430053099190767e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.4375, "rewards_train/margins": 1.8671875, "rewards_train/rejected": -4.3125, "sft_loss": 0.828125, "step": 3363 }, { "dpo_loss": 0.2490234375, "epoch": 0.54, "final_loss": 0.2490234375, "grad_norm": 0.0, "learning_rate": 4.474159512209289e-07, "loss": 0.1342, "projector_lr": 1.3422478536627869e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8203125, "rewards_train/margins": 6.1875, "rewards_train/rejected": -8.0, "sft_loss": 1.0, "step": 3364 }, { "dpo_loss": 0.0341796875, "epoch": 0.54, "final_loss": 0.0341796875, "grad_norm": 0.0, "learning_rate": 4.471634793601e-07, "loss": 0.168, "projector_lr": 1.3414904380803002e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.75, "sft_loss": 0.984375, "step": 3365 }, { "dpo_loss": 0.1982421875, "epoch": 0.54, "final_loss": 0.1982421875, "grad_norm": 0.0, "learning_rate": 4.469110211223014e-07, "loss": 0.1263, "projector_lr": 1.3407330633669043e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.140625, "rewards_train/margins": 5.375, "rewards_train/rejected": -7.53125, "sft_loss": 1.1484375, "step": 3366 }, { "dpo_loss": 0.158203125, "epoch": 0.54, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 4.466585765726248e-07, "loss": 0.1115, "projector_lr": 1.3399757297178744e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.90625, "sft_loss": 0.53125, "step": 3367 }, { "dpo_loss": 0.1943359375, "epoch": 0.54, "final_loss": 0.1943359375, "grad_norm": 0.0, "learning_rate": 4.464061457761592e-07, "loss": 0.1889, "projector_lr": 1.3392184373284778e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.287109375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -4.84375, "sft_loss": 0.734375, "step": 3368 }, { "dpo_loss": 0.06298828125, "epoch": 0.54, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 4.4615372879798967e-07, "loss": 0.0396, "projector_lr": 1.3384611863939692e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.3125, "sft_loss": 0.7265625, "step": 3369 }, { "dpo_loss": 0.1142578125, "epoch": 0.54, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 4.4590132570319784e-07, "loss": 0.0836, "projector_lr": 1.3377039771095936e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.09375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -6.40625, "sft_loss": 1.125, "step": 3370 }, { "dpo_loss": 0.0020294189453125, "epoch": 0.54, "final_loss": 0.0020294189453125, "grad_norm": 0.0, "learning_rate": 4.456489365568619e-07, "loss": 0.2274, "projector_lr": 1.3369468096705857e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1328125, "rewards_train/margins": 7.625, "rewards_train/rejected": -7.75, "sft_loss": 0.65234375, "step": 3371 }, { "dpo_loss": 0.158203125, "epoch": 0.54, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 4.453965614240558e-07, "loss": 0.1089, "projector_lr": 1.3361896842721674e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.515625, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.96875, "sft_loss": 0.6640625, "step": 3372 }, { "dpo_loss": 0.06640625, "epoch": 0.54, "final_loss": 0.06640625, "grad_norm": 0.0, "learning_rate": 4.4514420036985055e-07, "loss": 0.1307, "projector_lr": 1.3354326011095518e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.9375, "sft_loss": 0.78515625, "step": 3373 }, { "dpo_loss": 0.26953125, "epoch": 0.54, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 4.448918534593134e-07, "loss": 0.1853, "projector_lr": 1.3346755603779404e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.21875, "rewards_train/margins": 3.625, "rewards_train/rejected": -4.84375, "sft_loss": 0.75, "step": 3374 }, { "dpo_loss": 0.267578125, "epoch": 0.54, "final_loss": 0.267578125, "grad_norm": 0.0, "learning_rate": 4.4463952075750797e-07, "loss": 0.1715, "projector_lr": 1.333918562272524e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 3.0, "rewards_train/rejected": -4.40625, "sft_loss": 0.71484375, "step": 3375 }, { "dpo_loss": 0.146484375, "epoch": 0.54, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 4.443872023294937e-07, "loss": 0.1954, "projector_lr": 1.3331616069884812e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8125, "rewards_train/margins": 4.625, "rewards_train/rejected": -6.4375, "sft_loss": 0.828125, "step": 3376 }, { "dpo_loss": 0.1123046875, "epoch": 0.54, "final_loss": 0.1123046875, "grad_norm": 0.0, "learning_rate": 4.4413489824032694e-07, "loss": 0.0641, "projector_lr": 1.332404694720981e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.609375, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.46875, "sft_loss": 0.74609375, "step": 3377 }, { "dpo_loss": 0.171875, "epoch": 0.54, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 4.438826085550603e-07, "loss": 0.2883, "projector_lr": 1.331647825665181e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.078125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.09375, "sft_loss": 0.96875, "step": 3378 }, { "dpo_loss": 0.031494140625, "epoch": 0.54, "final_loss": 0.031494140625, "grad_norm": 0.0, "learning_rate": 4.4363033333874235e-07, "loss": 0.1318, "projector_lr": 1.330891000016227e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 5.5, "rewards_train/rejected": -7.125, "sft_loss": 0.84375, "step": 3379 }, { "dpo_loss": 0.369140625, "epoch": 0.54, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 4.4337807265641844e-07, "loss": 0.248, "projector_lr": 1.3301342179692553e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 3.09375, "rewards_train/rejected": -4.1875, "sft_loss": 0.828125, "step": 3380 }, { "dpo_loss": 0.111328125, "epoch": 0.54, "final_loss": 0.111328125, "grad_norm": 0.0, "learning_rate": 4.431258265731294e-07, "loss": 0.1185, "projector_lr": 1.3293774797193883e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.96875, "sft_loss": 0.90625, "step": 3381 }, { "dpo_loss": 0.01806640625, "epoch": 0.54, "final_loss": 0.01806640625, "grad_norm": 0.0, "learning_rate": 4.4287359515391294e-07, "loss": 0.2486, "projector_lr": 1.3286207854617388e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3984375, "rewards_train/margins": 6.9375, "rewards_train/rejected": -7.34375, "sft_loss": 0.69921875, "step": 3382 }, { "dpo_loss": 0.1845703125, "epoch": 0.54, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 4.4262137846380284e-07, "loss": 0.1245, "projector_lr": 1.3278641353914085e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.53125, "rewards_train/margins": 3.5, "rewards_train/rejected": -6.03125, "sft_loss": 1.171875, "step": 3383 }, { "dpo_loss": 0.01141357421875, "epoch": 0.54, "final_loss": 0.01141357421875, "grad_norm": 0.0, "learning_rate": 4.4236917656782917e-07, "loss": 0.0683, "projector_lr": 1.3271075297034876e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5625, "rewards_train/margins": 5.375, "rewards_train/rejected": -5.9375, "sft_loss": 0.58984375, "step": 3384 }, { "dpo_loss": 0.01385498046875, "epoch": 0.54, "final_loss": 0.01385498046875, "grad_norm": 0.0, "learning_rate": 4.4211698953101774e-07, "loss": 0.0709, "projector_lr": 1.3263509685930532e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.078125, "rewards_train/margins": 6.78125, "rewards_train/rejected": -8.875, "sft_loss": 0.91796875, "step": 3385 }, { "dpo_loss": 0.0869140625, "epoch": 0.54, "final_loss": 0.0869140625, "grad_norm": 0.0, "learning_rate": 4.4186481741839094e-07, "loss": 0.1239, "projector_lr": 1.325594452255173e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 6.65625, "rewards_train/rejected": -8.4375, "sft_loss": 0.9375, "step": 3386 }, { "dpo_loss": 0.0130615234375, "epoch": 0.54, "final_loss": 0.0130615234375, "grad_norm": 0.0, "learning_rate": 4.4161266029496727e-07, "loss": 0.0588, "projector_lr": 1.3248379808849018e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4375, "rewards_train/margins": 7.15625, "rewards_train/rejected": -7.59375, "sft_loss": 0.7265625, "step": 3387 }, { "dpo_loss": 0.416015625, "epoch": 0.54, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 4.4136051822576134e-07, "loss": 0.2982, "projector_lr": 1.3240815546772841e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 7.0, "rewards_train/rejected": -7.8125, "sft_loss": 0.59375, "step": 3388 }, { "dpo_loss": 0.1533203125, "epoch": 0.54, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 4.411083912757839e-07, "loss": 0.1693, "projector_lr": 1.3233251738273518e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.6875, "sft_loss": 0.76171875, "step": 3389 }, { "dpo_loss": 0.052978515625, "epoch": 0.54, "final_loss": 0.052978515625, "grad_norm": 0.0, "learning_rate": 4.4085627951004157e-07, "loss": 0.1137, "projector_lr": 1.3225688385301248e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.8125, "sft_loss": 0.984375, "step": 3390 }, { "dpo_loss": 0.1376953125, "epoch": 0.54, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 4.4060418299353724e-07, "loss": 0.1786, "projector_lr": 1.3218125489806117e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.875, "sft_loss": 0.703125, "step": 3391 }, { "dpo_loss": 0.15625, "epoch": 0.54, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 4.403521017912699e-07, "loss": 0.1869, "projector_lr": 1.3210563053738098e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 3.6875, "rewards_train/rejected": -4.71875, "sft_loss": 0.8203125, "step": 3392 }, { "dpo_loss": 0.0966796875, "epoch": 0.54, "final_loss": 0.0966796875, "grad_norm": 0.0, "learning_rate": 4.401000359682348e-07, "loss": 0.0696, "projector_lr": 1.3203001079047046e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 3.296875, "rewards_train/rejected": -4.78125, "sft_loss": 1.0078125, "step": 3393 }, { "dpo_loss": 0.123046875, "epoch": 0.54, "final_loss": 0.123046875, "grad_norm": 0.0, "learning_rate": 4.398479855894226e-07, "loss": 0.1909, "projector_lr": 1.3195439567682678e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.96875, "sft_loss": 0.86328125, "step": 3394 }, { "dpo_loss": 0.0986328125, "epoch": 0.54, "final_loss": 0.0986328125, "grad_norm": 0.0, "learning_rate": 4.3959595071982063e-07, "loss": 0.0906, "projector_lr": 1.318787852159462e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.9375, "sft_loss": 0.76953125, "step": 3395 }, { "dpo_loss": 0.5546875, "epoch": 0.54, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 4.393439314244117e-07, "loss": 0.3565, "projector_lr": 1.3180317942732353e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 3.625, "rewards_train/rejected": -4.53125, "sft_loss": 0.703125, "step": 3396 }, { "dpo_loss": 0.091796875, "epoch": 0.54, "final_loss": 0.091796875, "grad_norm": 0.0, "learning_rate": 4.390919277681751e-07, "loss": 0.0628, "projector_lr": 1.3172757833045253e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.28125, "sft_loss": 0.80078125, "step": 3397 }, { "dpo_loss": 0.06591796875, "epoch": 0.54, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 4.3883993981608567e-07, "loss": 0.1552, "projector_lr": 1.3165198194482573e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.78125, "sft_loss": 0.90234375, "step": 3398 }, { "dpo_loss": 0.359375, "epoch": 0.54, "final_loss": 0.359375, "grad_norm": 0.0, "learning_rate": 4.385879676331144e-07, "loss": 0.2128, "projector_lr": 1.3157639028993433e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.578125, "rewards_train/margins": 3.734375, "rewards_train/rejected": -5.3125, "sft_loss": 0.73046875, "step": 3399 }, { "dpo_loss": 0.034423828125, "epoch": 0.54, "final_loss": 0.034423828125, "grad_norm": 0.0, "learning_rate": 4.383360112842282e-07, "loss": 0.1235, "projector_lr": 1.3150080338526847e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.0, "sft_loss": 0.7578125, "step": 3400 }, { "dpo_loss": 0.07666015625, "epoch": 0.54, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 4.3808407083439004e-07, "loss": 0.1357, "projector_lr": 1.3142522125031701e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.8125, "sft_loss": 0.9765625, "step": 3401 }, { "dpo_loss": 0.453125, "epoch": 0.54, "final_loss": 0.453125, "grad_norm": 0.0, "learning_rate": 4.378321463485584e-07, "loss": 0.3102, "projector_lr": 1.3134964390456753e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.96875, "sft_loss": 0.84375, "step": 3402 }, { "dpo_loss": 0.486328125, "epoch": 0.54, "final_loss": 0.486328125, "grad_norm": 0.0, "learning_rate": 4.37580237891688e-07, "loss": 0.2678, "projector_lr": 1.312740713675064e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 3.28125, "rewards_train/rejected": -4.59375, "sft_loss": 0.93359375, "step": 3403 }, { "dpo_loss": 0.27734375, "epoch": 0.54, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 4.3732834552872913e-07, "loss": 0.6367, "projector_lr": 1.3119850365861875e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.375, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.375, "sft_loss": 0.8671875, "step": 3404 }, { "dpo_loss": 0.0048828125, "epoch": 0.54, "final_loss": 0.0048828125, "grad_norm": 0.0, "learning_rate": 4.370764693246283e-07, "loss": 0.1101, "projector_lr": 1.311229407973885e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 7.0625, "rewards_train/rejected": -8.375, "sft_loss": 0.6640625, "step": 3405 }, { "dpo_loss": 0.011474609375, "epoch": 0.54, "final_loss": 0.011474609375, "grad_norm": 0.0, "learning_rate": 4.3682460934432753e-07, "loss": 0.0592, "projector_lr": 1.3104738280329827e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0927734375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.25, "sft_loss": 0.625, "step": 3406 }, { "dpo_loss": 0.498046875, "epoch": 0.55, "final_loss": 0.498046875, "grad_norm": 0.0, "learning_rate": 4.3657276565276506e-07, "loss": 0.3214, "projector_lr": 1.3097182969582954e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.9375, "sft_loss": 0.76953125, "step": 3407 }, { "dpo_loss": 0.326171875, "epoch": 0.55, "final_loss": 0.326171875, "grad_norm": 0.0, "learning_rate": 4.363209383148743e-07, "loss": 0.1682, "projector_lr": 1.3089628149446229e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 3.671875, "rewards_train/rejected": -4.75, "sft_loss": 0.8046875, "step": 3408 }, { "dpo_loss": 0.67578125, "epoch": 0.55, "final_loss": 0.67578125, "grad_norm": 0.0, "learning_rate": 4.360691273955849e-07, "loss": 0.4076, "projector_lr": 1.3082073821867548e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.921875, "rewards_train/margins": 2.234375, "rewards_train/rejected": -5.15625, "sft_loss": 1.1171875, "step": 3409 }, { "dpo_loss": 0.1279296875, "epoch": 0.55, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 4.3581733295982216e-07, "loss": 0.1364, "projector_lr": 1.3074519988794666e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.53125, "sft_loss": 0.5859375, "step": 3410 }, { "dpo_loss": 0.1748046875, "epoch": 0.55, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 4.3556555507250745e-07, "loss": 0.121, "projector_lr": 1.3066966652175225e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.71875, "sft_loss": 0.8359375, "step": 3411 }, { "dpo_loss": 0.0478515625, "epoch": 0.55, "final_loss": 0.0478515625, "grad_norm": 0.0, "learning_rate": 4.3531379379855714e-07, "loss": 0.1592, "projector_lr": 1.3059413813956715e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.09375, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.09375, "sft_loss": 0.67578125, "step": 3412 }, { "dpo_loss": 0.71484375, "epoch": 0.55, "final_loss": 0.71484375, "grad_norm": 0.0, "learning_rate": 4.3506204920288387e-07, "loss": 0.386, "projector_lr": 1.3051861476086517e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.28125, "rewards_train/margins": 2.765625, "rewards_train/rejected": -5.03125, "sft_loss": 0.7734375, "step": 3413 }, { "dpo_loss": 0.10302734375, "epoch": 0.55, "final_loss": 0.10302734375, "grad_norm": 0.0, "learning_rate": 4.3481032135039594e-07, "loss": 0.2604, "projector_lr": 1.304430964051188e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.5, "sft_loss": 0.99609375, "step": 3414 }, { "dpo_loss": 0.236328125, "epoch": 0.55, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 4.3455861030599715e-07, "loss": 0.171, "projector_lr": 1.3036758309179915e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3125, "rewards_train/margins": 3.34375, "rewards_train/rejected": -4.65625, "sft_loss": 0.84375, "step": 3415 }, { "dpo_loss": 0.15234375, "epoch": 0.55, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 4.343069161345874e-07, "loss": 0.1946, "projector_lr": 1.3029207484037623e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 5.75, "rewards_train/rejected": -7.1875, "sft_loss": 0.76953125, "step": 3416 }, { "dpo_loss": 0.142578125, "epoch": 0.55, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 4.340552389010614e-07, "loss": 0.1318, "projector_lr": 1.3021657167031845e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.859375, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.5, "sft_loss": 0.52734375, "step": 3417 }, { "dpo_loss": 0.0478515625, "epoch": 0.55, "final_loss": 0.0478515625, "grad_norm": 0.0, "learning_rate": 4.3380357867031026e-07, "loss": 0.1319, "projector_lr": 1.3014107360109309e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 6.90625, "rewards_train/rejected": -7.5625, "sft_loss": 0.64453125, "step": 3418 }, { "dpo_loss": 0.08154296875, "epoch": 0.55, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 4.3355193550722037e-07, "loss": 0.0518, "projector_lr": 1.3006558065216612e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53515625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.71875, "sft_loss": 0.81640625, "step": 3419 }, { "dpo_loss": 0.12255859375, "epoch": 0.55, "final_loss": 0.12255859375, "grad_norm": 0.0, "learning_rate": 4.3330030947667404e-07, "loss": 0.1526, "projector_lr": 1.2999009284300223e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 6.09375, "rewards_train/rejected": -6.8125, "sft_loss": 0.53125, "step": 3420 }, { "dpo_loss": 0.140625, "epoch": 0.55, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 4.330487006435485e-07, "loss": 0.203, "projector_lr": 1.2991461019306455e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.9375, "sft_loss": 0.8984375, "step": 3421 }, { "dpo_loss": 0.43359375, "epoch": 0.55, "final_loss": 0.43359375, "grad_norm": 0.0, "learning_rate": 4.3279710907271715e-07, "loss": 0.2369, "projector_lr": 1.2983913272181516e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.91015625, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.90625, "sft_loss": 0.62109375, "step": 3422 }, { "dpo_loss": 0.1669921875, "epoch": 0.55, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 4.325455348290488e-07, "loss": 0.1275, "projector_lr": 1.2976366044871463e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.21875, "sft_loss": 1.046875, "step": 3423 }, { "dpo_loss": 0.04638671875, "epoch": 0.55, "final_loss": 0.04638671875, "grad_norm": 0.0, "learning_rate": 4.322939779774076e-07, "loss": 0.2376, "projector_lr": 1.2968819339322229e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5625, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.1875, "sft_loss": 0.8828125, "step": 3424 }, { "dpo_loss": 0.302734375, "epoch": 0.55, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 4.320424385826538e-07, "loss": 0.2843, "projector_lr": 1.2961273157479615e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.21875, "rewards_train/margins": 3.0625, "rewards_train/rejected": -5.28125, "sft_loss": 0.921875, "step": 3425 }, { "dpo_loss": 0.0341796875, "epoch": 0.55, "final_loss": 0.0341796875, "grad_norm": 0.0, "learning_rate": 4.31790916709642e-07, "loss": 0.2687, "projector_lr": 1.2953727501289262e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.78125, "sft_loss": 0.80859375, "step": 3426 }, { "dpo_loss": 0.07373046875, "epoch": 0.55, "final_loss": 0.07373046875, "grad_norm": 0.0, "learning_rate": 4.3153941242322345e-07, "loss": 0.0468, "projector_lr": 1.2946182372696703e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.375, "sft_loss": 0.80078125, "step": 3427 }, { "dpo_loss": 0.4609375, "epoch": 0.55, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 4.312879257882443e-07, "loss": 0.2512, "projector_lr": 1.293863777364733e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7421875, "rewards_train/margins": 3.5625, "rewards_train/rejected": -5.3125, "sft_loss": 0.828125, "step": 3428 }, { "dpo_loss": 0.2197265625, "epoch": 0.55, "final_loss": 0.2197265625, "grad_norm": 0.0, "learning_rate": 4.310364568695464e-07, "loss": 0.1523, "projector_lr": 1.2931093706086392e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 2.84375, "rewards_train/rejected": -4.09375, "sft_loss": 0.7265625, "step": 3429 }, { "dpo_loss": 0.10888671875, "epoch": 0.55, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 4.307850057319667e-07, "loss": 0.1129, "projector_lr": 1.2923550171959002e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.15625, "sft_loss": 0.7578125, "step": 3430 }, { "dpo_loss": 0.125, "epoch": 0.55, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 4.305335724403377e-07, "loss": 0.1097, "projector_lr": 1.2916007173210132e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.8125, "sft_loss": 0.7109375, "step": 3431 }, { "dpo_loss": 0.1279296875, "epoch": 0.55, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 4.3028215705948744e-07, "loss": 0.5631, "projector_lr": 1.2908464711784625e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8984375, "rewards_train/margins": 5.28125, "rewards_train/rejected": -7.1875, "sft_loss": 0.8671875, "step": 3432 }, { "dpo_loss": 0.052734375, "epoch": 0.55, "final_loss": 0.052734375, "grad_norm": 0.0, "learning_rate": 4.300307596542393e-07, "loss": 0.1355, "projector_lr": 1.290092278962718e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 6.46875, "rewards_train/rejected": -7.03125, "sft_loss": 0.6171875, "step": 3433 }, { "dpo_loss": 0.087890625, "epoch": 0.55, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 4.2977938028941194e-07, "loss": 0.0564, "projector_lr": 1.289338140868236e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.875, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.34375, "sft_loss": 0.76171875, "step": 3434 }, { "dpo_loss": 0.0478515625, "epoch": 0.55, "final_loss": 0.0478515625, "grad_norm": 0.0, "learning_rate": 4.295280190298194e-07, "loss": 0.1806, "projector_lr": 1.2885840570894582e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.90625, "sft_loss": 0.80078125, "step": 3435 }, { "dpo_loss": 0.027587890625, "epoch": 0.55, "final_loss": 0.027587890625, "grad_norm": 0.0, "learning_rate": 4.2927667594027116e-07, "loss": 0.034, "projector_lr": 1.2878300278208136e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 6.40625, "rewards_train/rejected": -7.375, "sft_loss": 0.74609375, "step": 3436 }, { "dpo_loss": 0.419921875, "epoch": 0.55, "final_loss": 0.419921875, "grad_norm": 0.0, "learning_rate": 4.290253510855717e-07, "loss": 0.3564, "projector_lr": 1.2870760532567151e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 6.125, "rewards_train/rejected": -7.46875, "sft_loss": 0.953125, "step": 3437 }, { "dpo_loss": 1.0625, "epoch": 0.55, "final_loss": 1.0625, "grad_norm": 0.0, "learning_rate": 4.287740445305212e-07, "loss": 0.5704, "projector_lr": 1.2863221335915636e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.46875, "rewards_train/margins": 2.703125, "rewards_train/rejected": -5.1875, "sft_loss": 1.0625, "step": 3438 }, { "dpo_loss": 0.185546875, "epoch": 0.55, "final_loss": 0.185546875, "grad_norm": 0.0, "learning_rate": 4.2852275633991477e-07, "loss": 0.094, "projector_lr": 1.2855682690197443e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.15625, "sft_loss": 0.62109375, "step": 3439 }, { "dpo_loss": 0.05078125, "epoch": 0.55, "final_loss": 0.05078125, "grad_norm": 0.0, "learning_rate": 4.2827148657854293e-07, "loss": 0.1616, "projector_lr": 1.2848144597356288e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.0625, "sft_loss": 0.74609375, "step": 3440 }, { "dpo_loss": 0.00830078125, "epoch": 0.55, "final_loss": 0.00830078125, "grad_norm": 0.0, "learning_rate": 4.280202353111917e-07, "loss": 0.3042, "projector_lr": 1.2840607059335751e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.6875, "sft_loss": 0.8203125, "step": 3441 }, { "dpo_loss": 0.0400390625, "epoch": 0.55, "final_loss": 0.0400390625, "grad_norm": 0.0, "learning_rate": 4.2776900260264203e-07, "loss": 0.0773, "projector_lr": 1.2833070078079262e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.8125, "sft_loss": 0.921875, "step": 3442 }, { "dpo_loss": 0.1572265625, "epoch": 0.55, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 4.2751778851767004e-07, "loss": 0.2456, "projector_lr": 1.2825533655530104e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.228515625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -5.8125, "sft_loss": 0.7578125, "step": 3443 }, { "dpo_loss": 0.197265625, "epoch": 0.55, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 4.2726659312104717e-07, "loss": 0.1536, "projector_lr": 1.2817997793631415e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.42578125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.78125, "sft_loss": 0.62109375, "step": 3444 }, { "dpo_loss": 0.228515625, "epoch": 0.55, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 4.2701541647754004e-07, "loss": 0.1763, "projector_lr": 1.2810462494326202e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84375, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.6875, "sft_loss": 0.59375, "step": 3445 }, { "dpo_loss": 0.330078125, "epoch": 0.55, "final_loss": 0.330078125, "grad_norm": 0.0, "learning_rate": 4.2676425865191047e-07, "loss": 0.5259, "projector_lr": 1.2802927759557314e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.859375, "rewards_train/margins": 3.28125, "rewards_train/rejected": -4.15625, "sft_loss": 0.69921875, "step": 3446 }, { "dpo_loss": 0.19921875, "epoch": 0.55, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 4.2651311970891555e-07, "loss": 0.1698, "projector_lr": 1.2795393591267468e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.484375, "rewards_train/margins": 2.265625, "rewards_train/rejected": -2.75, "sft_loss": 0.59765625, "step": 3447 }, { "dpo_loss": 0.44140625, "epoch": 0.55, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 4.26261999713307e-07, "loss": 0.2765, "projector_lr": 1.278785999139921e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 3.34375, "rewards_train/rejected": -4.59375, "sft_loss": 1.0390625, "step": 3448 }, { "dpo_loss": 0.498046875, "epoch": 0.55, "final_loss": 0.498046875, "grad_norm": 0.0, "learning_rate": 4.260108987298321e-07, "loss": 0.3132, "projector_lr": 1.2780326961894964e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.125, "sft_loss": 0.54296875, "step": 3449 }, { "dpo_loss": 0.48046875, "epoch": 0.55, "final_loss": 0.48046875, "grad_norm": 0.0, "learning_rate": 4.257598168232332e-07, "loss": 0.2589, "projector_lr": 1.2772794504696997e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 2.78125, "rewards_train/rejected": -4.40625, "sft_loss": 0.83203125, "step": 3450 }, { "dpo_loss": 0.205078125, "epoch": 0.55, "final_loss": 0.205078125, "grad_norm": 0.0, "learning_rate": 4.255087540582476e-07, "loss": 0.2196, "projector_lr": 1.2765262621747428e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.375, "sft_loss": 1.2734375, "step": 3451 }, { "dpo_loss": 0.1123046875, "epoch": 0.55, "final_loss": 0.1123046875, "grad_norm": 0.0, "learning_rate": 4.2525771049960797e-07, "loss": 0.0694, "projector_lr": 1.275773131498824e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.09375, "sft_loss": 0.9609375, "step": 3452 }, { "dpo_loss": 0.024169921875, "epoch": 0.55, "final_loss": 0.024169921875, "grad_norm": 0.0, "learning_rate": 4.2500668621204127e-07, "loss": 0.041, "projector_lr": 1.2750200586361239e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.5625, "sft_loss": 0.60546875, "step": 3453 }, { "dpo_loss": 0.26953125, "epoch": 0.55, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 4.2475568126027036e-07, "loss": 0.3983, "projector_lr": 1.274267043780811e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.234375, "rewards_train/margins": 3.625, "rewards_train/rejected": -5.84375, "sft_loss": 0.96875, "step": 3454 }, { "dpo_loss": 0.32421875, "epoch": 0.55, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 4.245046957090126e-07, "loss": 0.2462, "projector_lr": 1.273514087127038e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.078125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.5625, "sft_loss": 0.89453125, "step": 3455 }, { "dpo_loss": 0.1455078125, "epoch": 0.55, "final_loss": 0.1455078125, "grad_norm": 0.0, "learning_rate": 4.2425372962298075e-07, "loss": 0.0795, "projector_lr": 1.2727611888689422e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.46875, "sft_loss": 0.72265625, "step": 3456 }, { "dpo_loss": 0.0155029296875, "epoch": 0.55, "final_loss": 0.0155029296875, "grad_norm": 0.0, "learning_rate": 4.2400278306688185e-07, "loss": 0.0379, "projector_lr": 1.2720083492006458e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.640625, "rewards_train/margins": 6.6875, "rewards_train/rejected": -7.34375, "sft_loss": 0.515625, "step": 3457 }, { "dpo_loss": 0.10498046875, "epoch": 0.55, "final_loss": 0.10498046875, "grad_norm": 0.0, "learning_rate": 4.237518561054186e-07, "loss": 0.0627, "projector_lr": 1.271255568316256e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.25, "rewards_train/margins": 5.8125, "rewards_train/rejected": -7.0625, "sft_loss": 1.21875, "step": 3458 }, { "dpo_loss": 0.04541015625, "epoch": 0.55, "final_loss": 0.04541015625, "grad_norm": 0.0, "learning_rate": 4.2350094880328846e-07, "loss": 0.0359, "projector_lr": 1.2705028464098654e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.21875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.625, "sft_loss": 0.7734375, "step": 3459 }, { "dpo_loss": 0.400390625, "epoch": 0.55, "final_loss": 0.400390625, "grad_norm": 0.0, "learning_rate": 4.232500612251837e-07, "loss": 0.2283, "projector_lr": 1.2697501836755513e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.578125, "rewards_train/margins": 2.875, "rewards_train/rejected": -4.4375, "sft_loss": 0.99609375, "step": 3460 }, { "dpo_loss": 0.10986328125, "epoch": 0.55, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 4.229991934357918e-07, "loss": 0.239, "projector_lr": 1.2689975803073753e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.75, "sft_loss": 0.94140625, "step": 3461 }, { "dpo_loss": 0.25, "epoch": 0.55, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 4.227483454997944e-07, "loss": 0.1926, "projector_lr": 1.2682450364993834e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -5.28125, "sft_loss": 0.9765625, "step": 3462 }, { "dpo_loss": 0.875, "epoch": 0.55, "final_loss": 0.875, "grad_norm": 0.0, "learning_rate": 4.224975174818688e-07, "loss": 0.4862, "projector_lr": 1.2674925524456065e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.9921875, "rewards_train/margins": 2.046875, "rewards_train/rejected": -4.03125, "sft_loss": 0.73828125, "step": 3463 }, { "dpo_loss": 0.2041015625, "epoch": 0.55, "final_loss": 0.2041015625, "grad_norm": 0.0, "learning_rate": 4.2224670944668694e-07, "loss": 0.1257, "projector_lr": 1.2667401283400608e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 2.84375, "rewards_train/rejected": -3.90625, "sft_loss": 0.66015625, "step": 3464 }, { "dpo_loss": 0.22265625, "epoch": 0.55, "final_loss": 0.22265625, "grad_norm": 0.0, "learning_rate": 4.219959214589157e-07, "loss": 0.2527, "projector_lr": 1.2659877643767472e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.71875, "rewards_train/margins": 3.03125, "rewards_train/rejected": -4.75, "sft_loss": 0.93359375, "step": 3465 }, { "dpo_loss": 0.26953125, "epoch": 0.55, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 4.217451535832162e-07, "loss": 0.2706, "projector_lr": 1.2652354607496488e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.96875, "sft_loss": 0.87890625, "step": 3466 }, { "dpo_loss": 0.11181640625, "epoch": 0.55, "final_loss": 0.11181640625, "grad_norm": 0.0, "learning_rate": 4.214944058842451e-07, "loss": 0.1486, "projector_lr": 1.2644832176527354e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.625, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.53125, "sft_loss": 0.58984375, "step": 3467 }, { "dpo_loss": 0.185546875, "epoch": 0.55, "final_loss": 0.185546875, "grad_norm": 0.0, "learning_rate": 4.2124367842665355e-07, "loss": 0.0992, "projector_lr": 1.2637310352799608e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.68359375, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.1875, "sft_loss": 0.55859375, "step": 3468 }, { "dpo_loss": 0.6875, "epoch": 0.56, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 4.2099297127508755e-07, "loss": 0.4405, "projector_lr": 1.2629789138252628e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.6875, "sft_loss": 0.7578125, "step": 3469 }, { "dpo_loss": 0.06591796875, "epoch": 0.56, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 4.2074228449418784e-07, "loss": 0.1204, "projector_lr": 1.2622268534825636e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 3.6875, "rewards_train/rejected": -5.0625, "sft_loss": 1.0, "step": 3470 }, { "dpo_loss": 0.76171875, "epoch": 0.56, "final_loss": 0.76171875, "grad_norm": 0.0, "learning_rate": 4.2049161814858993e-07, "loss": 0.541, "projector_lr": 1.2614748544457699e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8984375, "rewards_train/margins": 3.765625, "rewards_train/rejected": -5.65625, "sft_loss": 0.921875, "step": 3471 }, { "dpo_loss": 0.09130859375, "epoch": 0.56, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 4.2024097230292375e-07, "loss": 0.1051, "projector_lr": 1.2607229169087715e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22265625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -4.90625, "sft_loss": 0.70703125, "step": 3472 }, { "dpo_loss": 0.353515625, "epoch": 0.56, "final_loss": 0.353515625, "grad_norm": 0.0, "learning_rate": 4.1999034702181447e-07, "loss": 0.2208, "projector_lr": 1.2599710410654436e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.09375, "sft_loss": 0.88671875, "step": 3473 }, { "dpo_loss": 0.06884765625, "epoch": 0.56, "final_loss": 0.06884765625, "grad_norm": 0.0, "learning_rate": 4.1973974236988175e-07, "loss": 0.1295, "projector_lr": 1.2592192271096454e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.875, "sft_loss": 0.9453125, "step": 3474 }, { "dpo_loss": 0.365234375, "epoch": 0.56, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 4.1948915841173967e-07, "loss": 0.3478, "projector_lr": 1.2584674752352191e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.3125, "sft_loss": 0.796875, "step": 3475 }, { "dpo_loss": 0.14453125, "epoch": 0.56, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 4.192385952119973e-07, "loss": 0.161, "projector_lr": 1.257715785635992e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.15625, "rewards_train/margins": 3.203125, "rewards_train/rejected": -5.375, "sft_loss": 0.828125, "step": 3476 }, { "dpo_loss": 0.197265625, "epoch": 0.56, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 4.1898805283525834e-07, "loss": 0.2122, "projector_lr": 1.256964158505775e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.21875, "sft_loss": 0.84765625, "step": 3477 }, { "dpo_loss": 0.4375, "epoch": 0.56, "final_loss": 0.4375, "grad_norm": 0.0, "learning_rate": 4.187375313461209e-07, "loss": 0.3635, "projector_lr": 1.2562125940383628e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.3125, "sft_loss": 1.0546875, "step": 3478 }, { "dpo_loss": 0.006805419921875, "epoch": 0.56, "final_loss": 0.006805419921875, "grad_norm": 0.0, "learning_rate": 4.1848703080917794e-07, "loss": 0.0331, "projector_lr": 1.255461092427534e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 6.84375, "rewards_train/rejected": -7.65625, "sft_loss": 0.64453125, "step": 3479 }, { "dpo_loss": 0.2490234375, "epoch": 0.56, "final_loss": 0.2490234375, "grad_norm": 0.0, "learning_rate": 4.182365512890168e-07, "loss": 0.2145, "projector_lr": 1.2547096538670505e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 3.59375, "rewards_train/rejected": -4.59375, "sft_loss": 0.90625, "step": 3480 }, { "dpo_loss": 0.265625, "epoch": 0.56, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 4.1798609285021965e-07, "loss": 0.1667, "projector_lr": 1.253958278550659e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.1875, "sft_loss": 0.80859375, "step": 3481 }, { "dpo_loss": 0.1904296875, "epoch": 0.56, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 4.177356555573629e-07, "loss": 0.2178, "projector_lr": 1.2532069666720889e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.375, "sft_loss": 0.48828125, "step": 3482 }, { "dpo_loss": 0.154296875, "epoch": 0.56, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 4.174852394750181e-07, "loss": 0.3974, "projector_lr": 1.2524557184250542e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.71875, "sft_loss": 0.61328125, "step": 3483 }, { "dpo_loss": 0.1611328125, "epoch": 0.56, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 4.172348446677504e-07, "loss": 0.2297, "projector_lr": 1.2517045340032513e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46875, "rewards_train/margins": 6.15625, "rewards_train/rejected": -6.625, "sft_loss": 0.87109375, "step": 3484 }, { "dpo_loss": 0.291015625, "epoch": 0.56, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 4.169844712001202e-07, "loss": 0.2054, "projector_lr": 1.2509534136003607e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.125, "sft_loss": 0.8125, "step": 3485 }, { "dpo_loss": 0.049560546875, "epoch": 0.56, "final_loss": 0.049560546875, "grad_norm": 0.0, "learning_rate": 4.167341191366823e-07, "loss": 0.0973, "projector_lr": 1.2502023574100468e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 6.5, "rewards_train/rejected": -7.5, "sft_loss": 0.61328125, "step": 3486 }, { "dpo_loss": 0.302734375, "epoch": 0.56, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 4.1648378854198574e-07, "loss": 0.3329, "projector_lr": 1.2494513656259573e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6875, "rewards_train/margins": 2.359375, "rewards_train/rejected": -3.046875, "sft_loss": 1.046875, "step": 3487 }, { "dpo_loss": 0.10791015625, "epoch": 0.56, "final_loss": 0.10791015625, "grad_norm": 0.0, "learning_rate": 4.1623347948057445e-07, "loss": 0.0884, "projector_lr": 1.2487004384417236e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.21875, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.40625, "sft_loss": 0.6875, "step": 3488 }, { "dpo_loss": 0.1640625, "epoch": 0.56, "final_loss": 0.1640625, "grad_norm": 0.0, "learning_rate": 4.159831920169861e-07, "loss": 0.2385, "projector_lr": 1.2479495760509583e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.515625, "rewards_train/rejected": -4.53125, "sft_loss": 0.8984375, "step": 3489 }, { "dpo_loss": 0.28125, "epoch": 0.56, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 4.157329262157534e-07, "loss": 0.1794, "projector_lr": 1.2471987786472603e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.9375, "sft_loss": 0.67578125, "step": 3490 }, { "dpo_loss": 0.74609375, "epoch": 0.56, "final_loss": 0.74609375, "grad_norm": 0.0, "learning_rate": 4.1548268214140326e-07, "loss": 0.4244, "projector_lr": 1.2464480464242098e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8125, "rewards_train/margins": 3.515625, "rewards_train/rejected": -5.3125, "sft_loss": 0.88671875, "step": 3491 }, { "dpo_loss": 0.033447265625, "epoch": 0.56, "final_loss": 0.033447265625, "grad_norm": 0.0, "learning_rate": 4.1523245985845716e-07, "loss": 0.0777, "projector_lr": 1.2456973795753716e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.40625, "sft_loss": 0.58203125, "step": 3492 }, { "dpo_loss": 0.04541015625, "epoch": 0.56, "final_loss": 0.04541015625, "grad_norm": 0.0, "learning_rate": 4.149822594314306e-07, "loss": 0.0249, "projector_lr": 1.2449467782942919e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.1875, "sft_loss": 0.828125, "step": 3493 }, { "dpo_loss": 0.1650390625, "epoch": 0.56, "final_loss": 0.1650390625, "grad_norm": 0.0, "learning_rate": 4.147320809248336e-07, "loss": 0.1924, "projector_lr": 1.2441962427745009e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.96875, "sft_loss": 0.91015625, "step": 3494 }, { "dpo_loss": 0.158203125, "epoch": 0.56, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 4.144819244031708e-07, "loss": 0.2266, "projector_lr": 1.2434457732095123e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.75, "sft_loss": 0.74609375, "step": 3495 }, { "dpo_loss": 0.1875, "epoch": 0.56, "final_loss": 0.1875, "grad_norm": 0.0, "learning_rate": 4.142317899309408e-07, "loss": 0.1199, "projector_lr": 1.2426953697928224e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.609375, "rewards_train/margins": 4.625, "rewards_train/rejected": -6.25, "sft_loss": 0.7578125, "step": 3496 }, { "dpo_loss": 0.0732421875, "epoch": 0.56, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 4.139816775726369e-07, "loss": 0.1019, "projector_lr": 1.2419450327179108e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.859375, "rewards_train/margins": 5.1875, "rewards_train/rejected": -7.0625, "sft_loss": 0.81640625, "step": 3497 }, { "dpo_loss": 0.271484375, "epoch": 0.56, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 4.137315873927462e-07, "loss": 0.2405, "projector_lr": 1.2411947621782385e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 2.359375, "rewards_train/rejected": -3.078125, "sft_loss": 0.953125, "step": 3498 }, { "dpo_loss": 0.01806640625, "epoch": 0.56, "final_loss": 0.01806640625, "grad_norm": 0.0, "learning_rate": 4.134815194557504e-07, "loss": 0.0496, "projector_lr": 1.2404445583672513e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.474609375, "rewards_train/margins": 6.1875, "rewards_train/rejected": -6.65625, "sft_loss": 0.80859375, "step": 3499 }, { "dpo_loss": 0.44140625, "epoch": 0.56, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 4.1323147382612547e-07, "loss": 0.2736, "projector_lr": 1.2396944214783766e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 3.0625, "rewards_train/rejected": -3.984375, "sft_loss": 0.76953125, "step": 3500 }, { "dpo_loss": 0.09130859375, "epoch": 0.56, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 4.1298145056834185e-07, "loss": 0.1003, "projector_lr": 1.2389443517050255e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.8125, "sft_loss": 0.76953125, "step": 3501 }, { "dpo_loss": 0.30078125, "epoch": 0.56, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 4.1273144974686334e-07, "loss": 0.454, "projector_lr": 1.2381943492405902e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.75, "sft_loss": 0.7890625, "step": 3502 }, { "dpo_loss": 0.25390625, "epoch": 0.56, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 4.12481471426149e-07, "loss": 0.23, "projector_lr": 1.237444414278447e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.8125, "sft_loss": 0.765625, "step": 3503 }, { "dpo_loss": 0.041748046875, "epoch": 0.56, "final_loss": 0.041748046875, "grad_norm": 0.0, "learning_rate": 4.122315156706514e-07, "loss": 0.0891, "projector_lr": 1.2366945470119544e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 6.3125, "rewards_train/rejected": -6.96875, "sft_loss": 0.74609375, "step": 3504 }, { "dpo_loss": 0.2890625, "epoch": 0.56, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 4.1198158254481766e-07, "loss": 0.2533, "projector_lr": 1.2359447476344532e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75, "rewards_train/margins": 3.40625, "rewards_train/rejected": -4.15625, "sft_loss": 1.0, "step": 3505 }, { "dpo_loss": 0.30859375, "epoch": 0.56, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 4.117316721130892e-07, "loss": 0.2592, "projector_lr": 1.2351950163392676e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.498046875, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.21875, "sft_loss": 0.453125, "step": 3506 }, { "dpo_loss": 0.498046875, "epoch": 0.56, "final_loss": 0.498046875, "grad_norm": 0.0, "learning_rate": 4.1148178443990073e-07, "loss": 0.2577, "projector_lr": 1.2344453533197023e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6328125, "rewards_train/margins": 2.546875, "rewards_train/rejected": -4.1875, "sft_loss": 0.78515625, "step": 3507 }, { "dpo_loss": 0.259765625, "epoch": 0.56, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 4.112319195896821e-07, "loss": 0.2405, "projector_lr": 1.2336957587690462e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.96875, "sft_loss": 0.71875, "step": 3508 }, { "dpo_loss": 0.1171875, "epoch": 0.56, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 4.1098207762685664e-07, "loss": 0.0761, "projector_lr": 1.23294623288057e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.474609375, "rewards_train/margins": 5.375, "rewards_train/rejected": -5.84375, "sft_loss": 0.73046875, "step": 3509 }, { "dpo_loss": 0.07275390625, "epoch": 0.56, "final_loss": 0.07275390625, "grad_norm": 0.0, "learning_rate": 4.107322586158423e-07, "loss": 0.0602, "projector_lr": 1.232196775847527e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.65625, "sft_loss": 0.6796875, "step": 3510 }, { "dpo_loss": 0.29296875, "epoch": 0.56, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 4.1048246262105064e-07, "loss": 0.2237, "projector_lr": 1.231447387863152e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.671875, "rewards_train/margins": 1.65625, "rewards_train/rejected": -2.328125, "sft_loss": 0.7734375, "step": 3511 }, { "dpo_loss": 0.026123046875, "epoch": 0.56, "final_loss": 0.026123046875, "grad_norm": 0.0, "learning_rate": 4.102326897068873e-07, "loss": 0.2158, "projector_lr": 1.2306980691206622e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.15625, "sft_loss": 0.66015625, "step": 3512 }, { "dpo_loss": 0.76171875, "epoch": 0.56, "final_loss": 0.76171875, "grad_norm": 0.0, "learning_rate": 4.0998293993775234e-07, "loss": 0.3843, "projector_lr": 1.2299488198132571e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 3.34375, "rewards_train/rejected": -5.125, "sft_loss": 0.734375, "step": 3513 }, { "dpo_loss": 0.1279296875, "epoch": 0.56, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 4.0973321337803955e-07, "loss": 0.0795, "projector_lr": 1.2291996401341188e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.15625, "sft_loss": 0.5859375, "step": 3514 }, { "dpo_loss": 0.087890625, "epoch": 0.56, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 4.0948351009213697e-07, "loss": 0.1548, "projector_lr": 1.228450530276411e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.703125, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.625, "sft_loss": 0.58984375, "step": 3515 }, { "dpo_loss": 0.2578125, "epoch": 0.56, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 4.092338301444263e-07, "loss": 0.1835, "projector_lr": 1.227701490433279e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 3.203125, "rewards_train/rejected": -4.1875, "sft_loss": 0.796875, "step": 3516 }, { "dpo_loss": 0.271484375, "epoch": 0.56, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 4.089841735992835e-07, "loss": 0.2489, "projector_lr": 1.2269525207978506e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.40625, "sft_loss": 0.69921875, "step": 3517 }, { "dpo_loss": 0.1884765625, "epoch": 0.56, "final_loss": 0.1884765625, "grad_norm": 0.0, "learning_rate": 4.087345405210785e-07, "loss": 0.2012, "projector_lr": 1.2262036215632357e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.68359375, "rewards_train/margins": 3.796875, "rewards_train/rejected": -4.46875, "sft_loss": 0.85546875, "step": 3518 }, { "dpo_loss": 0.1826171875, "epoch": 0.56, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 4.084849309741751e-07, "loss": 0.1468, "projector_lr": 1.2254547929225253e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.41015625, "rewards_train/margins": 4.53125, "rewards_train/rejected": -4.9375, "sft_loss": 0.74609375, "step": 3519 }, { "dpo_loss": 0.240234375, "epoch": 0.56, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 4.082353450229308e-07, "loss": 0.2966, "projector_lr": 1.2247060350687925e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9296875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -7.21875, "sft_loss": 0.75390625, "step": 3520 }, { "dpo_loss": 0.0908203125, "epoch": 0.56, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 4.0798578273169733e-07, "loss": 0.1867, "projector_lr": 1.223957348195092e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.53125, "sft_loss": 0.76171875, "step": 3521 }, { "dpo_loss": 0.255859375, "epoch": 0.56, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 4.0773624416482033e-07, "loss": 0.1855, "projector_lr": 1.2232087324944611e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.421875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.71875, "sft_loss": 0.765625, "step": 3522 }, { "dpo_loss": 0.07470703125, "epoch": 0.56, "final_loss": 0.07470703125, "grad_norm": 0.0, "learning_rate": 4.0748672938663914e-07, "loss": 0.1532, "projector_lr": 1.2224601881599174e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.494140625, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.09375, "sft_loss": 0.71484375, "step": 3523 }, { "dpo_loss": 0.087890625, "epoch": 0.56, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 4.0723723846148734e-07, "loss": 0.1169, "projector_lr": 1.221711715384462e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.0, "sft_loss": 0.98046875, "step": 3524 }, { "dpo_loss": 0.037841796875, "epoch": 0.56, "final_loss": 0.037841796875, "grad_norm": 0.0, "learning_rate": 4.0698777145369163e-07, "loss": 0.0807, "projector_lr": 1.2209633143610748e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 5.8125, "rewards_train/rejected": -7.0625, "sft_loss": 0.90625, "step": 3525 }, { "dpo_loss": 0.130859375, "epoch": 0.56, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 4.0673832842757315e-07, "loss": 0.319, "projector_lr": 1.2202149852827195e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.90625, "sft_loss": 0.81640625, "step": 3526 }, { "dpo_loss": 0.30859375, "epoch": 0.56, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 4.0648890944744674e-07, "loss": 0.2164, "projector_lr": 1.2194667283423404e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.71875, "sft_loss": 0.875, "step": 3527 }, { "dpo_loss": 0.09912109375, "epoch": 0.56, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 4.062395145776212e-07, "loss": 0.1697, "projector_lr": 1.2187185437328636e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.71875, "sft_loss": 0.984375, "step": 3528 }, { "dpo_loss": 0.04931640625, "epoch": 0.56, "final_loss": 0.04931640625, "grad_norm": 0.0, "learning_rate": 4.059901438823984e-07, "loss": 0.1384, "projector_lr": 1.2179704316471955e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.375, "sft_loss": 0.65234375, "step": 3529 }, { "dpo_loss": 0.421875, "epoch": 0.56, "final_loss": 0.421875, "grad_norm": 0.0, "learning_rate": 4.057407974260748e-07, "loss": 0.4881, "projector_lr": 1.2172223922782246e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.3125, "rewards_train/margins": 3.453125, "rewards_train/rejected": -5.78125, "sft_loss": 1.125, "step": 3530 }, { "dpo_loss": 0.26953125, "epoch": 0.56, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 4.054914752729403e-07, "loss": 0.2193, "projector_lr": 1.216474425818821e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.71484375, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.8125, "sft_loss": 0.498046875, "step": 3531 }, { "dpo_loss": 0.466796875, "epoch": 0.57, "final_loss": 0.466796875, "grad_norm": 0.0, "learning_rate": 4.052421774872785e-07, "loss": 0.2945, "projector_lr": 1.2157265324618356e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.671875, "rewards_train/margins": 2.828125, "rewards_train/rejected": -4.5, "sft_loss": 0.703125, "step": 3532 }, { "dpo_loss": 0.482421875, "epoch": 0.57, "final_loss": 0.482421875, "grad_norm": 0.0, "learning_rate": 4.049929041333669e-07, "loss": 0.37, "projector_lr": 1.2149787124001008e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.5625, "sft_loss": 0.9921875, "step": 3533 }, { "dpo_loss": 0.02490234375, "epoch": 0.57, "final_loss": 0.02490234375, "grad_norm": 0.0, "learning_rate": 4.0474365527547616e-07, "loss": 0.3093, "projector_lr": 1.2142309658264286e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1279296875, "rewards_train/margins": 5.65625, "rewards_train/rejected": -5.53125, "sft_loss": 0.66796875, "step": 3534 }, { "dpo_loss": 0.1552734375, "epoch": 0.57, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 4.0449443097787124e-07, "loss": 0.1923, "projector_lr": 1.2134832929336138e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.875, "sft_loss": 0.96484375, "step": 3535 }, { "dpo_loss": 0.5078125, "epoch": 0.57, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 4.0424523130481047e-07, "loss": 0.3462, "projector_lr": 1.2127356939144316e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -2.0625, "rewards_train/margins": 1.3671875, "rewards_train/rejected": -3.421875, "sft_loss": 1.1015625, "step": 3536 }, { "dpo_loss": 0.01220703125, "epoch": 0.57, "final_loss": 0.01220703125, "grad_norm": 0.0, "learning_rate": 4.0399605632054623e-07, "loss": 0.1102, "projector_lr": 1.2119881689616387e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.375, "sft_loss": 0.828125, "step": 3537 }, { "dpo_loss": 0.1708984375, "epoch": 0.57, "final_loss": 0.1708984375, "grad_norm": 0.0, "learning_rate": 4.037469060893237e-07, "loss": 0.487, "projector_lr": 1.2112407182679712e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.40625, "sft_loss": 0.75, "step": 3538 }, { "dpo_loss": 0.248046875, "epoch": 0.57, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 4.034977806753824e-07, "loss": 0.1316, "projector_lr": 1.2104933420261471e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 3.640625, "rewards_train/rejected": -4.34375, "sft_loss": 0.55859375, "step": 3539 }, { "dpo_loss": 0.2470703125, "epoch": 0.57, "final_loss": 0.2470703125, "grad_norm": 0.0, "learning_rate": 4.032486801429552e-07, "loss": 0.1711, "projector_lr": 1.2097460404288657e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.40625, "sft_loss": 0.69921875, "step": 3540 }, { "dpo_loss": 0.490234375, "epoch": 0.57, "final_loss": 0.490234375, "grad_norm": 0.0, "learning_rate": 4.0299960455626865e-07, "loss": 0.361, "projector_lr": 1.208998813668806e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.875, "sft_loss": 0.66796875, "step": 3541 }, { "dpo_loss": 0.07470703125, "epoch": 0.57, "final_loss": 0.07470703125, "grad_norm": 0.0, "learning_rate": 4.0275055397954296e-07, "loss": 0.103, "projector_lr": 1.208251661938629e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3125, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.40625, "sft_loss": 0.60546875, "step": 3542 }, { "dpo_loss": 0.04248046875, "epoch": 0.57, "final_loss": 0.04248046875, "grad_norm": 0.0, "learning_rate": 4.025015284769913e-07, "loss": 0.1786, "projector_lr": 1.207504585430974e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11865234375, "rewards_train/margins": 6.90625, "rewards_train/rejected": -7.03125, "sft_loss": 0.859375, "step": 3543 }, { "dpo_loss": 0.1220703125, "epoch": 0.57, "final_loss": 0.1220703125, "grad_norm": 0.0, "learning_rate": 4.0225252811282114e-07, "loss": 0.1285, "projector_lr": 1.2067575843384635e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0400390625, "rewards_train/margins": 6.28125, "rewards_train/rejected": -6.3125, "sft_loss": 0.53125, "step": 3544 }, { "dpo_loss": 0.10498046875, "epoch": 0.57, "final_loss": 0.10498046875, "grad_norm": 0.0, "learning_rate": 4.0200355295123303e-07, "loss": 0.0783, "projector_lr": 1.2060106588536992e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.734375, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.46875, "sft_loss": 0.8515625, "step": 3545 }, { "dpo_loss": 0.1611328125, "epoch": 0.57, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 4.0175460305642105e-07, "loss": 0.1435, "projector_lr": 1.2052638091692633e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.625, "sft_loss": 0.79296875, "step": 3546 }, { "dpo_loss": 0.16015625, "epoch": 0.57, "final_loss": 0.16015625, "grad_norm": 0.0, "learning_rate": 4.0150567849257326e-07, "loss": 0.2704, "projector_lr": 1.20451703547772e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.59375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.15625, "sft_loss": 0.95703125, "step": 3547 }, { "dpo_loss": 0.2294921875, "epoch": 0.57, "final_loss": 0.2294921875, "grad_norm": 0.0, "learning_rate": 4.0125677932387024e-07, "loss": 0.1445, "projector_lr": 1.2037703379716108e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -6.28125, "sft_loss": 0.640625, "step": 3548 }, { "dpo_loss": 0.1455078125, "epoch": 0.57, "final_loss": 0.1455078125, "grad_norm": 0.0, "learning_rate": 4.010079056144868e-07, "loss": 0.1485, "projector_lr": 1.2030237168434606e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.478515625, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.03125, "sft_loss": 0.58984375, "step": 3549 }, { "dpo_loss": 0.09228515625, "epoch": 0.57, "final_loss": 0.09228515625, "grad_norm": 0.0, "learning_rate": 4.0075905742859096e-07, "loss": 0.1712, "projector_lr": 1.202277172285773e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.4375, "sft_loss": 0.8671875, "step": 3550 }, { "dpo_loss": 0.0908203125, "epoch": 0.57, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 4.005102348303442e-07, "loss": 0.2037, "projector_lr": 1.2015307044910327e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.28125, "sft_loss": 0.7578125, "step": 3551 }, { "dpo_loss": 0.01104736328125, "epoch": 0.57, "final_loss": 0.01104736328125, "grad_norm": 0.0, "learning_rate": 4.002614378839012e-07, "loss": 0.1226, "projector_lr": 1.2007843136517037e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1494140625, "rewards_train/margins": 8.5, "rewards_train/rejected": -8.625, "sft_loss": 0.54296875, "step": 3552 }, { "dpo_loss": 0.455078125, "epoch": 0.57, "final_loss": 0.455078125, "grad_norm": 0.0, "learning_rate": 4.0001266665341014e-07, "loss": 0.5512, "projector_lr": 1.2000379999602305e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.78515625, "rewards_train/margins": 1.8515625, "rewards_train/rejected": -2.640625, "sft_loss": 0.81640625, "step": 3553 }, { "dpo_loss": 0.33984375, "epoch": 0.57, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 3.9976392120301274e-07, "loss": 0.184, "projector_lr": 1.1992917636090383e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9609375, "rewards_train/margins": 3.421875, "rewards_train/rejected": -5.375, "sft_loss": 0.765625, "step": 3554 }, { "dpo_loss": 0.11962890625, "epoch": 0.57, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 3.995152015968439e-07, "loss": 0.1329, "projector_lr": 1.1985456047905317e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 3.640625, "rewards_train/rejected": -4.78125, "sft_loss": 0.8203125, "step": 3555 }, { "dpo_loss": 0.11181640625, "epoch": 0.57, "final_loss": 0.11181640625, "grad_norm": 0.0, "learning_rate": 3.9926650789903186e-07, "loss": 0.3598, "projector_lr": 1.1977995236970957e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 6.9375, "rewards_train/rejected": -8.0625, "sft_loss": 0.73828125, "step": 3556 }, { "dpo_loss": 0.04150390625, "epoch": 0.57, "final_loss": 0.04150390625, "grad_norm": 0.0, "learning_rate": 3.990178401736982e-07, "loss": 0.13, "projector_lr": 1.1970535205210948e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.78125, "sft_loss": 0.609375, "step": 3557 }, { "dpo_loss": 0.439453125, "epoch": 0.57, "final_loss": 0.439453125, "grad_norm": 0.0, "learning_rate": 3.9876919848495793e-07, "loss": 0.2459, "projector_lr": 1.1963075954548739e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 2.5, "rewards_train/rejected": -4.1875, "sft_loss": 0.9375, "step": 3558 }, { "dpo_loss": 0.12109375, "epoch": 0.57, "final_loss": 0.12109375, "grad_norm": 0.0, "learning_rate": 3.9852058289691905e-07, "loss": 0.0879, "projector_lr": 1.1955617486907573e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44921875, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.15625, "sft_loss": 0.484375, "step": 3559 }, { "dpo_loss": 0.05615234375, "epoch": 0.57, "final_loss": 0.05615234375, "grad_norm": 0.0, "learning_rate": 3.9827199347368317e-07, "loss": 0.1178, "projector_lr": 1.1948159804210496e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.140625, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.5625, "sft_loss": 0.69921875, "step": 3560 }, { "dpo_loss": 0.203125, "epoch": 0.57, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 3.980234302793448e-07, "loss": 0.1992, "projector_lr": 1.1940702908380346e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.466796875, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.46875, "sft_loss": 0.73828125, "step": 3561 }, { "dpo_loss": 0.0498046875, "epoch": 0.57, "final_loss": 0.0498046875, "grad_norm": 0.0, "learning_rate": 3.97774893377992e-07, "loss": 0.0829, "projector_lr": 1.193324680133976e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.5625, "sft_loss": 0.55078125, "step": 3562 }, { "dpo_loss": 0.2470703125, "epoch": 0.57, "final_loss": 0.2470703125, "grad_norm": 0.0, "learning_rate": 3.97526382833706e-07, "loss": 0.3052, "projector_lr": 1.192579148501118e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84765625, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.96875, "sft_loss": 0.6484375, "step": 3563 }, { "dpo_loss": 0.09912109375, "epoch": 0.57, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 3.972778987105612e-07, "loss": 0.0943, "projector_lr": 1.1918336961316836e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6171875, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.21875, "sft_loss": 0.79296875, "step": 3564 }, { "dpo_loss": 0.08056640625, "epoch": 0.57, "final_loss": 0.08056640625, "grad_norm": 0.0, "learning_rate": 3.97029441072625e-07, "loss": 0.1391, "projector_lr": 1.191088323217875e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1357421875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.5, "sft_loss": 0.65234375, "step": 3565 }, { "dpo_loss": 0.02197265625, "epoch": 0.57, "final_loss": 0.02197265625, "grad_norm": 0.0, "learning_rate": 3.9678100998395814e-07, "loss": 0.0427, "projector_lr": 1.1903430299518744e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 5.71875, "rewards_train/rejected": -7.1875, "sft_loss": 0.92578125, "step": 3566 }, { "dpo_loss": 0.06201171875, "epoch": 0.57, "final_loss": 0.06201171875, "grad_norm": 0.0, "learning_rate": 3.9653260550861453e-07, "loss": 0.1311, "projector_lr": 1.1895978165258437e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.68359375, "rewards_train/margins": 6.21875, "rewards_train/rejected": -6.90625, "sft_loss": 0.470703125, "step": 3567 }, { "dpo_loss": 0.30859375, "epoch": 0.57, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 3.962842277106412e-07, "loss": 0.2051, "projector_lr": 1.1888526831319236e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 2.171875, "rewards_train/rejected": -3.75, "sft_loss": 1.4140625, "step": 3568 }, { "dpo_loss": 0.12890625, "epoch": 0.57, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 3.9603587665407843e-07, "loss": 0.2227, "projector_lr": 1.1881076299622355e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.28125, "sft_loss": 0.6171875, "step": 3569 }, { "dpo_loss": 0.171875, "epoch": 0.57, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 3.9578755240295924e-07, "loss": 0.1183, "projector_lr": 1.187362657208878e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.53125, "rewards_train/margins": 3.5625, "rewards_train/rejected": -4.09375, "sft_loss": 0.70703125, "step": 3570 }, { "dpo_loss": 0.0296630859375, "epoch": 0.57, "final_loss": 0.0296630859375, "grad_norm": 0.0, "learning_rate": 3.9553925502131e-07, "loss": 0.0877, "projector_lr": 1.18661776506393e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 6.65625, "rewards_train/rejected": -7.625, "sft_loss": 0.98046875, "step": 3571 }, { "dpo_loss": 0.5625, "epoch": 0.57, "final_loss": 0.5625, "grad_norm": 0.0, "learning_rate": 3.9529098457315015e-07, "loss": 0.3054, "projector_lr": 1.1858729537194505e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.21875, "sft_loss": 0.75390625, "step": 3572 }, { "dpo_loss": 0.048095703125, "epoch": 0.57, "final_loss": 0.048095703125, "grad_norm": 0.0, "learning_rate": 3.950427411224922e-07, "loss": 0.0899, "projector_lr": 1.1851282233674767e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.15625, "sft_loss": 0.66796875, "step": 3573 }, { "dpo_loss": 0.09423828125, "epoch": 0.57, "final_loss": 0.09423828125, "grad_norm": 0.0, "learning_rate": 3.9479452473334186e-07, "loss": 0.3736, "projector_lr": 1.1843835742000257e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.59375, "sft_loss": 0.890625, "step": 3574 }, { "dpo_loss": 0.18359375, "epoch": 0.57, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 3.945463354696972e-07, "loss": 0.3275, "projector_lr": 1.1836390064090915e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.53125, "rewards_train/margins": 5.03125, "rewards_train/rejected": -7.5625, "sft_loss": 0.76953125, "step": 3575 }, { "dpo_loss": 0.2119140625, "epoch": 0.57, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 3.9429817339554995e-07, "loss": 0.2304, "projector_lr": 1.1828945201866498e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 3.59375, "rewards_train/rejected": -5.15625, "sft_loss": 1.0, "step": 3576 }, { "dpo_loss": 0.03271484375, "epoch": 0.57, "final_loss": 0.03271484375, "grad_norm": 0.0, "learning_rate": 3.9405003857488464e-07, "loss": 0.0254, "projector_lr": 1.182150115724654e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8125, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.78125, "sft_loss": 0.73828125, "step": 3577 }, { "dpo_loss": 0.40234375, "epoch": 0.57, "final_loss": 0.40234375, "grad_norm": 0.0, "learning_rate": 3.938019310716789e-07, "loss": 0.4251, "projector_lr": 1.1814057932150369e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.453125, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.4375, "sft_loss": 0.85546875, "step": 3578 }, { "dpo_loss": 0.484375, "epoch": 0.57, "final_loss": 0.484375, "grad_norm": 0.0, "learning_rate": 3.93553850949903e-07, "loss": 0.3139, "projector_lr": 1.1806615528497092e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0625, "rewards_train/margins": 2.546875, "rewards_train/rejected": -3.609375, "sft_loss": 0.89453125, "step": 3579 }, { "dpo_loss": 0.423828125, "epoch": 0.57, "final_loss": 0.423828125, "grad_norm": 0.0, "learning_rate": 3.933057982735203e-07, "loss": 0.8609, "projector_lr": 1.179917394820561e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.28125, "sft_loss": 1.046875, "step": 3580 }, { "dpo_loss": 0.1611328125, "epoch": 0.57, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 3.930577731064872e-07, "loss": 0.2312, "projector_lr": 1.1791733193194616e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.78125, "sft_loss": 0.859375, "step": 3581 }, { "dpo_loss": 0.55078125, "epoch": 0.57, "final_loss": 0.55078125, "grad_norm": 0.0, "learning_rate": 3.928097755127529e-07, "loss": 0.5021, "projector_lr": 1.1784293265382588e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.375, "rewards_train/margins": 2.015625, "rewards_train/rejected": -3.390625, "sft_loss": 0.828125, "step": 3582 }, { "dpo_loss": 0.12451171875, "epoch": 0.57, "final_loss": 0.12451171875, "grad_norm": 0.0, "learning_rate": 3.925618055562597e-07, "loss": 0.2621, "projector_lr": 1.1776854166687793e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.9375, "sft_loss": 0.625, "step": 3583 }, { "dpo_loss": 0.2158203125, "epoch": 0.57, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 3.923138633009423e-07, "loss": 0.1739, "projector_lr": 1.176941589902827e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.46875, "sft_loss": 0.9140625, "step": 3584 }, { "dpo_loss": 0.0927734375, "epoch": 0.57, "final_loss": 0.0927734375, "grad_norm": 0.0, "learning_rate": 3.9206594881072865e-07, "loss": 0.0776, "projector_lr": 1.176197846432186e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48046875, "rewards_train/margins": 3.265625, "rewards_train/rejected": -3.75, "sft_loss": 0.69921875, "step": 3585 }, { "dpo_loss": 0.08740234375, "epoch": 0.57, "final_loss": 0.08740234375, "grad_norm": 0.0, "learning_rate": 3.918180621495393e-07, "loss": 0.0975, "projector_lr": 1.1754541864486181e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71484375, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.78125, "sft_loss": 0.6796875, "step": 3586 }, { "dpo_loss": 0.130859375, "epoch": 0.57, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 3.915702033812883e-07, "loss": 0.0964, "projector_lr": 1.1747106101438648e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 4.375, "rewards_train/rejected": -4.96875, "sft_loss": 0.8515625, "step": 3587 }, { "dpo_loss": 0.01177978515625, "epoch": 0.57, "final_loss": 0.01177978515625, "grad_norm": 0.0, "learning_rate": 3.913223725698812e-07, "loss": 0.015, "projector_lr": 1.1739671177096437e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 7.0625, "rewards_train/rejected": -7.8125, "sft_loss": 0.60546875, "step": 3588 }, { "dpo_loss": 0.45703125, "epoch": 0.57, "final_loss": 0.45703125, "grad_norm": 0.0, "learning_rate": 3.9107456977921756e-07, "loss": 0.4038, "projector_lr": 1.1732237093376527e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.75, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.09375, "sft_loss": 0.640625, "step": 3589 }, { "dpo_loss": 0.46484375, "epoch": 0.57, "final_loss": 0.46484375, "grad_norm": 0.0, "learning_rate": 3.9082679507318913e-07, "loss": 0.4, "projector_lr": 1.1724803852195673e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 2.890625, "rewards_train/rejected": -4.0625, "sft_loss": 0.73046875, "step": 3590 }, { "dpo_loss": 0.1357421875, "epoch": 0.57, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 3.905790485156806e-07, "loss": 0.101, "projector_lr": 1.1717371455470417e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.375, "sft_loss": 0.86328125, "step": 3591 }, { "dpo_loss": 0.3125, "epoch": 0.57, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 3.9033133017056944e-07, "loss": 0.2371, "projector_lr": 1.1709939905117084e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9453125, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.96875, "sft_loss": 0.8984375, "step": 3592 }, { "dpo_loss": 0.294921875, "epoch": 0.57, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 3.900836401017258e-07, "loss": 0.1584, "projector_lr": 1.1702509203051774e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 2.921875, "rewards_train/rejected": -3.5625, "sft_loss": 0.73046875, "step": 3593 }, { "dpo_loss": 0.2001953125, "epoch": 0.58, "final_loss": 0.2001953125, "grad_norm": 0.0, "learning_rate": 3.898359783730121e-07, "loss": 0.1957, "projector_lr": 1.1695079351190366e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.03125, "sft_loss": 0.64453125, "step": 3594 }, { "dpo_loss": 0.0162353515625, "epoch": 0.58, "final_loss": 0.0162353515625, "grad_norm": 0.0, "learning_rate": 3.8958834504828423e-07, "loss": 0.1097, "projector_lr": 1.1687650351448528e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.205078125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -5.625, "sft_loss": 0.73046875, "step": 3595 }, { "dpo_loss": 0.318359375, "epoch": 0.58, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 3.893407401913904e-07, "loss": 0.2857, "projector_lr": 1.1680222205741712e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.38671875, "rewards_train/margins": 3.796875, "rewards_train/rejected": -4.1875, "sft_loss": 0.671875, "step": 3596 }, { "dpo_loss": 0.201171875, "epoch": 0.58, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 3.890931638661712e-07, "loss": 0.1593, "projector_lr": 1.1672794915985138e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 7.59375, "rewards_train/rejected": -8.0625, "sft_loss": 0.80078125, "step": 3597 }, { "dpo_loss": 0.0341796875, "epoch": 0.58, "final_loss": 0.0341796875, "grad_norm": 0.0, "learning_rate": 3.8884561613646026e-07, "loss": 0.0701, "projector_lr": 1.166536848409381e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.490234375, "rewards_train/margins": 6.875, "rewards_train/rejected": -6.40625, "sft_loss": 0.77734375, "step": 3598 }, { "dpo_loss": 0.0291748046875, "epoch": 0.58, "final_loss": 0.0291748046875, "grad_norm": 0.0, "learning_rate": 3.885980970660839e-07, "loss": 0.0914, "projector_lr": 1.1657942911982517e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.515625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.21875, "sft_loss": 0.7578125, "step": 3599 }, { "dpo_loss": 0.1083984375, "epoch": 0.58, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 3.883506067188605e-07, "loss": 0.3558, "projector_lr": 1.1650518201565817e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69140625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.84375, "sft_loss": 0.6875, "step": 3600 }, { "dpo_loss": 0.1875, "epoch": 0.58, "final_loss": 0.1875, "grad_norm": 0.0, "learning_rate": 3.8810314515860164e-07, "loss": 0.1672, "projector_lr": 1.1643094354758051e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.71875, "sft_loss": 0.6953125, "step": 3601 }, { "dpo_loss": 0.291015625, "epoch": 0.58, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 3.8785571244911105e-07, "loss": 0.2368, "projector_lr": 1.1635671373473331e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 2.71875, "rewards_train/rejected": -3.65625, "sft_loss": 0.86328125, "step": 3602 }, { "dpo_loss": 0.052978515625, "epoch": 0.58, "final_loss": 0.052978515625, "grad_norm": 0.0, "learning_rate": 3.876083086541852e-07, "loss": 0.2548, "projector_lr": 1.1628249259625558e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.71875, "sft_loss": 1.03125, "step": 3603 }, { "dpo_loss": 0.0201416015625, "epoch": 0.58, "final_loss": 0.0201416015625, "grad_norm": 0.0, "learning_rate": 3.8736093383761324e-07, "loss": 0.3329, "projector_lr": 1.16208280151284e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.703125, "rewards_train/margins": 6.90625, "rewards_train/rejected": -7.625, "sft_loss": 0.77734375, "step": 3604 }, { "dpo_loss": 0.06689453125, "epoch": 0.58, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 3.871135880631768e-07, "loss": 0.0556, "projector_lr": 1.1613407641895307e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4296875, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.21875, "sft_loss": 0.80859375, "step": 3605 }, { "dpo_loss": 0.04150390625, "epoch": 0.58, "final_loss": 0.04150390625, "grad_norm": 0.0, "learning_rate": 3.868662713946497e-07, "loss": 0.0576, "projector_lr": 1.1605988141839492e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.56640625, "rewards_train/margins": 7.03125, "rewards_train/rejected": -6.46875, "sft_loss": 0.46875, "step": 3606 }, { "dpo_loss": 0.3984375, "epoch": 0.58, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 3.866189838957984e-07, "loss": 0.2185, "projector_lr": 1.1598569516873953e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 2.0, "rewards_train/rejected": -3.484375, "sft_loss": 0.90625, "step": 3607 }, { "dpo_loss": 0.130859375, "epoch": 0.58, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 3.8637172563038214e-07, "loss": 0.1728, "projector_lr": 1.1591151768911465e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4765625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.3125, "sft_loss": 0.8203125, "step": 3608 }, { "dpo_loss": 0.208984375, "epoch": 0.58, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 3.8612449666215236e-07, "loss": 0.1764, "projector_lr": 1.1583734899864572e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84375, "rewards_train/margins": 7.59375, "rewards_train/rejected": -8.4375, "sft_loss": 0.76953125, "step": 3609 }, { "dpo_loss": 0.30078125, "epoch": 0.58, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 3.8587729705485317e-07, "loss": 0.4032, "projector_lr": 1.1576318911645596e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2890625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -4.6875, "sft_loss": 0.58203125, "step": 3610 }, { "dpo_loss": 0.119140625, "epoch": 0.58, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 3.8563012687222056e-07, "loss": 0.1598, "projector_lr": 1.1568903806166617e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.53125, "sft_loss": 0.73046875, "step": 3611 }, { "dpo_loss": 0.045166015625, "epoch": 0.58, "final_loss": 0.045166015625, "grad_norm": 0.0, "learning_rate": 3.853829861779835e-07, "loss": 0.4089, "projector_lr": 1.1561489585339505e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.0625, "sft_loss": 0.875, "step": 3612 }, { "dpo_loss": 0.06982421875, "epoch": 0.58, "final_loss": 0.06982421875, "grad_norm": 0.0, "learning_rate": 3.8513587503586315e-07, "loss": 0.2057, "projector_lr": 1.1554076251075895e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.25, "sft_loss": 0.8984375, "step": 3613 }, { "dpo_loss": 0.1611328125, "epoch": 0.58, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 3.8488879350957334e-07, "loss": 0.3278, "projector_lr": 1.15466638052872e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87890625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.90625, "sft_loss": 0.6484375, "step": 3614 }, { "dpo_loss": 0.267578125, "epoch": 0.58, "final_loss": 0.267578125, "grad_norm": 0.0, "learning_rate": 3.846417416628194e-07, "loss": 0.2147, "projector_lr": 1.1539252249884583e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 2.609375, "rewards_train/rejected": -4.03125, "sft_loss": 1.2109375, "step": 3615 }, { "dpo_loss": 0.1767578125, "epoch": 0.58, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 3.8439471955929997e-07, "loss": 0.1285, "projector_lr": 1.1531841586779e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9375, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.0625, "sft_loss": 1.03125, "step": 3616 }, { "dpo_loss": 0.423828125, "epoch": 0.58, "final_loss": 0.423828125, "grad_norm": 0.0, "learning_rate": 3.841477272627056e-07, "loss": 0.2284, "projector_lr": 1.152443181788117e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 3.4375, "rewards_train/rejected": -4.71875, "sft_loss": 0.765625, "step": 3617 }, { "dpo_loss": 0.042724609375, "epoch": 0.58, "final_loss": 0.042724609375, "grad_norm": 0.0, "learning_rate": 3.8390076483671916e-07, "loss": 0.0455, "projector_lr": 1.1517022945101575e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.84375, "sft_loss": 0.73828125, "step": 3618 }, { "dpo_loss": 0.087890625, "epoch": 0.58, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 3.836538323450162e-07, "loss": 0.1094, "projector_lr": 1.1509614970350486e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.625, "sft_loss": 0.65234375, "step": 3619 }, { "dpo_loss": 0.037109375, "epoch": 0.58, "final_loss": 0.037109375, "grad_norm": 0.0, "learning_rate": 3.834069298512636e-07, "loss": 0.0923, "projector_lr": 1.150220789553791e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.443359375, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.0625, "sft_loss": 0.7109375, "step": 3620 }, { "dpo_loss": 0.2001953125, "epoch": 0.58, "final_loss": 0.2001953125, "grad_norm": 0.0, "learning_rate": 3.831600574191215e-07, "loss": 0.3799, "projector_lr": 1.1494801722573646e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.40625, "rewards_train/margins": 2.890625, "rewards_train/rejected": -4.3125, "sft_loss": 0.8203125, "step": 3621 }, { "dpo_loss": 0.14453125, "epoch": 0.58, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 3.829132151122417e-07, "loss": 0.103, "projector_lr": 1.1487396453367253e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96875, "rewards_train/margins": 3.3125, "rewards_train/rejected": -4.28125, "sft_loss": 0.70703125, "step": 3622 }, { "dpo_loss": 0.080078125, "epoch": 0.58, "final_loss": 0.080078125, "grad_norm": 0.0, "learning_rate": 3.826664029942689e-07, "loss": 0.1156, "projector_lr": 1.147999208982807e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.4375, "sft_loss": 0.77734375, "step": 3623 }, { "dpo_loss": 0.0576171875, "epoch": 0.58, "final_loss": 0.0576171875, "grad_norm": 0.0, "learning_rate": 3.82419621128839e-07, "loss": 0.1187, "projector_lr": 1.1472588633865172e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.216796875, "rewards_train/margins": 5.46875, "rewards_train/rejected": -5.6875, "sft_loss": 0.71484375, "step": 3624 }, { "dpo_loss": 0.12255859375, "epoch": 0.58, "final_loss": 0.12255859375, "grad_norm": 0.0, "learning_rate": 3.821728695795809e-07, "loss": 0.0928, "projector_lr": 1.1465186087387429e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 3.734375, "rewards_train/rejected": -4.5625, "sft_loss": 0.63671875, "step": 3625 }, { "dpo_loss": 0.0245361328125, "epoch": 0.58, "final_loss": 0.0245361328125, "grad_norm": 0.0, "learning_rate": 3.819261484101154e-07, "loss": 0.1228, "projector_lr": 1.1457784452303463e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 5.8125, "rewards_train/rejected": -7.21875, "sft_loss": 0.671875, "step": 3626 }, { "dpo_loss": 0.1357421875, "epoch": 0.58, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 3.816794576840555e-07, "loss": 0.2096, "projector_lr": 1.1450383730521667e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.375, "sft_loss": 0.59375, "step": 3627 }, { "dpo_loss": 0.029052734375, "epoch": 0.58, "final_loss": 0.029052734375, "grad_norm": 0.0, "learning_rate": 3.814327974650066e-07, "loss": 0.095, "projector_lr": 1.1442983923950198e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.494140625, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.59375, "sft_loss": 0.6484375, "step": 3628 }, { "dpo_loss": 0.072265625, "epoch": 0.58, "final_loss": 0.072265625, "grad_norm": 0.0, "learning_rate": 3.811861678165656e-07, "loss": 0.0824, "projector_lr": 1.1435585034496969e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.53125, "sft_loss": 0.66796875, "step": 3629 }, { "dpo_loss": 0.2392578125, "epoch": 0.58, "final_loss": 0.2392578125, "grad_norm": 0.0, "learning_rate": 3.8093956880232204e-07, "loss": 0.2175, "projector_lr": 1.1428187064069663e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.1875, "sft_loss": 0.6796875, "step": 3630 }, { "dpo_loss": 0.123046875, "epoch": 0.58, "final_loss": 0.123046875, "grad_norm": 0.0, "learning_rate": 3.806930004858575e-07, "loss": 0.3074, "projector_lr": 1.1420790014575727e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.71875, "sft_loss": 1.0546875, "step": 3631 }, { "dpo_loss": 0.220703125, "epoch": 0.58, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 3.8044646293074566e-07, "loss": 0.2021, "projector_lr": 1.141339388792237e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.0625, "sft_loss": 0.671875, "step": 3632 }, { "dpo_loss": 0.04541015625, "epoch": 0.58, "final_loss": 0.04541015625, "grad_norm": 0.0, "learning_rate": 3.8019995620055197e-07, "loss": 0.2058, "projector_lr": 1.1405998686016558e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.78125, "sft_loss": 0.72265625, "step": 3633 }, { "dpo_loss": 0.07080078125, "epoch": 0.58, "final_loss": 0.07080078125, "grad_norm": 0.0, "learning_rate": 3.799534803588343e-07, "loss": 0.1163, "projector_lr": 1.139860441076503e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.828125, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.78125, "sft_loss": 0.74609375, "step": 3634 }, { "dpo_loss": 0.328125, "epoch": 0.58, "final_loss": 0.328125, "grad_norm": 0.0, "learning_rate": 3.797070354691423e-07, "loss": 0.1899, "projector_lr": 1.139121106407427e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.15625, "rewards_train/margins": 3.984375, "rewards_train/rejected": -5.125, "sft_loss": 0.91796875, "step": 3635 }, { "dpo_loss": 0.1044921875, "epoch": 0.58, "final_loss": 0.1044921875, "grad_norm": 0.0, "learning_rate": 3.7946062159501784e-07, "loss": 0.0773, "projector_lr": 1.1383818647850537e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4375, "rewards_train/margins": 7.40625, "rewards_train/rejected": -8.8125, "sft_loss": 0.9140625, "step": 3636 }, { "dpo_loss": 0.1796875, "epoch": 0.58, "final_loss": 0.1796875, "grad_norm": 0.0, "learning_rate": 3.792142387999947e-07, "loss": 0.1634, "projector_lr": 1.1376427163999843e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 3.59375, "rewards_train/rejected": -4.21875, "sft_loss": 1.0, "step": 3637 }, { "dpo_loss": 0.051513671875, "epoch": 0.58, "final_loss": 0.051513671875, "grad_norm": 0.0, "learning_rate": 3.789678871475985e-07, "loss": 0.1167, "projector_lr": 1.1369036614427955e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0849609375, "rewards_train/margins": 4.59375, "rewards_train/rejected": -4.65625, "sft_loss": 0.796875, "step": 3638 }, { "dpo_loss": 0.1357421875, "epoch": 0.58, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 3.7872156670134705e-07, "loss": 0.2288, "projector_lr": 1.1361647001040411e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.609375, "rewards_train/margins": 3.3125, "rewards_train/rejected": -4.9375, "sft_loss": 0.71875, "step": 3639 }, { "dpo_loss": 0.359375, "epoch": 0.58, "final_loss": 0.359375, "grad_norm": 0.0, "learning_rate": 3.7847527752475015e-07, "loss": 0.2246, "projector_lr": 1.1354258325742505e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.71875, "rewards_train/margins": 3.234375, "rewards_train/rejected": -3.953125, "sft_loss": 0.62890625, "step": 3640 }, { "dpo_loss": 0.1005859375, "epoch": 0.58, "final_loss": 0.1005859375, "grad_norm": 0.0, "learning_rate": 3.782290196813093e-07, "loss": 0.0873, "projector_lr": 1.134687059043928e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 6.46875, "rewards_train/rejected": -7.59375, "sft_loss": 0.72265625, "step": 3641 }, { "dpo_loss": 0.126953125, "epoch": 0.58, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 3.7798279323451803e-07, "loss": 0.1489, "projector_lr": 1.1339483797035542e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1318359375, "rewards_train/margins": 3.796875, "rewards_train/rejected": -3.921875, "sft_loss": 0.6953125, "step": 3642 }, { "dpo_loss": 0.1318359375, "epoch": 0.58, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 3.777365982478618e-07, "loss": 0.0778, "projector_lr": 1.1332097947435854e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.376953125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.5625, "sft_loss": 0.63671875, "step": 3643 }, { "dpo_loss": 0.022216796875, "epoch": 0.58, "final_loss": 0.022216796875, "grad_norm": 0.0, "learning_rate": 3.7749043478481797e-07, "loss": 0.1097, "projector_lr": 1.132471304354454e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.59375, "sft_loss": 0.703125, "step": 3644 }, { "dpo_loss": 0.26953125, "epoch": 0.58, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 3.772443029088557e-07, "loss": 0.2213, "projector_lr": 1.1317329087265673e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.625, "rewards_train/margins": 2.703125, "rewards_train/rejected": -3.328125, "sft_loss": 1.109375, "step": 3645 }, { "dpo_loss": 0.26953125, "epoch": 0.58, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 3.769982026834364e-07, "loss": 0.2439, "projector_lr": 1.1309946080503093e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.71875, "sft_loss": 0.7421875, "step": 3646 }, { "dpo_loss": 0.0595703125, "epoch": 0.58, "final_loss": 0.0595703125, "grad_norm": 0.0, "learning_rate": 3.7675213417201253e-07, "loss": 0.0878, "projector_lr": 1.1302564025160377e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 6.84375, "rewards_train/rejected": -7.46875, "sft_loss": 0.62109375, "step": 3647 }, { "dpo_loss": 0.0255126953125, "epoch": 0.58, "final_loss": 0.0255126953125, "grad_norm": 0.0, "learning_rate": 3.7650609743802885e-07, "loss": 0.0348, "projector_lr": 1.1295182923140866e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.953125, "rewards_train/margins": 6.53125, "rewards_train/rejected": -7.46875, "sft_loss": 0.80078125, "step": 3648 }, { "dpo_loss": 0.408203125, "epoch": 0.58, "final_loss": 0.408203125, "grad_norm": 0.0, "learning_rate": 3.762600925449221e-07, "loss": 0.2246, "projector_lr": 1.1287802776347663e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.78125, "rewards_train/margins": 2.671875, "rewards_train/rejected": -4.46875, "sft_loss": 0.73046875, "step": 3649 }, { "dpo_loss": 0.10302734375, "epoch": 0.58, "final_loss": 0.10302734375, "grad_norm": 0.0, "learning_rate": 3.760141195561207e-07, "loss": 0.1175, "projector_lr": 1.1280423586683623e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.96875, "sft_loss": 0.71484375, "step": 3650 }, { "dpo_loss": 0.0235595703125, "epoch": 0.58, "final_loss": 0.0235595703125, "grad_norm": 0.0, "learning_rate": 3.7576817853504445e-07, "loss": 0.0753, "projector_lr": 1.1273045356051335e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.3125, "sft_loss": 0.58984375, "step": 3651 }, { "dpo_loss": 0.1513671875, "epoch": 0.58, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 3.7552226954510524e-07, "loss": 0.0811, "projector_lr": 1.1265668086353158e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 6.5, "rewards_train/rejected": -7.59375, "sft_loss": 0.8515625, "step": 3652 }, { "dpo_loss": 0.04736328125, "epoch": 0.58, "final_loss": 0.04736328125, "grad_norm": 0.0, "learning_rate": 3.752763926497068e-07, "loss": 0.1177, "projector_lr": 1.1258291779491207e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8125, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.75, "sft_loss": 0.8984375, "step": 3653 }, { "dpo_loss": 0.0032958984375, "epoch": 0.58, "final_loss": 0.0032958984375, "grad_norm": 0.0, "learning_rate": 3.750305479122445e-07, "loss": 0.061, "projector_lr": 1.1250916437367335e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06201171875, "rewards_train/margins": 6.90625, "rewards_train/rejected": -6.96875, "sft_loss": 0.56640625, "step": 3654 }, { "dpo_loss": 0.015869140625, "epoch": 0.58, "final_loss": 0.015869140625, "grad_norm": 0.0, "learning_rate": 3.747847353961054e-07, "loss": 0.1194, "projector_lr": 1.1243542061883165e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1181640625, "rewards_train/margins": 6.65625, "rewards_train/rejected": -6.75, "sft_loss": 0.53515625, "step": 3655 }, { "dpo_loss": 0.0693359375, "epoch": 0.58, "final_loss": 0.0693359375, "grad_norm": 0.0, "learning_rate": 3.7453895516466795e-07, "loss": 0.1318, "projector_lr": 1.1236168654940038e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.375, "sft_loss": 0.62890625, "step": 3656 }, { "dpo_loss": 0.1904296875, "epoch": 0.59, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 3.7429320728130275e-07, "loss": 0.102, "projector_lr": 1.1228796218439084e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.5, "sft_loss": 1.2734375, "step": 3657 }, { "dpo_loss": 0.84765625, "epoch": 0.59, "final_loss": 0.84765625, "grad_norm": 0.0, "learning_rate": 3.740474918093718e-07, "loss": 0.4327, "projector_lr": 1.1221424754281155e-06, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -2.265625, "rewards_train/margins": 1.8671875, "rewards_train/rejected": -4.125, "sft_loss": 0.921875, "step": 3658 }, { "dpo_loss": 0.007171630859375, "epoch": 0.59, "final_loss": 0.007171630859375, "grad_norm": 0.0, "learning_rate": 3.73801808812229e-07, "loss": 0.022, "projector_lr": 1.1214054264366872e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.01214599609375, "rewards_train/margins": 6.65625, "rewards_train/rejected": -6.65625, "sft_loss": 0.57421875, "step": 3659 }, { "dpo_loss": 0.0478515625, "epoch": 0.59, "final_loss": 0.0478515625, "grad_norm": 0.0, "learning_rate": 3.735561583532193e-07, "loss": 0.0341, "projector_lr": 1.120668475059658e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.77734375, "rewards_train/margins": 6.5625, "rewards_train/rejected": -7.34375, "sft_loss": 0.58984375, "step": 3660 }, { "dpo_loss": 0.53125, "epoch": 0.59, "final_loss": 0.53125, "grad_norm": 0.0, "learning_rate": 3.7331054049567987e-07, "loss": 0.3043, "projector_lr": 1.1199316214870398e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2197265625, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.21875, "sft_loss": 0.90234375, "step": 3661 }, { "dpo_loss": 0.28515625, "epoch": 0.59, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 3.730649553029393e-07, "loss": 0.2233, "projector_lr": 1.119194865908818e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.388671875, "rewards_train/margins": 6.15625, "rewards_train/rejected": -5.78125, "sft_loss": 0.8046875, "step": 3662 }, { "dpo_loss": 0.07177734375, "epoch": 0.59, "final_loss": 0.07177734375, "grad_norm": 0.0, "learning_rate": 3.728194028383176e-07, "loss": 0.2147, "projector_lr": 1.1184582085149528e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.21875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.4375, "sft_loss": 1.3515625, "step": 3663 }, { "dpo_loss": 0.06298828125, "epoch": 0.59, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 3.7257388316512665e-07, "loss": 0.3705, "projector_lr": 1.11772164949538e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 6.59375, "rewards_train/rejected": -7.78125, "sft_loss": 0.69921875, "step": 3664 }, { "dpo_loss": 0.042236328125, "epoch": 0.59, "final_loss": 0.042236328125, "grad_norm": 0.0, "learning_rate": 3.7232839634666936e-07, "loss": 0.0276, "projector_lr": 1.1169851890400081e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.921875, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.0, "sft_loss": 0.9453125, "step": 3665 }, { "dpo_loss": 0.134765625, "epoch": 0.59, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 3.720829424462405e-07, "loss": 0.0771, "projector_lr": 1.1162488273387215e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.46875, "sft_loss": 0.83984375, "step": 3666 }, { "dpo_loss": 0.0693359375, "epoch": 0.59, "final_loss": 0.0693359375, "grad_norm": 0.0, "learning_rate": 3.7183752152712653e-07, "loss": 0.061, "projector_lr": 1.1155125645813797e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.5, "sft_loss": 0.49609375, "step": 3667 }, { "dpo_loss": 0.11376953125, "epoch": 0.59, "final_loss": 0.11376953125, "grad_norm": 0.0, "learning_rate": 3.7159213365260534e-07, "loss": 0.328, "projector_lr": 1.114776400957816e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.859375, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.625, "sft_loss": 0.7265625, "step": 3668 }, { "dpo_loss": 0.67578125, "epoch": 0.59, "final_loss": 0.67578125, "grad_norm": 0.0, "learning_rate": 3.7134677888594565e-07, "loss": 0.4089, "projector_lr": 1.1140403366578372e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.328125, "rewards_train/margins": 2.640625, "rewards_train/rejected": -3.96875, "sft_loss": 0.78125, "step": 3669 }, { "dpo_loss": 0.040771484375, "epoch": 0.59, "final_loss": 0.040771484375, "grad_norm": 0.0, "learning_rate": 3.7110145729040855e-07, "loss": 0.0437, "projector_lr": 1.1133043718712258e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.53125, "sft_loss": 0.7265625, "step": 3670 }, { "dpo_loss": 0.82421875, "epoch": 0.59, "final_loss": 0.82421875, "grad_norm": 0.0, "learning_rate": 3.7085616892924607e-07, "loss": 0.4695, "projector_lr": 1.1125685067877384e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.421875, "rewards_train/margins": 2.453125, "rewards_train/rejected": -2.875, "sft_loss": 0.640625, "step": 3671 }, { "dpo_loss": 0.396484375, "epoch": 0.59, "final_loss": 0.396484375, "grad_norm": 0.0, "learning_rate": 3.7061091386570186e-07, "loss": 0.2505, "projector_lr": 1.1118327415971057e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 4.5, "rewards_train/rejected": -6.0625, "sft_loss": 0.8046875, "step": 3672 }, { "dpo_loss": 0.00189971923828125, "epoch": 0.59, "final_loss": 0.00189971923828125, "grad_norm": 0.0, "learning_rate": 3.7036569216301095e-07, "loss": 0.0566, "projector_lr": 1.111097076489033e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.37890625, "rewards_train/margins": 6.84375, "rewards_train/rejected": -7.21875, "sft_loss": 0.58203125, "step": 3673 }, { "dpo_loss": 0.031494140625, "epoch": 0.59, "final_loss": 0.031494140625, "grad_norm": 0.0, "learning_rate": 3.7012050388439965e-07, "loss": 0.0397, "projector_lr": 1.110361511653199e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.578125, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.625, "sft_loss": 0.91796875, "step": 3674 }, { "dpo_loss": 0.08056640625, "epoch": 0.59, "final_loss": 0.08056640625, "grad_norm": 0.0, "learning_rate": 3.6987534909308574e-07, "loss": 0.3114, "projector_lr": 1.1096260472792573e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.498046875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -4.9375, "sft_loss": 0.68359375, "step": 3675 }, { "dpo_loss": 0.0015869140625, "epoch": 0.59, "final_loss": 0.0015869140625, "grad_norm": 0.0, "learning_rate": 3.696302278522783e-07, "loss": 0.0451, "projector_lr": 1.1088906835568349e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.35546875, "rewards_train/margins": 7.1875, "rewards_train/rejected": -7.53125, "sft_loss": 0.55859375, "step": 3676 }, { "dpo_loss": 0.125, "epoch": 0.59, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 3.6938514022517794e-07, "loss": 0.1134, "projector_lr": 1.1081554206755339e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.15625, "sft_loss": 0.7265625, "step": 3677 }, { "dpo_loss": 0.365234375, "epoch": 0.59, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 3.6914008627497636e-07, "loss": 0.2982, "projector_lr": 1.1074202588249292e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 4.25, "rewards_train/rejected": -6.03125, "sft_loss": 1.1875, "step": 3678 }, { "dpo_loss": 0.1416015625, "epoch": 0.59, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 3.6889506606485675e-07, "loss": 0.1741, "projector_lr": 1.1066851981945705e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.015625, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.6875, "sft_loss": 0.828125, "step": 3679 }, { "dpo_loss": 0.3359375, "epoch": 0.59, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 3.686500796579936e-07, "loss": 0.8219, "projector_lr": 1.105950238973981e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.56640625, "rewards_train/margins": 7.125, "rewards_train/rejected": -7.6875, "sft_loss": 0.6875, "step": 3680 }, { "dpo_loss": 0.1376953125, "epoch": 0.59, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 3.684051271175528e-07, "loss": 0.1181, "projector_lr": 1.1052153813526584e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.125, "sft_loss": 0.640625, "step": 3681 }, { "dpo_loss": 0.1376953125, "epoch": 0.59, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 3.6816020850669103e-07, "loss": 0.0834, "projector_lr": 1.1044806255200732e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5, "rewards_train/margins": 3.65625, "rewards_train/rejected": -4.15625, "sft_loss": 0.828125, "step": 3682 }, { "dpo_loss": 0.287109375, "epoch": 0.59, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 3.679153238885566e-07, "loss": 0.3019, "projector_lr": 1.10374597166567e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 3.421875, "rewards_train/rejected": -4.4375, "sft_loss": 0.73046875, "step": 3683 }, { "dpo_loss": 0.30078125, "epoch": 0.59, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 3.676704733262891e-07, "loss": 0.4908, "projector_lr": 1.1030114199788674e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.388671875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.53125, "sft_loss": 0.85546875, "step": 3684 }, { "dpo_loss": 0.154296875, "epoch": 0.59, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 3.6742565688301906e-07, "loss": 0.1488, "projector_lr": 1.1022769706490573e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.29296875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.4375, "sft_loss": 0.70703125, "step": 3685 }, { "dpo_loss": 0.076171875, "epoch": 0.59, "final_loss": 0.076171875, "grad_norm": 0.0, "learning_rate": 3.6718087462186886e-07, "loss": 0.1927, "projector_lr": 1.1015426238656066e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34375, "rewards_train/margins": 6.375, "rewards_train/rejected": -6.71875, "sft_loss": 0.6796875, "step": 3686 }, { "dpo_loss": 0.2265625, "epoch": 0.59, "final_loss": 0.2265625, "grad_norm": 0.0, "learning_rate": 3.6693612660595096e-07, "loss": 0.3578, "projector_lr": 1.100808379817853e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.453125, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.625, "sft_loss": 0.7890625, "step": 3687 }, { "dpo_loss": 0.03955078125, "epoch": 0.59, "final_loss": 0.03955078125, "grad_norm": 0.0, "learning_rate": 3.666914128983699e-07, "loss": 0.0605, "projector_lr": 1.1000742386951098e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.25, "sft_loss": 0.73046875, "step": 3688 }, { "dpo_loss": 0.12890625, "epoch": 0.59, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 3.6644673356222104e-07, "loss": 0.0903, "projector_lr": 1.0993402006866633e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.234375, "rewards_train/margins": 5.5625, "rewards_train/rejected": -5.8125, "sft_loss": 0.7890625, "step": 3689 }, { "dpo_loss": 0.470703125, "epoch": 0.59, "final_loss": 0.470703125, "grad_norm": 0.0, "learning_rate": 3.662020886605911e-07, "loss": 0.2401, "projector_lr": 1.0986062659817734e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.005859375, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.3125, "sft_loss": 1.046875, "step": 3690 }, { "dpo_loss": 0.2275390625, "epoch": 0.59, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 3.6595747825655774e-07, "loss": 0.2133, "projector_lr": 1.0978724347696733e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 3.421875, "rewards_train/rejected": -4.1875, "sft_loss": 0.91015625, "step": 3691 }, { "dpo_loss": 0.04541015625, "epoch": 0.59, "final_loss": 0.04541015625, "grad_norm": 0.0, "learning_rate": 3.657129024131895e-07, "loss": 0.0555, "projector_lr": 1.0971387072395686e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7265625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.5625, "sft_loss": 0.69140625, "step": 3692 }, { "dpo_loss": 0.181640625, "epoch": 0.59, "final_loss": 0.181640625, "grad_norm": 0.0, "learning_rate": 3.654683611935464e-07, "loss": 0.198, "projector_lr": 1.0964050835806393e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.0, "sft_loss": 0.76171875, "step": 3693 }, { "dpo_loss": 0.06396484375, "epoch": 0.59, "final_loss": 0.06396484375, "grad_norm": 0.0, "learning_rate": 3.652238546606793e-07, "loss": 0.036, "projector_lr": 1.095671563982038e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.474609375, "rewards_train/margins": 5.375, "rewards_train/rejected": -4.90625, "sft_loss": 0.75390625, "step": 3694 }, { "dpo_loss": 0.158203125, "epoch": 0.59, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 3.6497938287763054e-07, "loss": 0.0952, "projector_lr": 1.0949381486328916e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75390625, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.375, "sft_loss": 0.9921875, "step": 3695 }, { "dpo_loss": 0.14453125, "epoch": 0.59, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 3.6473494590743265e-07, "loss": 0.2153, "projector_lr": 1.094204837722298e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.287109375, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.59375, "sft_loss": 0.67578125, "step": 3696 }, { "dpo_loss": 0.189453125, "epoch": 0.59, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 3.6449054381310993e-07, "loss": 0.1038, "projector_lr": 1.0934716314393298e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6875, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.0, "sft_loss": 0.7578125, "step": 3697 }, { "dpo_loss": 0.072265625, "epoch": 0.59, "final_loss": 0.072265625, "grad_norm": 0.0, "learning_rate": 3.642461766576774e-07, "loss": 0.0799, "projector_lr": 1.0927385299730323e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.87890625, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.21875, "sft_loss": 0.671875, "step": 3698 }, { "dpo_loss": 0.4609375, "epoch": 0.59, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 3.640018445041413e-07, "loss": 0.337, "projector_lr": 1.092005533512424e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.251953125, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.0625, "sft_loss": 0.6640625, "step": 3699 }, { "dpo_loss": 0.0830078125, "epoch": 0.59, "final_loss": 0.0830078125, "grad_norm": 0.0, "learning_rate": 3.637575474154987e-07, "loss": 0.1027, "projector_lr": 1.0912726422464962e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.4375, "sft_loss": 0.65234375, "step": 3700 }, { "dpo_loss": 0.051025390625, "epoch": 0.59, "final_loss": 0.051025390625, "grad_norm": 0.0, "learning_rate": 3.635132854547372e-07, "loss": 0.0442, "projector_lr": 1.0905398563642116e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 7.4375, "rewards_train/rejected": -7.875, "sft_loss": 0.72265625, "step": 3701 }, { "dpo_loss": 0.11376953125, "epoch": 0.59, "final_loss": 0.11376953125, "grad_norm": 0.0, "learning_rate": 3.63269058684836e-07, "loss": 0.2508, "projector_lr": 1.089807176054508e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.96875, "sft_loss": 0.75390625, "step": 3702 }, { "dpo_loss": 0.0791015625, "epoch": 0.59, "final_loss": 0.0791015625, "grad_norm": 0.0, "learning_rate": 3.630248671687649e-07, "loss": 0.1103, "projector_lr": 1.0890746015062948e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 6.65625, "rewards_train/rejected": -7.25, "sft_loss": 0.451171875, "step": 3703 }, { "dpo_loss": 0.1884765625, "epoch": 0.59, "final_loss": 0.1884765625, "grad_norm": 0.0, "learning_rate": 3.627807109694849e-07, "loss": 0.2828, "projector_lr": 1.0883421329084549e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 3.703125, "rewards_train/rejected": -5.28125, "sft_loss": 0.71875, "step": 3704 }, { "dpo_loss": 0.0022735595703125, "epoch": 0.59, "final_loss": 0.0022735595703125, "grad_norm": 0.0, "learning_rate": 3.625365901499474e-07, "loss": 0.1048, "projector_lr": 1.0876097704498423e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0185546875, "rewards_train/margins": 7.84375, "rewards_train/rejected": -7.8125, "sft_loss": 0.7578125, "step": 3705 }, { "dpo_loss": 1.2734375, "epoch": 0.59, "final_loss": 1.2734375, "grad_norm": 0.0, "learning_rate": 3.62292504773095e-07, "loss": 0.7976, "projector_lr": 1.086877514319285e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.046875, "rewards_train/margins": 1.546875, "rewards_train/rejected": -3.59375, "sft_loss": 0.671875, "step": 3706 }, { "dpo_loss": 0.33203125, "epoch": 0.59, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 3.6204845490186107e-07, "loss": 0.2835, "projector_lr": 1.0861453647055834e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.68359375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.03125, "sft_loss": 0.765625, "step": 3707 }, { "dpo_loss": 0.234375, "epoch": 0.59, "final_loss": 0.234375, "grad_norm": 0.0, "learning_rate": 3.6180444059917e-07, "loss": 0.1494, "projector_lr": 1.0854133217975103e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.59765625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.78125, "sft_loss": 0.69140625, "step": 3708 }, { "dpo_loss": 0.1318359375, "epoch": 0.59, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 3.615604619279371e-07, "loss": 0.0781, "projector_lr": 1.0846813857838114e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.20703125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.3125, "sft_loss": 0.8515625, "step": 3709 }, { "dpo_loss": 0.059326171875, "epoch": 0.59, "final_loss": 0.059326171875, "grad_norm": 0.0, "learning_rate": 3.613165189510677e-07, "loss": 0.1191, "projector_lr": 1.083949556853203e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09716796875, "rewards_train/margins": 7.8125, "rewards_train/rejected": -7.9375, "sft_loss": 0.66796875, "step": 3710 }, { "dpo_loss": 0.09716796875, "epoch": 0.59, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 3.6107261173145873e-07, "loss": 0.0561, "projector_lr": 1.0832178351943763e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.28125, "sft_loss": 1.1640625, "step": 3711 }, { "dpo_loss": 0.078125, "epoch": 0.59, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 3.6082874033199786e-07, "loss": 0.156, "projector_lr": 1.0824862209959936e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.375, "sft_loss": 0.59765625, "step": 3712 }, { "dpo_loss": 0.1171875, "epoch": 0.59, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 3.6058490481556315e-07, "loss": 0.3137, "projector_lr": 1.0817547144466894e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.390625, "rewards_train/margins": 6.40625, "rewards_train/rejected": -7.8125, "sft_loss": 0.82421875, "step": 3713 }, { "dpo_loss": 0.279296875, "epoch": 0.59, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 3.603411052450236e-07, "loss": 0.2477, "projector_lr": 1.081023315735071e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.177734375, "rewards_train/margins": 4.8125, "rewards_train/rejected": -4.96875, "sft_loss": 0.66015625, "step": 3714 }, { "dpo_loss": 0.06640625, "epoch": 0.59, "final_loss": 0.06640625, "grad_norm": 0.0, "learning_rate": 3.60097341683239e-07, "loss": 0.0546, "projector_lr": 1.080292025049717e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6875, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.0625, "sft_loss": 0.640625, "step": 3715 }, { "dpo_loss": 0.039794921875, "epoch": 0.59, "final_loss": 0.039794921875, "grad_norm": 0.0, "learning_rate": 3.598536141930597e-07, "loss": 0.1185, "projector_lr": 1.0795608425791792e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.287109375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -4.6875, "sft_loss": 0.671875, "step": 3716 }, { "dpo_loss": 0.19140625, "epoch": 0.59, "final_loss": 0.19140625, "grad_norm": 0.0, "learning_rate": 3.596099228373268e-07, "loss": 0.3839, "projector_lr": 1.0788297685119805e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.328125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.4375, "sft_loss": 0.69921875, "step": 3717 }, { "dpo_loss": 0.427734375, "epoch": 0.59, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 3.593662676788723e-07, "loss": 0.4382, "projector_lr": 1.0780988030366171e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.734375, "rewards_train/margins": 2.625, "rewards_train/rejected": -3.359375, "sft_loss": 0.74609375, "step": 3718 }, { "dpo_loss": 0.255859375, "epoch": 0.6, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 3.5912264878051855e-07, "loss": 0.1339, "projector_lr": 1.0773679463415557e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.453125, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.875, "sft_loss": 0.8828125, "step": 3719 }, { "dpo_loss": 0.388671875, "epoch": 0.6, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 3.5887906620507874e-07, "loss": 0.2309, "projector_lr": 1.0766371986152363e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.1318359375, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.0625, "sft_loss": 0.72265625, "step": 3720 }, { "dpo_loss": 0.0194091796875, "epoch": 0.6, "final_loss": 0.0194091796875, "grad_norm": 0.0, "learning_rate": 3.586355200153567e-07, "loss": 0.1352, "projector_lr": 1.0759065600460701e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.76171875, "rewards_train/margins": 5.125, "rewards_train/rejected": -4.375, "sft_loss": 0.79296875, "step": 3721 }, { "dpo_loss": 0.2275390625, "epoch": 0.6, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 3.5839201027414667e-07, "loss": 0.2657, "projector_lr": 1.0751760308224402e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.458984375, "rewards_train/margins": 3.59375, "rewards_train/rejected": -4.0625, "sft_loss": 0.69140625, "step": 3722 }, { "dpo_loss": 0.039306640625, "epoch": 0.6, "final_loss": 0.039306640625, "grad_norm": 0.0, "learning_rate": 3.5814853704423365e-07, "loss": 0.0364, "projector_lr": 1.074445611132701e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.328125, "rewards_train/margins": 5.6875, "rewards_train/rejected": -5.34375, "sft_loss": 0.68359375, "step": 3723 }, { "dpo_loss": 0.21484375, "epoch": 0.6, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 3.579051003883932e-07, "loss": 0.2519, "projector_lr": 1.0737153011651798e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.15625, "sft_loss": 0.6640625, "step": 3724 }, { "dpo_loss": 0.1787109375, "epoch": 0.6, "final_loss": 0.1787109375, "grad_norm": 0.0, "learning_rate": 3.5766170036939156e-07, "loss": 0.0992, "projector_lr": 1.0729851011081748e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 2.234375, "rewards_train/rejected": -3.28125, "sft_loss": 0.94921875, "step": 3725 }, { "dpo_loss": 0.0673828125, "epoch": 0.6, "final_loss": 0.0673828125, "grad_norm": 0.0, "learning_rate": 3.574183370499855e-07, "loss": 0.0842, "projector_lr": 1.0722550111499567e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2353515625, "rewards_train/margins": 5.75, "rewards_train/rejected": -5.96875, "sft_loss": 0.5, "step": 3726 }, { "dpo_loss": 0.07958984375, "epoch": 0.6, "final_loss": 0.07958984375, "grad_norm": 0.0, "learning_rate": 3.571750104929224e-07, "loss": 0.048, "projector_lr": 1.0715250314787673e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.244140625, "rewards_train/margins": 5.65625, "rewards_train/rejected": -5.40625, "sft_loss": 0.640625, "step": 3727 }, { "dpo_loss": 0.47265625, "epoch": 0.6, "final_loss": 0.47265625, "grad_norm": 0.0, "learning_rate": 3.5693172076093946e-07, "loss": 0.3249, "projector_lr": 1.0707951622828186e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.267578125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.28125, "sft_loss": 0.49609375, "step": 3728 }, { "dpo_loss": 0.0172119140625, "epoch": 0.6, "final_loss": 0.0172119140625, "grad_norm": 0.0, "learning_rate": 3.5668846791676536e-07, "loss": 0.1178, "projector_lr": 1.0700654037502962e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3515625, "rewards_train/margins": 6.03125, "rewards_train/rejected": -6.40625, "sft_loss": 0.8203125, "step": 3729 }, { "dpo_loss": 0.1240234375, "epoch": 0.6, "final_loss": 0.1240234375, "grad_norm": 0.0, "learning_rate": 3.5644525202311884e-07, "loss": 0.1087, "projector_lr": 1.0693357560693565e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9375, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.0, "sft_loss": 0.83984375, "step": 3730 }, { "dpo_loss": 0.0017852783203125, "epoch": 0.6, "final_loss": 0.0017852783203125, "grad_norm": 0.0, "learning_rate": 3.562020731427091e-07, "loss": 0.107, "projector_lr": 1.0686062194281273e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46875, "rewards_train/margins": 7.40625, "rewards_train/rejected": -7.875, "sft_loss": 0.75, "step": 3731 }, { "dpo_loss": 0.078125, "epoch": 0.6, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 3.559589313382357e-07, "loss": 0.1249, "projector_lr": 1.0678767940147073e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.234375, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.09375, "sft_loss": 0.96484375, "step": 3732 }, { "dpo_loss": 0.138671875, "epoch": 0.6, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 3.5571582667238885e-07, "loss": 0.0813, "projector_lr": 1.0671474800171666e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 3.640625, "rewards_train/rejected": -5.09375, "sft_loss": 1.0390625, "step": 3733 }, { "dpo_loss": 0.359375, "epoch": 0.6, "final_loss": 0.359375, "grad_norm": 0.0, "learning_rate": 3.554727592078491e-07, "loss": 0.2253, "projector_lr": 1.0664182776235475e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 3.953125, "rewards_train/rejected": -5.03125, "sft_loss": 0.80078125, "step": 3734 }, { "dpo_loss": 0.30859375, "epoch": 0.6, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 3.552297290072874e-07, "loss": 0.2977, "projector_lr": 1.0656891870218624e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.65625, "sft_loss": 0.734375, "step": 3735 }, { "dpo_loss": 0.06689453125, "epoch": 0.6, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 3.549867361333654e-07, "loss": 0.1167, "projector_lr": 1.0649602084000963e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9921875, "rewards_train/margins": 3.90625, "rewards_train/rejected": -5.90625, "sft_loss": 0.7109375, "step": 3736 }, { "dpo_loss": 0.1728515625, "epoch": 0.6, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 3.5474378064873424e-07, "loss": 0.2384, "projector_lr": 1.0642313419462027e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.62109375, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.59375, "sft_loss": 0.6875, "step": 3737 }, { "dpo_loss": 0.2392578125, "epoch": 0.6, "final_loss": 0.2392578125, "grad_norm": 0.0, "learning_rate": 3.545008626160363e-07, "loss": 0.1967, "projector_lr": 1.0635025878481091e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.408203125, "rewards_train/margins": 5.28125, "rewards_train/rejected": -5.6875, "sft_loss": 0.59765625, "step": 3738 }, { "dpo_loss": 0.025634765625, "epoch": 0.6, "final_loss": 0.025634765625, "grad_norm": 0.0, "learning_rate": 3.542579820979041e-07, "loss": 0.0323, "projector_lr": 1.0627739462937124e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.279296875, "rewards_train/margins": 5.46875, "rewards_train/rejected": -5.75, "sft_loss": 0.828125, "step": 3739 }, { "dpo_loss": 0.1728515625, "epoch": 0.6, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 3.540151391569605e-07, "loss": 0.1727, "projector_lr": 1.0620454174708816e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.287109375, "rewards_train/margins": 3.203125, "rewards_train/rejected": -3.484375, "sft_loss": 0.7734375, "step": 3740 }, { "dpo_loss": 0.0269775390625, "epoch": 0.6, "final_loss": 0.0269775390625, "grad_norm": 0.0, "learning_rate": 3.537723338558181e-07, "loss": 0.1235, "projector_lr": 1.0613170015674543e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1123046875, "rewards_train/margins": 4.75, "rewards_train/rejected": -4.875, "sft_loss": 0.61328125, "step": 3741 }, { "dpo_loss": 0.11767578125, "epoch": 0.6, "final_loss": 0.11767578125, "grad_norm": 0.0, "learning_rate": 3.535295662570806e-07, "loss": 0.326, "projector_lr": 1.060588698771242e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 6.40625, "rewards_train/rejected": -6.9375, "sft_loss": 0.7265625, "step": 3742 }, { "dpo_loss": 0.0198974609375, "epoch": 0.6, "final_loss": 0.0198974609375, "grad_norm": 0.0, "learning_rate": 3.532868364233416e-07, "loss": 0.0987, "projector_lr": 1.0598605092700248e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 6.71875, "rewards_train/rejected": -8.5625, "sft_loss": 0.99609375, "step": 3743 }, { "dpo_loss": 0.0556640625, "epoch": 0.6, "final_loss": 0.0556640625, "grad_norm": 0.0, "learning_rate": 3.53044144417185e-07, "loss": 0.0387, "projector_lr": 1.059132433251555e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.349609375, "rewards_train/margins": 6.15625, "rewards_train/rejected": -6.5, "sft_loss": 0.6484375, "step": 3744 }, { "dpo_loss": 0.1357421875, "epoch": 0.6, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 3.528014903011851e-07, "loss": 0.0704, "projector_lr": 1.0584044709035555e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.74609375, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.875, "sft_loss": 0.7265625, "step": 3745 }, { "dpo_loss": 0.057373046875, "epoch": 0.6, "final_loss": 0.057373046875, "grad_norm": 0.0, "learning_rate": 3.52558874137906e-07, "loss": 0.0638, "projector_lr": 1.057676622413718e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.04052734375, "rewards_train/margins": 5.8125, "rewards_train/rejected": -5.78125, "sft_loss": 0.53125, "step": 3746 }, { "dpo_loss": 0.50390625, "epoch": 0.6, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 3.523162959899023e-07, "loss": 0.3975, "projector_lr": 1.056948887969707e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 3.65625, "rewards_train/rejected": -4.5625, "sft_loss": 0.56640625, "step": 3747 }, { "dpo_loss": 0.921875, "epoch": 0.6, "final_loss": 0.921875, "grad_norm": 0.0, "learning_rate": 3.5207375591971903e-07, "loss": 0.523, "projector_lr": 1.0562212677591572e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.0, "rewards_train/margins": 1.609375, "rewards_train/rejected": -3.609375, "sft_loss": 0.84765625, "step": 3748 }, { "dpo_loss": 0.158203125, "epoch": 0.6, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 3.518312539898912e-07, "loss": 0.1245, "projector_lr": 1.0554937619696736e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.267578125, "rewards_train/margins": 4.59375, "rewards_train/rejected": -4.875, "sft_loss": 0.6796875, "step": 3749 }, { "dpo_loss": 0.0250244140625, "epoch": 0.6, "final_loss": 0.0250244140625, "grad_norm": 0.0, "learning_rate": 3.5158879026294374e-07, "loss": 0.0651, "projector_lr": 1.0547663707888312e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.439453125, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.03125, "sft_loss": 0.48828125, "step": 3750 }, { "dpo_loss": 0.1484375, "epoch": 0.6, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 3.513463648013919e-07, "loss": 0.093, "projector_lr": 1.0540390944041759e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.25, "sft_loss": 0.625, "step": 3751 }, { "dpo_loss": 0.55078125, "epoch": 0.6, "final_loss": 0.55078125, "grad_norm": 0.0, "learning_rate": 3.5110397766774126e-07, "loss": 0.3556, "projector_lr": 1.0533119330032237e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.484375, "rewards_train/margins": 3.265625, "rewards_train/rejected": -3.75, "sft_loss": 0.59375, "step": 3752 }, { "dpo_loss": 0.287109375, "epoch": 0.6, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 3.508616289244873e-07, "loss": 0.216, "projector_lr": 1.0525848867734621e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 3.03125, "rewards_train/rejected": -3.859375, "sft_loss": 0.9296875, "step": 3753 }, { "dpo_loss": 0.06640625, "epoch": 0.6, "final_loss": 0.06640625, "grad_norm": 0.0, "learning_rate": 3.506193186341159e-07, "loss": 0.058, "projector_lr": 1.0518579559023477e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4296875, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.15625, "sft_loss": 0.671875, "step": 3754 }, { "dpo_loss": 0.30078125, "epoch": 0.6, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 3.503770468591024e-07, "loss": 0.1516, "projector_lr": 1.0511311405773072e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.9375, "sft_loss": 0.796875, "step": 3755 }, { "dpo_loss": 0.2099609375, "epoch": 0.6, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 3.50134813661913e-07, "loss": 0.28, "projector_lr": 1.050404440985739e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.859375, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.21875, "sft_loss": 1.0078125, "step": 3756 }, { "dpo_loss": 0.1865234375, "epoch": 0.6, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 3.4989261910500317e-07, "loss": 0.1583, "projector_lr": 1.0496778573150096e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.447265625, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.6875, "sft_loss": 0.734375, "step": 3757 }, { "dpo_loss": 0.07763671875, "epoch": 0.6, "final_loss": 0.07763671875, "grad_norm": 0.0, "learning_rate": 3.4965046325081916e-07, "loss": 0.2412, "projector_lr": 1.0489513897524575e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.109375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.78125, "sft_loss": 0.671875, "step": 3758 }, { "dpo_loss": 0.22265625, "epoch": 0.6, "final_loss": 0.22265625, "grad_norm": 0.0, "learning_rate": 3.4940834616179646e-07, "loss": 0.1726, "projector_lr": 1.0482250384853894e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.77734375, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.90625, "sft_loss": 0.76171875, "step": 3759 }, { "dpo_loss": 0.1630859375, "epoch": 0.6, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 3.491662679003614e-07, "loss": 0.2855, "projector_lr": 1.0474988037010842e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7890625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.65625, "sft_loss": 0.83984375, "step": 3760 }, { "dpo_loss": 0.16796875, "epoch": 0.6, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 3.489242285289297e-07, "loss": 0.3127, "projector_lr": 1.046772685586789e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.09375, "rewards_train/margins": 3.71875, "rewards_train/rejected": -5.8125, "sft_loss": 1.2734375, "step": 3761 }, { "dpo_loss": 0.00628662109375, "epoch": 0.6, "final_loss": 0.00628662109375, "grad_norm": 0.0, "learning_rate": 3.4868222810990735e-07, "loss": 0.0109, "projector_lr": 1.0460466843297221e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.921875, "rewards_train/margins": 7.40625, "rewards_train/rejected": -8.375, "sft_loss": 0.62890625, "step": 3762 }, { "dpo_loss": 0.037353515625, "epoch": 0.6, "final_loss": 0.037353515625, "grad_norm": 0.0, "learning_rate": 3.4844026670569007e-07, "loss": 0.3017, "projector_lr": 1.0453208001170702e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.359375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.03125, "sft_loss": 1.0390625, "step": 3763 }, { "dpo_loss": 0.1611328125, "epoch": 0.6, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 3.481983443786636e-07, "loss": 0.0954, "projector_lr": 1.0445950331359907e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.890625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.9375, "sft_loss": 0.796875, "step": 3764 }, { "dpo_loss": 0.130859375, "epoch": 0.6, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 3.479564611912037e-07, "loss": 0.0673, "projector_lr": 1.0438693835736112e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.072265625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.25, "sft_loss": 0.63671875, "step": 3765 }, { "dpo_loss": 0.007720947265625, "epoch": 0.6, "final_loss": 0.007720947265625, "grad_norm": 0.0, "learning_rate": 3.4771461720567606e-07, "loss": 0.1965, "projector_lr": 1.0431438516170283e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 7.4375, "rewards_train/rejected": -7.9375, "sft_loss": 0.67578125, "step": 3766 }, { "dpo_loss": 0.01123046875, "epoch": 0.6, "final_loss": 0.01123046875, "grad_norm": 0.0, "learning_rate": 3.474728124844363e-07, "loss": 0.1112, "projector_lr": 1.042418437453309e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.043701171875, "rewards_train/margins": 6.53125, "rewards_train/rejected": -6.59375, "sft_loss": 0.67578125, "step": 3767 }, { "dpo_loss": 0.0859375, "epoch": 0.6, "final_loss": 0.0859375, "grad_norm": 0.0, "learning_rate": 3.4723104708982944e-07, "loss": 0.0845, "projector_lr": 1.0416931412694883e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.4375, "sft_loss": 0.81640625, "step": 3768 }, { "dpo_loss": 0.0263671875, "epoch": 0.6, "final_loss": 0.0263671875, "grad_norm": 0.0, "learning_rate": 3.469893210841909e-07, "loss": 0.3144, "projector_lr": 1.0409679632525727e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.0625, "sft_loss": 0.74609375, "step": 3769 }, { "dpo_loss": 0.0888671875, "epoch": 0.6, "final_loss": 0.0888671875, "grad_norm": 0.0, "learning_rate": 3.467476345298458e-07, "loss": 0.133, "projector_lr": 1.0402429035895374e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15234375, "rewards_train/margins": 3.5, "rewards_train/rejected": -3.640625, "sft_loss": 0.91015625, "step": 3770 }, { "dpo_loss": 0.04736328125, "epoch": 0.6, "final_loss": 0.04736328125, "grad_norm": 0.0, "learning_rate": 3.4650598748910896e-07, "loss": 0.0537, "projector_lr": 1.039517962467327e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 7.4375, "rewards_train/rejected": -8.5, "sft_loss": 0.58984375, "step": 3771 }, { "dpo_loss": 0.08740234375, "epoch": 0.6, "final_loss": 0.08740234375, "grad_norm": 0.0, "learning_rate": 3.462643800242854e-07, "loss": 0.1915, "projector_lr": 1.0387931400728563e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 5.5, "rewards_train/rejected": -5.90625, "sft_loss": 0.76171875, "step": 3772 }, { "dpo_loss": 0.494140625, "epoch": 0.6, "final_loss": 0.494140625, "grad_norm": 0.0, "learning_rate": 3.460228121976693e-07, "loss": 0.3181, "projector_lr": 1.0380684365930079e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.21875, "rewards_train/margins": 3.203125, "rewards_train/rejected": -4.4375, "sft_loss": 0.78515625, "step": 3773 }, { "dpo_loss": 0.443359375, "epoch": 0.6, "final_loss": 0.443359375, "grad_norm": 0.0, "learning_rate": 3.457812840715449e-07, "loss": 0.2688, "projector_lr": 1.0373438522146348e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.984375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.9375, "sft_loss": 0.66015625, "step": 3774 }, { "dpo_loss": 0.1484375, "epoch": 0.6, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 3.4553979570818657e-07, "loss": 0.4351, "projector_lr": 1.0366193871245599e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.396484375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.0625, "sft_loss": 0.8984375, "step": 3775 }, { "dpo_loss": 0.56640625, "epoch": 0.6, "final_loss": 0.56640625, "grad_norm": 0.0, "learning_rate": 3.4529834716985815e-07, "loss": 0.6497, "projector_lr": 1.0358950415095746e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.078125, "rewards_train/margins": 3.0, "rewards_train/rejected": -4.09375, "sft_loss": 0.6015625, "step": 3776 }, { "dpo_loss": 0.255859375, "epoch": 0.6, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 3.450569385188129e-07, "loss": 0.13, "projector_lr": 1.0351708155564388e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.0625, "sft_loss": 0.8046875, "step": 3777 }, { "dpo_loss": 0.0673828125, "epoch": 0.6, "final_loss": 0.0673828125, "grad_norm": 0.0, "learning_rate": 3.448155698172941e-07, "loss": 0.2923, "projector_lr": 1.0344467094518823e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.83984375, "rewards_train/margins": 3.578125, "rewards_train/rejected": -4.40625, "sft_loss": 0.80859375, "step": 3778 }, { "dpo_loss": 0.08544921875, "epoch": 0.6, "final_loss": 0.08544921875, "grad_norm": 0.0, "learning_rate": 3.4457424112753484e-07, "loss": 0.1115, "projector_lr": 1.0337227233826047e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.046875, "rewards_train/margins": 6.0, "rewards_train/rejected": -8.0625, "sft_loss": 1.0, "step": 3779 }, { "dpo_loss": 0.1220703125, "epoch": 0.6, "final_loss": 0.1220703125, "grad_norm": 0.0, "learning_rate": 3.443329525117578e-07, "loss": 0.1286, "projector_lr": 1.0329988575352735e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1259765625, "rewards_train/margins": 3.953125, "rewards_train/rejected": -4.0625, "sft_loss": 0.70703125, "step": 3780 }, { "dpo_loss": 0.058349609375, "epoch": 0.6, "final_loss": 0.058349609375, "grad_norm": 0.0, "learning_rate": 3.440917040321754e-07, "loss": 0.0322, "projector_lr": 1.0322751120965264e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.625, "sft_loss": 0.671875, "step": 3781 }, { "dpo_loss": 0.0169677734375, "epoch": 0.61, "final_loss": 0.0169677734375, "grad_norm": 0.0, "learning_rate": 3.4385049575098925e-07, "loss": 0.2601, "projector_lr": 1.0315514872529678e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.703125, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.9375, "sft_loss": 0.68359375, "step": 3782 }, { "dpo_loss": 0.44140625, "epoch": 0.61, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 3.436093277303911e-07, "loss": 0.3735, "projector_lr": 1.0308279831911733e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.390625, "rewards_train/margins": 3.0, "rewards_train/rejected": -4.375, "sft_loss": 0.84375, "step": 3783 }, { "dpo_loss": 0.2451171875, "epoch": 0.61, "final_loss": 0.2451171875, "grad_norm": 0.0, "learning_rate": 3.4336820003256224e-07, "loss": 0.4182, "projector_lr": 1.0301046000976867e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.625, "rewards_train/margins": 3.328125, "rewards_train/rejected": -3.9375, "sft_loss": 1.03125, "step": 3784 }, { "dpo_loss": 0.1337890625, "epoch": 0.61, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 3.4312711271967364e-07, "loss": 0.1642, "projector_lr": 1.029381338159021e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.59375, "sft_loss": 0.6640625, "step": 3785 }, { "dpo_loss": 0.00787353515625, "epoch": 0.61, "final_loss": 0.00787353515625, "grad_norm": 0.0, "learning_rate": 3.428860658538852e-07, "loss": 0.3157, "projector_lr": 1.0286581975616558e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 7.0625, "rewards_train/rejected": -7.625, "sft_loss": 0.74609375, "step": 3786 }, { "dpo_loss": 0.166015625, "epoch": 0.61, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 3.426450594973474e-07, "loss": 0.1239, "projector_lr": 1.0279351784920423e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.625, "sft_loss": 0.77734375, "step": 3787 }, { "dpo_loss": 0.09423828125, "epoch": 0.61, "final_loss": 0.09423828125, "grad_norm": 0.0, "learning_rate": 3.424040937121995e-07, "loss": 0.3069, "projector_lr": 1.0272122811365987e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.197265625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.03125, "sft_loss": 0.609375, "step": 3788 }, { "dpo_loss": 0.2470703125, "epoch": 0.61, "final_loss": 0.2470703125, "grad_norm": 0.0, "learning_rate": 3.421631685605707e-07, "loss": 0.1386, "projector_lr": 1.0264895056817122e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.28125, "sft_loss": 0.5859375, "step": 3789 }, { "dpo_loss": 0.1865234375, "epoch": 0.61, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 3.4192228410457967e-07, "loss": 0.1226, "projector_lr": 1.0257668523137391e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0625, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.53125, "sft_loss": 0.92578125, "step": 3790 }, { "dpo_loss": 0.07275390625, "epoch": 0.61, "final_loss": 0.07275390625, "grad_norm": 0.0, "learning_rate": 3.416814404063344e-07, "loss": 0.0442, "projector_lr": 1.0250443212190032e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.75, "rewards_train/rejected": -5.25, "sft_loss": 0.71875, "step": 3791 }, { "dpo_loss": 0.359375, "epoch": 0.61, "final_loss": 0.359375, "grad_norm": 0.0, "learning_rate": 3.4144063752793245e-07, "loss": 0.3322, "projector_lr": 1.0243219125837975e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.265625, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.1875, "sft_loss": 0.7578125, "step": 3792 }, { "dpo_loss": 0.1748046875, "epoch": 0.61, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 3.411998755314609e-07, "loss": 0.2335, "projector_lr": 1.0235996265943829e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.90625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.5625, "sft_loss": 0.6953125, "step": 3793 }, { "dpo_loss": 0.0888671875, "epoch": 0.61, "final_loss": 0.0888671875, "grad_norm": 0.0, "learning_rate": 3.4095915447899645e-07, "loss": 0.0841, "projector_lr": 1.0228774634369892e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.8125, "sft_loss": 0.82421875, "step": 3794 }, { "dpo_loss": 0.04345703125, "epoch": 0.61, "final_loss": 0.04345703125, "grad_norm": 0.0, "learning_rate": 3.4071847443260484e-07, "loss": 0.1039, "projector_lr": 1.0221554232978146e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 8.1875, "rewards_train/rejected": -8.6875, "sft_loss": 0.80859375, "step": 3795 }, { "dpo_loss": 0.212890625, "epoch": 0.61, "final_loss": 0.212890625, "grad_norm": 0.0, "learning_rate": 3.404778354543415e-07, "loss": 0.1261, "projector_lr": 1.0214335063630246e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.734375, "rewards_train/margins": 3.421875, "rewards_train/rejected": -5.15625, "sft_loss": 0.64453125, "step": 3796 }, { "dpo_loss": 0.02978515625, "epoch": 0.61, "final_loss": 0.02978515625, "grad_norm": 0.0, "learning_rate": 3.402372376062515e-07, "loss": 0.034, "projector_lr": 1.0207117128187545e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.408203125, "rewards_train/margins": 5.53125, "rewards_train/rejected": -5.9375, "sft_loss": 0.6796875, "step": 3797 }, { "dpo_loss": 0.0189208984375, "epoch": 0.61, "final_loss": 0.0189208984375, "grad_norm": 0.0, "learning_rate": 3.3999668095036874e-07, "loss": 0.0195, "projector_lr": 1.0199900428511064e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.419921875, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.5625, "sft_loss": 0.7421875, "step": 3798 }, { "dpo_loss": 0.03564453125, "epoch": 0.61, "final_loss": 0.03564453125, "grad_norm": 0.0, "learning_rate": 3.3975616554871713e-07, "loss": 0.1012, "projector_lr": 1.0192684966461515e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.71875, "sft_loss": 0.70703125, "step": 3799 }, { "dpo_loss": 0.375, "epoch": 0.61, "final_loss": 0.375, "grad_norm": 0.0, "learning_rate": 3.3951569146330923e-07, "loss": 0.2735, "projector_lr": 1.0185470743899278e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.337890625, "rewards_train/margins": 3.546875, "rewards_train/rejected": -3.890625, "sft_loss": 0.5546875, "step": 3800 }, { "dpo_loss": 0.25390625, "epoch": 0.61, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 3.3927525875614754e-07, "loss": 0.3942, "projector_lr": 1.0178257762684427e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.53125, "rewards_train/margins": 3.453125, "rewards_train/rejected": -5.0, "sft_loss": 0.7265625, "step": 3801 }, { "dpo_loss": 0.1767578125, "epoch": 0.61, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 3.3903486748922374e-07, "loss": 0.1216, "projector_lr": 1.0171046024676712e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.109375, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.03125, "sft_loss": 0.84765625, "step": 3802 }, { "dpo_loss": 0.10986328125, "epoch": 0.61, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 3.3879451772451885e-07, "loss": 0.2944, "projector_lr": 1.0163835531735565e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59765625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.34375, "sft_loss": 0.734375, "step": 3803 }, { "dpo_loss": 0.0299072265625, "epoch": 0.61, "final_loss": 0.0299072265625, "grad_norm": 0.0, "learning_rate": 3.3855420952400284e-07, "loss": 0.1145, "projector_lr": 1.0156626285720086e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.375, "sft_loss": 0.68359375, "step": 3804 }, { "dpo_loss": 0.1826171875, "epoch": 0.61, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 3.3831394294963534e-07, "loss": 0.0928, "projector_lr": 1.014941828848906e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.484375, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.53125, "sft_loss": 0.69140625, "step": 3805 }, { "dpo_loss": 0.09130859375, "epoch": 0.61, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 3.3807371806336515e-07, "loss": 0.0788, "projector_lr": 1.0142211541900956e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.482421875, "rewards_train/margins": 6.21875, "rewards_train/rejected": -6.6875, "sft_loss": 0.5078125, "step": 3806 }, { "dpo_loss": 0.1552734375, "epoch": 0.61, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 3.3783353492713046e-07, "loss": 0.0995, "projector_lr": 1.0135006047813915e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.34375, "sft_loss": 0.68359375, "step": 3807 }, { "dpo_loss": 0.1923828125, "epoch": 0.61, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 3.375933936028586e-07, "loss": 0.2642, "projector_lr": 1.012780180808576e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.609375, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.96875, "sft_loss": 0.8125, "step": 3808 }, { "dpo_loss": 0.064453125, "epoch": 0.61, "final_loss": 0.064453125, "grad_norm": 0.0, "learning_rate": 3.3735329415246574e-07, "loss": 0.1733, "projector_lr": 1.0120598824573971e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 3.65625, "rewards_train/rejected": -4.625, "sft_loss": 0.6875, "step": 3809 }, { "dpo_loss": 0.177734375, "epoch": 0.61, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 3.371132366378577e-07, "loss": 0.1657, "projector_lr": 1.011339709913573e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96875, "rewards_train/margins": 6.125, "rewards_train/rejected": -7.09375, "sft_loss": 0.62109375, "step": 3810 }, { "dpo_loss": 0.1474609375, "epoch": 0.61, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 3.368732211209294e-07, "loss": 0.2879, "projector_lr": 1.0106196633627884e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.46875, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.53125, "sft_loss": 0.7109375, "step": 3811 }, { "dpo_loss": 0.01153564453125, "epoch": 0.61, "final_loss": 0.01153564453125, "grad_norm": 0.0, "learning_rate": 3.3663324766356523e-07, "loss": 0.4096, "projector_lr": 1.0098997429906956e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.443359375, "rewards_train/margins": 6.59375, "rewards_train/rejected": -7.03125, "sft_loss": 0.734375, "step": 3812 }, { "dpo_loss": 0.408203125, "epoch": 0.61, "final_loss": 0.408203125, "grad_norm": 0.0, "learning_rate": 3.363933163276379e-07, "loss": 0.2072, "projector_lr": 1.0091799489829137e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.984375, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.0, "sft_loss": 0.79296875, "step": 3813 }, { "dpo_loss": 0.27734375, "epoch": 0.61, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 3.361534271750099e-07, "loss": 0.2186, "projector_lr": 1.00846028152503e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 2.703125, "rewards_train/rejected": -4.0625, "sft_loss": 0.91796875, "step": 3814 }, { "dpo_loss": 0.7578125, "epoch": 0.61, "final_loss": 0.7578125, "grad_norm": 0.0, "learning_rate": 3.359135802675329e-07, "loss": 0.4127, "projector_lr": 1.0077407408025987e-06, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.109375, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.15625, "sft_loss": 0.77734375, "step": 3815 }, { "dpo_loss": 0.004058837890625, "epoch": 0.61, "final_loss": 0.004058837890625, "grad_norm": 0.0, "learning_rate": 3.356737756670473e-07, "loss": 0.0031, "projector_lr": 1.007021327001142e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 7.3125, "rewards_train/rejected": -8.25, "sft_loss": 0.88671875, "step": 3816 }, { "dpo_loss": 0.2890625, "epoch": 0.61, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 3.354340134353831e-07, "loss": 0.7615, "projector_lr": 1.0063020403061495e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.265625, "rewards_train/margins": 3.984375, "rewards_train/rejected": -5.25, "sft_loss": 0.93359375, "step": 3817 }, { "dpo_loss": 0.1806640625, "epoch": 0.61, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 3.351942936343586e-07, "loss": 0.1802, "projector_lr": 1.005582880903076e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.078125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.28125, "sft_loss": 0.66015625, "step": 3818 }, { "dpo_loss": 0.040771484375, "epoch": 0.61, "final_loss": 0.040771484375, "grad_norm": 0.0, "learning_rate": 3.3495461632578193e-07, "loss": 0.1029, "projector_lr": 1.0048638489773458e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5390625, "rewards_train/margins": 5.8125, "rewards_train/rejected": -7.375, "sft_loss": 0.83203125, "step": 3819 }, { "dpo_loss": 0.40625, "epoch": 0.61, "final_loss": 0.40625, "grad_norm": 0.0, "learning_rate": 3.347149815714498e-07, "loss": 0.3912, "projector_lr": 1.0041449447143495e-06, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 2.328125, "rewards_train/rejected": -3.734375, "sft_loss": 0.5546875, "step": 3820 }, { "dpo_loss": 0.7265625, "epoch": 0.61, "final_loss": 0.7265625, "grad_norm": 0.0, "learning_rate": 3.3447538943314837e-07, "loss": 0.4086, "projector_lr": 1.003426168299445e-06, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.453125, "rewards_train/margins": 2.59375, "rewards_train/rejected": -5.0625, "sft_loss": 1.0234375, "step": 3821 }, { "dpo_loss": 0.07861328125, "epoch": 0.61, "final_loss": 0.07861328125, "grad_norm": 0.0, "learning_rate": 3.3423583997265217e-07, "loss": 0.0628, "projector_lr": 1.0027075199179565e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.546875, "rewards_train/margins": 4.46875, "rewards_train/rejected": -6.03125, "sft_loss": 0.87890625, "step": 3822 }, { "dpo_loss": 0.045654296875, "epoch": 0.61, "final_loss": 0.045654296875, "grad_norm": 0.0, "learning_rate": 3.339963332517252e-07, "loss": 0.1602, "projector_lr": 1.0019889997551757e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.0, "sft_loss": 0.6328125, "step": 3823 }, { "dpo_loss": 0.059814453125, "epoch": 0.61, "final_loss": 0.059814453125, "grad_norm": 0.0, "learning_rate": 3.337568693321204e-07, "loss": 0.0809, "projector_lr": 1.0012706079963613e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.25, "sft_loss": 0.7890625, "step": 3824 }, { "dpo_loss": 0.10498046875, "epoch": 0.61, "final_loss": 0.10498046875, "grad_norm": 0.0, "learning_rate": 3.335174482755797e-07, "loss": 0.2483, "projector_lr": 1.0005523448267392e-06, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.40625, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.5, "sft_loss": 0.83203125, "step": 3825 }, { "dpo_loss": 0.2890625, "epoch": 0.61, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 3.3327807014383405e-07, "loss": 0.2125, "projector_lr": 9.99834210431502e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 3.375, "rewards_train/rejected": -4.1875, "sft_loss": 0.94140625, "step": 3826 }, { "dpo_loss": 0.0142822265625, "epoch": 0.61, "final_loss": 0.0142822265625, "grad_norm": 0.0, "learning_rate": 3.330387349986026e-07, "loss": 0.1807, "projector_lr": 9.991162049958077e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.5, "sft_loss": 0.70703125, "step": 3827 }, { "dpo_loss": 0.150390625, "epoch": 0.61, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 3.327994429015944e-07, "loss": 0.1802, "projector_lr": 9.983983287047832e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.46875, "sft_loss": 0.81640625, "step": 3828 }, { "dpo_loss": 0.0107421875, "epoch": 0.61, "final_loss": 0.0107421875, "grad_norm": 0.0, "learning_rate": 3.325601939145069e-07, "loss": 0.0937, "projector_lr": 9.976805817435208e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.306640625, "rewards_train/margins": 5.53125, "rewards_train/rejected": -5.84375, "sft_loss": 0.68359375, "step": 3829 }, { "dpo_loss": 0.1298828125, "epoch": 0.61, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 3.3232098809902666e-07, "loss": 0.083, "projector_lr": 9.9696296429708e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.84375, "sft_loss": 0.69140625, "step": 3830 }, { "dpo_loss": 0.5234375, "epoch": 0.61, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 3.3208182551682885e-07, "loss": 0.3335, "projector_lr": 9.962454765504866e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.3125, "sft_loss": 0.859375, "step": 3831 }, { "dpo_loss": 0.01019287109375, "epoch": 0.61, "final_loss": 0.01019287109375, "grad_norm": 0.0, "learning_rate": 3.3184270622957754e-07, "loss": 0.1925, "projector_lr": 9.955281186887326e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.068359375, "rewards_train/margins": 6.46875, "rewards_train/rejected": -6.40625, "sft_loss": 0.55859375, "step": 3832 }, { "dpo_loss": 0.08203125, "epoch": 0.61, "final_loss": 0.08203125, "grad_norm": 0.0, "learning_rate": 3.316036302989258e-07, "loss": 0.0517, "projector_lr": 9.948108908967774e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7265625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.375, "sft_loss": 0.7578125, "step": 3833 }, { "dpo_loss": 0.435546875, "epoch": 0.61, "final_loss": 0.435546875, "grad_norm": 0.0, "learning_rate": 3.313645977865154e-07, "loss": 0.2662, "projector_lr": 9.940937933595462e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.375, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.71875, "sft_loss": 0.77734375, "step": 3834 }, { "dpo_loss": 0.126953125, "epoch": 0.61, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 3.311256087539771e-07, "loss": 0.0981, "projector_lr": 9.933768262619315e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.40625, "sft_loss": 1.0546875, "step": 3835 }, { "dpo_loss": 0.00958251953125, "epoch": 0.61, "final_loss": 0.00958251953125, "grad_norm": 0.0, "learning_rate": 3.3088666326293013e-07, "loss": 0.2379, "projector_lr": 9.926599897887904e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1279296875, "rewards_train/margins": 8.0, "rewards_train/rejected": -8.125, "sft_loss": 0.53125, "step": 3836 }, { "dpo_loss": 0.1962890625, "epoch": 0.61, "final_loss": 0.1962890625, "grad_norm": 0.0, "learning_rate": 3.3064776137498285e-07, "loss": 0.2074, "projector_lr": 9.919432841249487e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.8125, "sft_loss": 1.15625, "step": 3837 }, { "dpo_loss": 0.014892578125, "epoch": 0.61, "final_loss": 0.014892578125, "grad_norm": 0.0, "learning_rate": 3.304089031517319e-07, "loss": 0.056, "projector_lr": 9.912267094551959e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7421875, "rewards_train/margins": 5.375, "rewards_train/rejected": -7.09375, "sft_loss": 0.59375, "step": 3838 }, { "dpo_loss": 0.318359375, "epoch": 0.61, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 3.3017008865476324e-07, "loss": 0.2699, "projector_lr": 9.905102659642898e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.078125, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.03125, "sft_loss": 0.86328125, "step": 3839 }, { "dpo_loss": 0.03857421875, "epoch": 0.61, "final_loss": 0.03857421875, "grad_norm": 0.0, "learning_rate": 3.299313179456511e-07, "loss": 0.2547, "projector_lr": 9.897939538369533e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.96875, "sft_loss": 0.6875, "step": 3840 }, { "dpo_loss": 0.32421875, "epoch": 0.61, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 3.2969259108595866e-07, "loss": 0.5228, "projector_lr": 9.89077773257876e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.083984375, "rewards_train/margins": 3.46875, "rewards_train/rejected": -3.390625, "sft_loss": 0.86328125, "step": 3841 }, { "dpo_loss": 0.62109375, "epoch": 0.61, "final_loss": 0.62109375, "grad_norm": 0.0, "learning_rate": 3.294539081372376e-07, "loss": 0.3965, "projector_lr": 9.883617244117129e-07, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.578125, "rewards_train/margins": 1.5625, "rewards_train/rejected": -3.140625, "sft_loss": 0.98046875, "step": 3842 }, { "dpo_loss": 0.0341796875, "epoch": 0.61, "final_loss": 0.0341796875, "grad_norm": 0.0, "learning_rate": 3.292152691610286e-07, "loss": 0.0731, "projector_lr": 9.876458074830859e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.018798828125, "rewards_train/margins": 6.1875, "rewards_train/rejected": -6.15625, "sft_loss": 0.83203125, "step": 3843 }, { "dpo_loss": 0.005157470703125, "epoch": 0.62, "final_loss": 0.005157470703125, "grad_norm": 0.0, "learning_rate": 3.2897667421886096e-07, "loss": 0.091, "projector_lr": 9.86930022656583e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.412109375, "rewards_train/margins": 7.71875, "rewards_train/rejected": -7.3125, "sft_loss": 0.6640625, "step": 3844 }, { "dpo_loss": 0.20703125, "epoch": 0.62, "final_loss": 0.20703125, "grad_norm": 0.0, "learning_rate": 3.287381233722519e-07, "loss": 0.3116, "projector_lr": 9.862143701167559e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 2.640625, "rewards_train/rejected": -3.96875, "sft_loss": 0.9140625, "step": 3845 }, { "dpo_loss": 0.1767578125, "epoch": 0.62, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 3.2849961668270834e-07, "loss": 0.271, "projector_lr": 9.85498850048125e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3515625, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.21875, "sft_loss": 0.9609375, "step": 3846 }, { "dpo_loss": 0.05712890625, "epoch": 0.62, "final_loss": 0.05712890625, "grad_norm": 0.0, "learning_rate": 3.28261154211725e-07, "loss": 0.0713, "projector_lr": 9.847834626351752e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.494140625, "rewards_train/margins": 7.40625, "rewards_train/rejected": -7.875, "sft_loss": 0.53515625, "step": 3847 }, { "dpo_loss": 0.41796875, "epoch": 0.62, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 3.2802273602078594e-07, "loss": 0.2853, "projector_lr": 9.840682080623578e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.625, "sft_loss": 0.75390625, "step": 3848 }, { "dpo_loss": 0.2255859375, "epoch": 0.62, "final_loss": 0.2255859375, "grad_norm": 0.0, "learning_rate": 3.277843621713629e-07, "loss": 0.1521, "projector_lr": 9.833530865140887e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 2.28125, "rewards_train/rejected": -2.75, "sft_loss": 0.75, "step": 3849 }, { "dpo_loss": 0.055908203125, "epoch": 0.62, "final_loss": 0.055908203125, "grad_norm": 0.0, "learning_rate": 3.275460327249169e-07, "loss": 0.1376, "projector_lr": 9.826380981747508e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5078125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.875, "sft_loss": 0.55078125, "step": 3850 }, { "dpo_loss": 0.01171875, "epoch": 0.62, "final_loss": 0.01171875, "grad_norm": 0.0, "learning_rate": 3.2730774774289715e-07, "loss": 0.3861, "projector_lr": 9.819232432286915e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.087890625, "rewards_train/margins": 6.78125, "rewards_train/rejected": -6.875, "sft_loss": 0.5703125, "step": 3851 }, { "dpo_loss": 0.04443359375, "epoch": 0.62, "final_loss": 0.04443359375, "grad_norm": 0.0, "learning_rate": 3.2706950728674166e-07, "loss": 0.0586, "projector_lr": 9.81208521860225e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.8125, "sft_loss": 0.8203125, "step": 3852 }, { "dpo_loss": 0.1337890625, "epoch": 0.62, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 3.26831311417877e-07, "loss": 0.1623, "projector_lr": 9.80493934253631e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75390625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.46875, "sft_loss": 0.875, "step": 3853 }, { "dpo_loss": 0.201171875, "epoch": 0.62, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 3.2659316019771765e-07, "loss": 0.2006, "projector_lr": 9.79779480593153e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.515625, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.5, "sft_loss": 0.78125, "step": 3854 }, { "dpo_loss": 0.462890625, "epoch": 0.62, "final_loss": 0.462890625, "grad_norm": 0.0, "learning_rate": 3.2635505368766727e-07, "loss": 0.2734, "projector_lr": 9.79065161063002e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 2.65625, "rewards_train/rejected": -3.71875, "sft_loss": 0.8671875, "step": 3855 }, { "dpo_loss": 0.58984375, "epoch": 0.62, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 3.2611699194911757e-07, "loss": 0.4561, "projector_lr": 9.783509758473529e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.875, "sft_loss": 1.015625, "step": 3856 }, { "dpo_loss": 0.23046875, "epoch": 0.62, "final_loss": 0.23046875, "grad_norm": 0.0, "learning_rate": 3.258789750434492e-07, "loss": 0.1312, "projector_lr": 9.776369251303475e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.453125, "rewards_train/margins": 3.421875, "rewards_train/rejected": -4.875, "sft_loss": 0.78515625, "step": 3857 }, { "dpo_loss": 0.013916015625, "epoch": 0.62, "final_loss": 0.013916015625, "grad_norm": 0.0, "learning_rate": 3.256410030320304e-07, "loss": 0.0444, "projector_lr": 9.76923009096091e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.3125, "sft_loss": 0.79296875, "step": 3858 }, { "dpo_loss": 0.1962890625, "epoch": 0.62, "final_loss": 0.1962890625, "grad_norm": 0.0, "learning_rate": 3.254030759762186e-07, "loss": 0.222, "projector_lr": 9.762092279286558e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.203125, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.71875, "sft_loss": 0.90234375, "step": 3859 }, { "dpo_loss": 0.197265625, "epoch": 0.62, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 3.251651939373594e-07, "loss": 0.1064, "projector_lr": 9.754955818120784e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.109375, "rewards_train/margins": 4.1875, "rewards_train/rejected": -6.3125, "sft_loss": 0.70703125, "step": 3860 }, { "dpo_loss": 0.045654296875, "epoch": 0.62, "final_loss": 0.045654296875, "grad_norm": 0.0, "learning_rate": 3.249273569767867e-07, "loss": 0.0442, "projector_lr": 9.747820709303604e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 6.03125, "rewards_train/rejected": -6.625, "sft_loss": 0.60546875, "step": 3861 }, { "dpo_loss": 0.2275390625, "epoch": 0.62, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 3.2468956515582324e-07, "loss": 0.1945, "projector_lr": 9.740686954674698e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0078125, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.34375, "sft_loss": 0.7890625, "step": 3862 }, { "dpo_loss": 0.7890625, "epoch": 0.62, "final_loss": 0.7890625, "grad_norm": 0.0, "learning_rate": 3.2445181853577926e-07, "loss": 0.5574, "projector_lr": 9.733554556073378e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.546875, "rewards_train/margins": 2.375, "rewards_train/rejected": -3.921875, "sft_loss": 0.91015625, "step": 3863 }, { "dpo_loss": 0.16015625, "epoch": 0.62, "final_loss": 0.16015625, "grad_norm": 0.0, "learning_rate": 3.2421411717795386e-07, "loss": 0.153, "projector_lr": 9.726423515338618e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.5625, "rewards_train/margins": 4.5, "rewards_train/rejected": -3.921875, "sft_loss": 0.75, "step": 3864 }, { "dpo_loss": 0.01019287109375, "epoch": 0.62, "final_loss": 0.01019287109375, "grad_norm": 0.0, "learning_rate": 3.239764611436347e-07, "loss": 0.1516, "projector_lr": 9.719293834309042e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.75, "sft_loss": 0.7734375, "step": 3865 }, { "dpo_loss": 0.1796875, "epoch": 0.62, "final_loss": 0.1796875, "grad_norm": 0.0, "learning_rate": 3.2373885049409753e-07, "loss": 0.1192, "projector_lr": 9.712165514822926e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.458984375, "rewards_train/margins": 4.4375, "rewards_train/rejected": -4.90625, "sft_loss": 0.96484375, "step": 3866 }, { "dpo_loss": 0.044677734375, "epoch": 0.62, "final_loss": 0.044677734375, "grad_norm": 0.0, "learning_rate": 3.2350128529060595e-07, "loss": 0.1121, "projector_lr": 9.70503855871818e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.29296875, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.46875, "sft_loss": 0.5703125, "step": 3867 }, { "dpo_loss": 0.55859375, "epoch": 0.62, "final_loss": 0.55859375, "grad_norm": 0.0, "learning_rate": 3.2326376559441257e-07, "loss": 0.3674, "projector_lr": 9.697912967832376e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.578125, "rewards_train/margins": 2.484375, "rewards_train/rejected": -4.0625, "sft_loss": 1.0, "step": 3868 }, { "dpo_loss": 0.06494140625, "epoch": 0.62, "final_loss": 0.06494140625, "grad_norm": 0.0, "learning_rate": 3.2302629146675776e-07, "loss": 0.1576, "projector_lr": 9.690788744002732e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.875, "sft_loss": 1.265625, "step": 3869 }, { "dpo_loss": 0.00982666015625, "epoch": 0.62, "final_loss": 0.00982666015625, "grad_norm": 0.0, "learning_rate": 3.2278886296887053e-07, "loss": 0.035, "projector_lr": 9.683665889066116e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.359375, "rewards_train/margins": 6.96875, "rewards_train/rejected": -7.34375, "sft_loss": 0.478515625, "step": 3870 }, { "dpo_loss": 0.1572265625, "epoch": 0.62, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 3.2255148016196785e-07, "loss": 0.1029, "projector_lr": 9.676544404859036e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.8125, "sft_loss": 0.890625, "step": 3871 }, { "dpo_loss": 0.365234375, "epoch": 0.62, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 3.2231414310725504e-07, "loss": 0.2194, "projector_lr": 9.669424293217652e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 3.265625, "rewards_train/rejected": -4.59375, "sft_loss": 0.97265625, "step": 3872 }, { "dpo_loss": 0.455078125, "epoch": 0.62, "final_loss": 0.455078125, "grad_norm": 0.0, "learning_rate": 3.220768518659254e-07, "loss": 0.2867, "projector_lr": 9.66230555597776e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.453125, "rewards_train/margins": 2.328125, "rewards_train/rejected": -4.78125, "sft_loss": 0.67578125, "step": 3873 }, { "dpo_loss": 0.26171875, "epoch": 0.62, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 3.218396064991605e-07, "loss": 0.158, "projector_lr": 9.655188194974816e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 3.8125, "rewards_train/rejected": -4.65625, "sft_loss": 0.67578125, "step": 3874 }, { "dpo_loss": 0.2138671875, "epoch": 0.62, "final_loss": 0.2138671875, "grad_norm": 0.0, "learning_rate": 3.216024070681306e-07, "loss": 0.1742, "projector_lr": 9.648072212043917e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.578125, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.6875, "sft_loss": 0.53125, "step": 3875 }, { "dpo_loss": 0.2099609375, "epoch": 0.62, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 3.213652536339933e-07, "loss": 0.1573, "projector_lr": 9.6409576090198e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.828125, "rewards_train/margins": 6.625, "rewards_train/rejected": -7.4375, "sft_loss": 0.58984375, "step": 3876 }, { "dpo_loss": 0.1787109375, "epoch": 0.62, "final_loss": 0.1787109375, "grad_norm": 0.0, "learning_rate": 3.211281462578948e-07, "loss": 0.1, "projector_lr": 9.633844387736844e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6875, "rewards_train/margins": 3.28125, "rewards_train/rejected": -4.96875, "sft_loss": 0.89453125, "step": 3877 }, { "dpo_loss": 0.251953125, "epoch": 0.62, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 3.208910850009695e-07, "loss": 0.2448, "projector_lr": 9.626732550029087e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.71875, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.21875, "sft_loss": 0.5546875, "step": 3878 }, { "dpo_loss": 0.1865234375, "epoch": 0.62, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 3.2065406992433957e-07, "loss": 0.3508, "projector_lr": 9.619622097730189e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.875, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.65625, "sft_loss": 0.625, "step": 3879 }, { "dpo_loss": 0.0128173828125, "epoch": 0.62, "final_loss": 0.0128173828125, "grad_norm": 0.0, "learning_rate": 3.204171010891156e-07, "loss": 0.0104, "projector_lr": 9.61251303267347e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.205078125, "rewards_train/margins": 7.3125, "rewards_train/rejected": -7.5, "sft_loss": 0.58984375, "step": 3880 }, { "dpo_loss": 0.1123046875, "epoch": 0.62, "final_loss": 0.1123046875, "grad_norm": 0.0, "learning_rate": 3.20180178556396e-07, "loss": 0.1184, "projector_lr": 9.605405356691881e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.201171875, "rewards_train/margins": 5.46875, "rewards_train/rejected": -5.28125, "sft_loss": 0.77734375, "step": 3881 }, { "dpo_loss": 0.00909423828125, "epoch": 0.62, "final_loss": 0.00909423828125, "grad_norm": 0.0, "learning_rate": 3.199433023872674e-07, "loss": 0.0118, "projector_lr": 9.598299071618022e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 7.34375, "rewards_train/rejected": -8.3125, "sft_loss": 0.9140625, "step": 3882 }, { "dpo_loss": 0.34375, "epoch": 0.62, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 3.197064726428044e-07, "loss": 0.2839, "projector_lr": 9.591194179284134e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 6.4375, "rewards_train/rejected": -7.46875, "sft_loss": 0.70703125, "step": 3883 }, { "dpo_loss": 0.251953125, "epoch": 0.62, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 3.194696893840699e-07, "loss": 0.1423, "projector_lr": 9.584090681522098e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.265625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -7.21875, "sft_loss": 0.75390625, "step": 3884 }, { "dpo_loss": 0.1220703125, "epoch": 0.62, "final_loss": 0.1220703125, "grad_norm": 0.0, "learning_rate": 3.1923295267211423e-07, "loss": 0.1346, "projector_lr": 9.57698858016343e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.75, "sft_loss": 0.97265625, "step": 3885 }, { "dpo_loss": 0.345703125, "epoch": 0.62, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 3.189962625679763e-07, "loss": 0.2304, "projector_lr": 9.56988787703929e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.625, "sft_loss": 0.85546875, "step": 3886 }, { "dpo_loss": 0.09375, "epoch": 0.62, "final_loss": 0.09375, "grad_norm": 0.0, "learning_rate": 3.1875961913268255e-07, "loss": 0.1006, "projector_lr": 9.562788573980478e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.84375, "sft_loss": 0.61328125, "step": 3887 }, { "dpo_loss": 0.1787109375, "epoch": 0.62, "final_loss": 0.1787109375, "grad_norm": 0.0, "learning_rate": 3.1852302242724784e-07, "loss": 0.148, "projector_lr": 9.555690672817436e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.765625, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.125, "sft_loss": 1.0, "step": 3888 }, { "dpo_loss": 0.46875, "epoch": 0.62, "final_loss": 0.46875, "grad_norm": 0.0, "learning_rate": 3.1828647251267483e-07, "loss": 0.7158, "projector_lr": 9.548594175380245e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 3.1875, "rewards_train/rejected": -3.9375, "sft_loss": 0.65625, "step": 3889 }, { "dpo_loss": 0.3359375, "epoch": 0.62, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 3.1804996944995365e-07, "loss": 0.5256, "projector_lr": 9.541499083498611e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.78125, "sft_loss": 0.74609375, "step": 3890 }, { "dpo_loss": 0.25390625, "epoch": 0.62, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 3.17813513300063e-07, "loss": 0.1831, "projector_lr": 9.534405399001891e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.734375, "rewards_train/margins": 2.453125, "rewards_train/rejected": -4.1875, "sft_loss": 0.91015625, "step": 3891 }, { "dpo_loss": 0.287109375, "epoch": 0.62, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 3.1757710412396914e-07, "loss": 0.2982, "projector_lr": 9.527313123719075e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.78125, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.71875, "sft_loss": 0.796875, "step": 3892 }, { "dpo_loss": 0.38671875, "epoch": 0.62, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 3.1734074198262656e-07, "loss": 0.1996, "projector_lr": 9.520222259478797e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.75, "sft_loss": 0.62890625, "step": 3893 }, { "dpo_loss": 0.31640625, "epoch": 0.62, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 3.171044269369769e-07, "loss": 0.4724, "projector_lr": 9.513132808109308e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.59375, "sft_loss": 0.6328125, "step": 3894 }, { "dpo_loss": 0.1826171875, "epoch": 0.62, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 3.1686815904795037e-07, "loss": 0.0961, "projector_lr": 9.506044771438513e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.25, "sft_loss": 0.86328125, "step": 3895 }, { "dpo_loss": 0.1513671875, "epoch": 0.62, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 3.1663193837646473e-07, "loss": 0.1043, "projector_lr": 9.498958151293943e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.6875, "sft_loss": 1.0078125, "step": 3896 }, { "dpo_loss": 0.49609375, "epoch": 0.62, "final_loss": 0.49609375, "grad_norm": 0.0, "learning_rate": 3.1639576498342557e-07, "loss": 0.2809, "projector_lr": 9.491872949502768e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 2.484375, "rewards_train/rejected": -3.375, "sft_loss": 0.875, "step": 3897 }, { "dpo_loss": 0.0186767578125, "epoch": 0.62, "final_loss": 0.0186767578125, "grad_norm": 0.0, "learning_rate": 3.1615963892972666e-07, "loss": 0.1682, "projector_lr": 9.484789167891799e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.453125, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.75, "sft_loss": 0.83203125, "step": 3898 }, { "dpo_loss": 0.09423828125, "epoch": 0.62, "final_loss": 0.09423828125, "grad_norm": 0.0, "learning_rate": 3.1592356027624866e-07, "loss": 0.3969, "projector_lr": 9.47770680828746e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7890625, "rewards_train/margins": 4.59375, "rewards_train/rejected": -6.375, "sft_loss": 0.7109375, "step": 3899 }, { "dpo_loss": 0.16796875, "epoch": 0.62, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 3.15687529083861e-07, "loss": 0.2497, "projector_lr": 9.47062587251583e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.6875, "sft_loss": 0.81640625, "step": 3900 }, { "dpo_loss": 0.1689453125, "epoch": 0.62, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 3.154515454134202e-07, "loss": 0.2241, "projector_lr": 9.463546362402606e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.21875, "sft_loss": 0.8828125, "step": 3901 }, { "dpo_loss": 0.078125, "epoch": 0.62, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 3.152156093257711e-07, "loss": 0.2151, "projector_lr": 9.456468279773133e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.734375, "rewards_train/margins": 5.5625, "rewards_train/rejected": -7.28125, "sft_loss": 0.6015625, "step": 3902 }, { "dpo_loss": 0.09814453125, "epoch": 0.62, "final_loss": 0.09814453125, "grad_norm": 0.0, "learning_rate": 3.1497972088174554e-07, "loss": 0.0961, "projector_lr": 9.449391626452368e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.71875, "sft_loss": 0.8671875, "step": 3903 }, { "dpo_loss": 0.306640625, "epoch": 0.62, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 3.1474388014216375e-07, "loss": 0.3358, "projector_lr": 9.442316404264913e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.9375, "sft_loss": 0.796875, "step": 3904 }, { "dpo_loss": 0.4609375, "epoch": 0.62, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 3.145080871678333e-07, "loss": 0.2463, "projector_lr": 9.435242615034999e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 2.625, "rewards_train/rejected": -3.625, "sft_loss": 0.890625, "step": 3905 }, { "dpo_loss": 0.0849609375, "epoch": 0.62, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 3.1427234201954957e-07, "loss": 0.1764, "projector_lr": 9.428170260586487e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.15625, "rewards_train/margins": 6.5625, "rewards_train/rejected": -7.71875, "sft_loss": 0.81640625, "step": 3906 }, { "dpo_loss": 0.232421875, "epoch": 0.63, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 3.140366447580958e-07, "loss": 0.1225, "projector_lr": 9.421099342742874e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.28125, "rewards_train/margins": 2.453125, "rewards_train/rejected": -3.734375, "sft_loss": 0.6796875, "step": 3907 }, { "dpo_loss": 0.34375, "epoch": 0.63, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 3.1380099544424223e-07, "loss": 0.1929, "projector_lr": 9.414029863327267e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.015625, "rewards_train/margins": 2.328125, "rewards_train/rejected": -3.34375, "sft_loss": 0.87109375, "step": 3908 }, { "dpo_loss": 0.038818359375, "epoch": 0.63, "final_loss": 0.038818359375, "grad_norm": 0.0, "learning_rate": 3.135653941387473e-07, "loss": 0.0241, "projector_lr": 9.40696182416242e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -4.59375, "sft_loss": 0.625, "step": 3909 }, { "dpo_loss": 0.2197265625, "epoch": 0.63, "final_loss": 0.2197265625, "grad_norm": 0.0, "learning_rate": 3.1332984090235713e-07, "loss": 0.1345, "projector_lr": 9.399895227070716e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.48828125, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.40625, "sft_loss": 0.81640625, "step": 3910 }, { "dpo_loss": 0.2578125, "epoch": 0.63, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 3.1309433579580525e-07, "loss": 0.1661, "projector_lr": 9.392830073874158e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.4375, "sft_loss": 0.7265625, "step": 3911 }, { "dpo_loss": 0.08203125, "epoch": 0.63, "final_loss": 0.08203125, "grad_norm": 0.0, "learning_rate": 3.1285887887981256e-07, "loss": 0.1255, "projector_lr": 9.385766366394377e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.41796875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.4375, "sft_loss": 0.7265625, "step": 3912 }, { "dpo_loss": 0.003662109375, "epoch": 0.63, "final_loss": 0.003662109375, "grad_norm": 0.0, "learning_rate": 3.12623470215088e-07, "loss": 0.3261, "projector_lr": 9.378704106452641e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.796875, "rewards_train/margins": 6.9375, "rewards_train/rejected": -7.71875, "sft_loss": 0.80078125, "step": 3913 }, { "dpo_loss": 0.40234375, "epoch": 0.63, "final_loss": 0.40234375, "grad_norm": 0.0, "learning_rate": 3.1238810986232764e-07, "loss": 0.3748, "projector_lr": 9.37164329586983e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.15234375, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.125, "sft_loss": 0.69140625, "step": 3914 }, { "dpo_loss": 0.1025390625, "epoch": 0.63, "final_loss": 0.1025390625, "grad_norm": 0.0, "learning_rate": 3.121527978822154e-07, "loss": 0.3607, "projector_lr": 9.364583936466462e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.1875, "sft_loss": 0.72265625, "step": 3915 }, { "dpo_loss": 0.08984375, "epoch": 0.63, "final_loss": 0.08984375, "grad_norm": 0.0, "learning_rate": 3.119175343354226e-07, "loss": 0.1291, "projector_lr": 9.35752603006268e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.40625, "rewards_train/rejected": -4.4375, "sft_loss": 0.62890625, "step": 3916 }, { "dpo_loss": 0.189453125, "epoch": 0.63, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 3.1168231928260803e-07, "loss": 0.112, "projector_lr": 9.350469578478241e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 5.875, "rewards_train/rejected": -7.15625, "sft_loss": 0.7734375, "step": 3917 }, { "dpo_loss": 0.033935546875, "epoch": 0.63, "final_loss": 0.033935546875, "grad_norm": 0.0, "learning_rate": 3.114471527844181e-07, "loss": 0.102, "projector_lr": 9.343414583532543e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.625, "rewards_train/margins": 6.25, "rewards_train/rejected": -6.875, "sft_loss": 0.73828125, "step": 3918 }, { "dpo_loss": 0.08154296875, "epoch": 0.63, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 3.1121203490148664e-07, "loss": 0.061, "projector_lr": 9.336361047044599e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4375, "rewards_train/margins": 6.84375, "rewards_train/rejected": -8.25, "sft_loss": 0.63671875, "step": 3919 }, { "dpo_loss": 0.11669921875, "epoch": 0.63, "final_loss": 0.11669921875, "grad_norm": 0.0, "learning_rate": 3.10976965694435e-07, "loss": 0.1253, "projector_lr": 9.32930897083305e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 6.09375, "rewards_train/rejected": -7.03125, "sft_loss": 0.71875, "step": 3920 }, { "dpo_loss": 0.326171875, "epoch": 0.63, "final_loss": 0.326171875, "grad_norm": 0.0, "learning_rate": 3.1074194522387166e-07, "loss": 0.4488, "projector_lr": 9.32225835671615e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.018798828125, "rewards_train/margins": 2.53125, "rewards_train/rejected": -2.546875, "sft_loss": 0.71875, "step": 3921 }, { "dpo_loss": 0.109375, "epoch": 0.63, "final_loss": 0.109375, "grad_norm": 0.0, "learning_rate": 3.10506973550393e-07, "loss": 0.0729, "projector_lr": 9.31520920651179e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4140625, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.28125, "sft_loss": 0.8046875, "step": 3922 }, { "dpo_loss": 0.12158203125, "epoch": 0.63, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 3.102720507345825e-07, "loss": 0.235, "projector_lr": 9.308161522037476e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.01220703125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.09375, "sft_loss": 0.5625, "step": 3923 }, { "dpo_loss": 0.259765625, "epoch": 0.63, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 3.1003717683701124e-07, "loss": 0.1366, "projector_lr": 9.301115305110338e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.109375, "rewards_train/margins": 3.640625, "rewards_train/rejected": -5.75, "sft_loss": 0.69140625, "step": 3924 }, { "dpo_loss": 0.06298828125, "epoch": 0.63, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 3.098023519182379e-07, "loss": 0.0958, "projector_lr": 9.294070557547138e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5625, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.65625, "sft_loss": 0.84375, "step": 3925 }, { "dpo_loss": 0.04296875, "epoch": 0.63, "final_loss": 0.04296875, "grad_norm": 0.0, "learning_rate": 3.0956757603880764e-07, "loss": 0.0277, "projector_lr": 9.287027281164229e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06201171875, "rewards_train/margins": 7.3125, "rewards_train/rejected": -7.375, "sft_loss": 0.72265625, "step": 3926 }, { "dpo_loss": 0.0228271484375, "epoch": 0.63, "final_loss": 0.0228271484375, "grad_norm": 0.0, "learning_rate": 3.0933284925925385e-07, "loss": 0.0253, "projector_lr": 9.279985477777616e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.28125, "sft_loss": 0.98046875, "step": 3927 }, { "dpo_loss": 0.031982421875, "epoch": 0.63, "final_loss": 0.031982421875, "grad_norm": 0.0, "learning_rate": 3.0909817164009694e-07, "loss": 0.1786, "projector_lr": 9.272945149202909e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.375, "sft_loss": 0.76171875, "step": 3928 }, { "dpo_loss": 0.12890625, "epoch": 0.63, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 3.088635432418449e-07, "loss": 0.1167, "projector_lr": 9.265906297255349e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.625, "sft_loss": 0.82421875, "step": 3929 }, { "dpo_loss": 0.06689453125, "epoch": 0.63, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 3.0862896412499255e-07, "loss": 0.1085, "projector_lr": 9.258868923749778e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.265625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.75, "sft_loss": 0.7890625, "step": 3930 }, { "dpo_loss": 0.0927734375, "epoch": 0.63, "final_loss": 0.0927734375, "grad_norm": 0.0, "learning_rate": 3.0839443435002235e-07, "loss": 0.1735, "projector_lr": 9.25183303050067e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.15625, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.6875, "sft_loss": 0.6328125, "step": 3931 }, { "dpo_loss": 0.09765625, "epoch": 0.63, "final_loss": 0.09765625, "grad_norm": 0.0, "learning_rate": 3.0815995397740393e-07, "loss": 0.0499, "projector_lr": 9.244798619322117e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7734375, "rewards_train/margins": 6.09375, "rewards_train/rejected": -7.84375, "sft_loss": 0.62890625, "step": 3932 }, { "dpo_loss": 0.169921875, "epoch": 0.63, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 3.079255230675941e-07, "loss": 0.2086, "projector_lr": 9.237765692027824e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.8125, "sft_loss": 1.703125, "step": 3933 }, { "dpo_loss": 0.006927490234375, "epoch": 0.63, "final_loss": 0.006927490234375, "grad_norm": 0.0, "learning_rate": 3.076911416810375e-07, "loss": 0.0102, "projector_lr": 9.230734250431126e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.375, "rewards_train/margins": 7.1875, "rewards_train/rejected": -7.5625, "sft_loss": 0.65234375, "step": 3934 }, { "dpo_loss": 0.032470703125, "epoch": 0.63, "final_loss": 0.032470703125, "grad_norm": 0.0, "learning_rate": 3.0745680987816495e-07, "loss": 0.2815, "projector_lr": 9.22370429634495e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 6.5, "rewards_train/rejected": -7.34375, "sft_loss": 0.7265625, "step": 3935 }, { "dpo_loss": 0.09814453125, "epoch": 0.63, "final_loss": 0.09814453125, "grad_norm": 0.0, "learning_rate": 3.0722252771939533e-07, "loss": 0.1973, "projector_lr": 9.216675831581861e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.306640625, "rewards_train/margins": 3.078125, "rewards_train/rejected": -3.390625, "sft_loss": 0.5703125, "step": 3936 }, { "dpo_loss": 0.4765625, "epoch": 0.63, "final_loss": 0.4765625, "grad_norm": 0.0, "learning_rate": 3.0698829526513437e-07, "loss": 0.3028, "projector_lr": 9.209648857954032e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.890625, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.125, "sft_loss": 0.875, "step": 3937 }, { "dpo_loss": 0.201171875, "epoch": 0.63, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 3.067541125757753e-07, "loss": 0.1688, "projector_lr": 9.202623377273261e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.859375, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.34375, "sft_loss": 0.9296875, "step": 3938 }, { "dpo_loss": 0.119140625, "epoch": 0.63, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 3.0651997971169794e-07, "loss": 0.0858, "projector_lr": 9.195599391350939e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.28125, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.59375, "sft_loss": 0.69921875, "step": 3939 }, { "dpo_loss": 0.1669921875, "epoch": 0.63, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 3.062858967332698e-07, "loss": 0.3009, "projector_lr": 9.188576901998095e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.34375, "sft_loss": 0.875, "step": 3940 }, { "dpo_loss": 0.0908203125, "epoch": 0.63, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 3.060518637008452e-07, "loss": 0.1633, "projector_lr": 9.181555911025358e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.84375, "sft_loss": 0.87890625, "step": 3941 }, { "dpo_loss": 0.10546875, "epoch": 0.63, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 3.0581788067476593e-07, "loss": 0.0946, "projector_lr": 9.174536420242979e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96875, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.28125, "sft_loss": 0.65234375, "step": 3942 }, { "dpo_loss": 0.05029296875, "epoch": 0.63, "final_loss": 0.05029296875, "grad_norm": 0.0, "learning_rate": 3.055839477153607e-07, "loss": 0.1997, "projector_lr": 9.167518431460822e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.62109375, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.75, "sft_loss": 0.78515625, "step": 3943 }, { "dpo_loss": 0.080078125, "epoch": 0.63, "final_loss": 0.080078125, "grad_norm": 0.0, "learning_rate": 3.05350064882945e-07, "loss": 0.1932, "projector_lr": 9.160501946488351e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6171875, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.5, "sft_loss": 0.78515625, "step": 3944 }, { "dpo_loss": 0.279296875, "epoch": 0.63, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 3.0511623223782177e-07, "loss": 0.3384, "projector_lr": 9.153486967134655e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.375, "sft_loss": 0.84765625, "step": 3945 }, { "dpo_loss": 0.154296875, "epoch": 0.63, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 3.04882449840281e-07, "loss": 0.3447, "projector_lr": 9.14647349520843e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.84375, "sft_loss": 0.5859375, "step": 3946 }, { "dpo_loss": 0.3515625, "epoch": 0.63, "final_loss": 0.3515625, "grad_norm": 0.0, "learning_rate": 3.0464871775059986e-07, "loss": 0.2659, "projector_lr": 9.139461532517997e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.34375, "sft_loss": 1.03125, "step": 3947 }, { "dpo_loss": 0.1220703125, "epoch": 0.63, "final_loss": 0.1220703125, "grad_norm": 0.0, "learning_rate": 3.044150360290419e-07, "loss": 0.072, "projector_lr": 9.132451080871257e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.890625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -7.5625, "sft_loss": 0.91015625, "step": 3948 }, { "dpo_loss": 0.061767578125, "epoch": 0.63, "final_loss": 0.061767578125, "grad_norm": 0.0, "learning_rate": 3.041814047358583e-07, "loss": 0.2583, "projector_lr": 9.12544214207575e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.875, "rewards_train/margins": 6.5625, "rewards_train/rejected": -7.4375, "sft_loss": 0.61328125, "step": 3949 }, { "dpo_loss": 0.09912109375, "epoch": 0.63, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 3.039478239312871e-07, "loss": 0.3106, "projector_lr": 9.118434717938613e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.193359375, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.5, "sft_loss": 0.625, "step": 3950 }, { "dpo_loss": 0.21484375, "epoch": 0.63, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 3.037142936755532e-07, "loss": 0.1142, "projector_lr": 9.111428810266598e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.671875, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.84375, "sft_loss": 0.8828125, "step": 3951 }, { "dpo_loss": 0.01531982421875, "epoch": 0.63, "final_loss": 0.01531982421875, "grad_norm": 0.0, "learning_rate": 3.034808140288688e-07, "loss": 0.0471, "projector_lr": 9.104424420866064e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.083984375, "rewards_train/margins": 8.5625, "rewards_train/rejected": -8.625, "sft_loss": 0.474609375, "step": 3952 }, { "dpo_loss": 0.0859375, "epoch": 0.63, "final_loss": 0.0859375, "grad_norm": 0.0, "learning_rate": 3.0324738505143243e-07, "loss": 0.1647, "projector_lr": 9.097421551542974e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.625, "sft_loss": 0.7265625, "step": 3953 }, { "dpo_loss": 0.09130859375, "epoch": 0.63, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 3.030140068034302e-07, "loss": 0.1067, "projector_lr": 9.090420204102907e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.53515625, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.9375, "sft_loss": 0.60546875, "step": 3954 }, { "dpo_loss": 0.2392578125, "epoch": 0.63, "final_loss": 0.2392578125, "grad_norm": 0.0, "learning_rate": 3.027806793450346e-07, "loss": 0.1814, "projector_lr": 9.08342038035104e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.25, "rewards_train/margins": 4.8125, "rewards_train/rejected": -7.0625, "sft_loss": 0.96875, "step": 3955 }, { "dpo_loss": 0.291015625, "epoch": 0.63, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 3.0254740273640565e-07, "loss": 0.5567, "projector_lr": 9.07642208209217e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.2314453125, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.34375, "sft_loss": 0.6484375, "step": 3956 }, { "dpo_loss": 0.10986328125, "epoch": 0.63, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 3.023141770376896e-07, "loss": 0.1279, "projector_lr": 9.069425311130688e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.71875, "sft_loss": 0.7734375, "step": 3957 }, { "dpo_loss": 0.09326171875, "epoch": 0.63, "final_loss": 0.09326171875, "grad_norm": 0.0, "learning_rate": 3.020810023090199e-07, "loss": 0.1043, "projector_lr": 9.062430069270597e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.69140625, "rewards_train/margins": 6.15625, "rewards_train/rejected": -6.84375, "sft_loss": 0.7734375, "step": 3958 }, { "dpo_loss": 0.034912109375, "epoch": 0.63, "final_loss": 0.034912109375, "grad_norm": 0.0, "learning_rate": 3.0184787861051684e-07, "loss": 0.1662, "projector_lr": 9.055436358315506e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.78125, "sft_loss": 0.8125, "step": 3959 }, { "dpo_loss": 0.1552734375, "epoch": 0.63, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 3.016148060022877e-07, "loss": 0.2041, "projector_lr": 9.048444180068633e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 2.265625, "rewards_train/rejected": -2.859375, "sft_loss": 0.70703125, "step": 3960 }, { "dpo_loss": 0.09423828125, "epoch": 0.63, "final_loss": 0.09423828125, "grad_norm": 0.0, "learning_rate": 3.0138178454442643e-07, "loss": 0.1498, "projector_lr": 9.041453536332793e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.203125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -5.21875, "sft_loss": 0.69921875, "step": 3961 }, { "dpo_loss": 0.2392578125, "epoch": 0.63, "final_loss": 0.2392578125, "grad_norm": 0.0, "learning_rate": 3.0114881429701343e-07, "loss": 0.2242, "projector_lr": 9.034464428910403e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 2.75, "rewards_train/rejected": -4.25, "sft_loss": 0.75, "step": 3962 }, { "dpo_loss": 0.1474609375, "epoch": 0.63, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 3.0091589532011663e-07, "loss": 0.1616, "projector_lr": 9.0274768596035e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.375, "sft_loss": 0.76171875, "step": 3963 }, { "dpo_loss": 0.296875, "epoch": 0.63, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 3.006830276737902e-07, "loss": 0.2178, "projector_lr": 9.020490830213706e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.046875, "rewards_train/margins": 5.90625, "rewards_train/rejected": -7.9375, "sft_loss": 0.96484375, "step": 3964 }, { "dpo_loss": 0.1494140625, "epoch": 0.63, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 3.004502114180755e-07, "loss": 0.3853, "projector_lr": 9.013506342542265e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.234375, "rewards_train/margins": 5.4375, "rewards_train/rejected": -5.6875, "sft_loss": 0.65234375, "step": 3965 }, { "dpo_loss": 0.048095703125, "epoch": 0.63, "final_loss": 0.048095703125, "grad_norm": 0.0, "learning_rate": 3.0021744661299983e-07, "loss": 0.1358, "projector_lr": 9.006523398389996e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.77734375, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.53125, "sft_loss": 0.66796875, "step": 3966 }, { "dpo_loss": 0.087890625, "epoch": 0.63, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 2.9998473331857834e-07, "loss": 0.0885, "projector_lr": 8.999541999557351e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.5625, "sft_loss": 0.625, "step": 3967 }, { "dpo_loss": 0.00958251953125, "epoch": 0.63, "final_loss": 0.00958251953125, "grad_norm": 0.0, "learning_rate": 2.997520715948119e-07, "loss": 0.0167, "projector_lr": 8.992562147844358e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.146484375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.25, "sft_loss": 0.6953125, "step": 3968 }, { "dpo_loss": 0.10205078125, "epoch": 0.64, "final_loss": 0.10205078125, "grad_norm": 0.0, "learning_rate": 2.9951946150168885e-07, "loss": 0.0917, "projector_lr": 8.985583845050666e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.25, "sft_loss": 0.6796875, "step": 3969 }, { "dpo_loss": 0.169921875, "epoch": 0.64, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 2.992869030991839e-07, "loss": 0.1091, "projector_lr": 8.978607092975518e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.84375, "sft_loss": 0.46875, "step": 3970 }, { "dpo_loss": 0.1806640625, "epoch": 0.64, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 2.9905439644725815e-07, "loss": 0.1313, "projector_lr": 8.971631893417746e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.33203125, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.09375, "sft_loss": 0.77734375, "step": 3971 }, { "dpo_loss": 0.0311279296875, "epoch": 0.64, "final_loss": 0.0311279296875, "grad_norm": 0.0, "learning_rate": 2.988219416058597e-07, "loss": 0.082, "projector_lr": 8.964658248175792e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.9375, "sft_loss": 0.9921875, "step": 3972 }, { "dpo_loss": 0.1904296875, "epoch": 0.64, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 2.985895386349233e-07, "loss": 0.0997, "projector_lr": 8.9576861590477e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.21875, "sft_loss": 0.6640625, "step": 3973 }, { "dpo_loss": 0.2353515625, "epoch": 0.64, "final_loss": 0.2353515625, "grad_norm": 0.0, "learning_rate": 2.9835718759437037e-07, "loss": 0.1177, "projector_lr": 8.950715627831113e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.177734375, "rewards_train/margins": 6.25, "rewards_train/rejected": -6.4375, "sft_loss": 0.859375, "step": 3974 }, { "dpo_loss": 0.1201171875, "epoch": 0.64, "final_loss": 0.1201171875, "grad_norm": 0.0, "learning_rate": 2.981248885441086e-07, "loss": 0.1173, "projector_lr": 8.943746656323257e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.359375, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.28125, "sft_loss": 0.765625, "step": 3975 }, { "dpo_loss": 0.240234375, "epoch": 0.64, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 2.9789264154403235e-07, "loss": 0.1866, "projector_lr": 8.936779246320972e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.42578125, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.78125, "sft_loss": 0.859375, "step": 3976 }, { "dpo_loss": 0.130859375, "epoch": 0.64, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 2.97660446654023e-07, "loss": 0.1145, "projector_lr": 8.929813399620691e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.72265625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.5625, "sft_loss": 0.59765625, "step": 3977 }, { "dpo_loss": 0.1875, "epoch": 0.64, "final_loss": 0.1875, "grad_norm": 0.0, "learning_rate": 2.9742830393394813e-07, "loss": 0.1336, "projector_lr": 8.922849118018445e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5, "rewards_train/margins": 6.875, "rewards_train/rejected": -7.375, "sft_loss": 0.79296875, "step": 3978 }, { "dpo_loss": 0.75, "epoch": 0.64, "final_loss": 0.75, "grad_norm": 0.0, "learning_rate": 2.9719621344366206e-07, "loss": 0.3794, "projector_lr": 8.915886403309863e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.046875, "rewards_train/margins": 3.125, "rewards_train/rejected": -4.1875, "sft_loss": 0.9296875, "step": 3979 }, { "dpo_loss": 0.1728515625, "epoch": 0.64, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 2.9696417524300516e-07, "loss": 0.094, "projector_lr": 8.908925257290155e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.46875, "sft_loss": 0.6640625, "step": 3980 }, { "dpo_loss": 0.287109375, "epoch": 0.64, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 2.967321893918048e-07, "loss": 0.2058, "projector_lr": 8.901965681754146e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.21875, "sft_loss": 0.83203125, "step": 3981 }, { "dpo_loss": 0.384765625, "epoch": 0.64, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 2.9650025594987494e-07, "loss": 0.2384, "projector_lr": 8.895007678496249e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.4453125, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.25, "sft_loss": 0.640625, "step": 3982 }, { "dpo_loss": 0.263671875, "epoch": 0.64, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 2.9626837497701573e-07, "loss": 0.1454, "projector_lr": 8.888051249310473e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.0625, "sft_loss": 0.62890625, "step": 3983 }, { "dpo_loss": 0.16796875, "epoch": 0.64, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 2.960365465330137e-07, "loss": 0.1843, "projector_lr": 8.881096395990411e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.15625, "sft_loss": 0.90234375, "step": 3984 }, { "dpo_loss": 0.0201416015625, "epoch": 0.64, "final_loss": 0.0201416015625, "grad_norm": 0.0, "learning_rate": 2.9580477067764206e-07, "loss": 0.1365, "projector_lr": 8.874143120329262e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22265625, "rewards_train/margins": 6.28125, "rewards_train/rejected": -6.5, "sft_loss": 0.8046875, "step": 3985 }, { "dpo_loss": 0.07763671875, "epoch": 0.64, "final_loss": 0.07763671875, "grad_norm": 0.0, "learning_rate": 2.9557304747066046e-07, "loss": 0.1508, "projector_lr": 8.867191424119815e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.203125, "rewards_train/margins": 6.75, "rewards_train/rejected": -6.96875, "sft_loss": 0.6484375, "step": 3986 }, { "dpo_loss": 0.208984375, "epoch": 0.64, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 2.9534137697181494e-07, "loss": 0.1924, "projector_lr": 8.860241309154449e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.1875, "sft_loss": 0.609375, "step": 3987 }, { "dpo_loss": 0.734375, "epoch": 0.64, "final_loss": 0.734375, "grad_norm": 0.0, "learning_rate": 2.951097592408381e-07, "loss": 0.3705, "projector_lr": 8.853292777225145e-07, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.34375, "rewards_train/margins": 1.921875, "rewards_train/rejected": -3.265625, "sft_loss": 0.875, "step": 3988 }, { "dpo_loss": 0.119140625, "epoch": 0.64, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 2.948781943374485e-07, "loss": 0.1316, "projector_lr": 8.846345830123457e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.37890625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -5.9375, "sft_loss": 0.625, "step": 3989 }, { "dpo_loss": 0.484375, "epoch": 0.64, "final_loss": 0.484375, "grad_norm": 0.0, "learning_rate": 2.946466823213515e-07, "loss": 0.4973, "projector_lr": 8.839400469640545e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.0625, "sft_loss": 0.50390625, "step": 3990 }, { "dpo_loss": 0.263671875, "epoch": 0.64, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 2.9441522325223857e-07, "loss": 0.1651, "projector_lr": 8.832456697567158e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.4375, "sft_loss": 1.0078125, "step": 3991 }, { "dpo_loss": 0.10302734375, "epoch": 0.64, "final_loss": 0.10302734375, "grad_norm": 0.0, "learning_rate": 2.941838171897878e-07, "loss": 0.0829, "projector_lr": 8.825514515693635e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.78515625, "rewards_train/margins": 3.65625, "rewards_train/rejected": -4.4375, "sft_loss": 0.56640625, "step": 3992 }, { "dpo_loss": 0.2177734375, "epoch": 0.64, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 2.9395246419366323e-07, "loss": 0.1279, "projector_lr": 8.818573925809896e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.134765625, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.28125, "sft_loss": 0.671875, "step": 3993 }, { "dpo_loss": 0.0303955078125, "epoch": 0.64, "final_loss": 0.0303955078125, "grad_norm": 0.0, "learning_rate": 2.9372116432351566e-07, "loss": 0.1594, "projector_lr": 8.811634929705471e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2890625, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.1875, "sft_loss": 0.703125, "step": 3994 }, { "dpo_loss": 0.16015625, "epoch": 0.64, "final_loss": 0.16015625, "grad_norm": 0.0, "learning_rate": 2.934899176389818e-07, "loss": 0.1258, "projector_lr": 8.804697529169454e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.34375, "sft_loss": 0.79296875, "step": 3995 }, { "dpo_loss": 0.259765625, "epoch": 0.64, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 2.932587241996848e-07, "loss": 0.2541, "projector_lr": 8.797761725990544e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.06201171875, "rewards_train/margins": 4.59375, "rewards_train/rejected": -4.65625, "sft_loss": 0.8125, "step": 3996 }, { "dpo_loss": 0.0216064453125, "epoch": 0.64, "final_loss": 0.0216064453125, "grad_norm": 0.0, "learning_rate": 2.930275840652343e-07, "loss": 0.1865, "projector_lr": 8.79082752195703e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.25, "sft_loss": 0.62890625, "step": 3997 }, { "dpo_loss": 0.1435546875, "epoch": 0.64, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 2.927964972952258e-07, "loss": 0.2458, "projector_lr": 8.783894918856774e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.5625, "sft_loss": 0.58984375, "step": 3998 }, { "dpo_loss": 0.29296875, "epoch": 0.64, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 2.925654639492412e-07, "loss": 0.3923, "projector_lr": 8.776963918477236e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.419921875, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.1875, "sft_loss": 0.8515625, "step": 3999 }, { "dpo_loss": 0.2236328125, "epoch": 0.64, "final_loss": 0.2236328125, "grad_norm": 0.0, "learning_rate": 2.923344840868489e-07, "loss": 0.2512, "projector_lr": 8.770034522605468e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 3.671875, "rewards_train/rejected": -4.625, "sft_loss": 1.125, "step": 4000 }, { "dpo_loss": 0.09033203125, "epoch": 0.64, "final_loss": 0.09033203125, "grad_norm": 0.0, "learning_rate": 2.921035577676032e-07, "loss": 0.2331, "projector_lr": 8.763106733028097e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1064453125, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.1875, "sft_loss": 0.58984375, "step": 4001 }, { "dpo_loss": 0.06494140625, "epoch": 0.64, "final_loss": 0.06494140625, "grad_norm": 0.0, "learning_rate": 2.9187268505104435e-07, "loss": 0.126, "projector_lr": 8.756180551531331e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.765625, "rewards_train/margins": 6.15625, "rewards_train/rejected": -6.90625, "sft_loss": 0.7421875, "step": 4002 }, { "dpo_loss": 0.09326171875, "epoch": 0.64, "final_loss": 0.09326171875, "grad_norm": 0.0, "learning_rate": 2.916418659966995e-07, "loss": 0.1661, "projector_lr": 8.749255979900985e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4921875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.65625, "sft_loss": 0.703125, "step": 4003 }, { "dpo_loss": 0.04345703125, "epoch": 0.64, "final_loss": 0.04345703125, "grad_norm": 0.0, "learning_rate": 2.9141110066408114e-07, "loss": 0.2055, "projector_lr": 8.742333019922435e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.37109375, "rewards_train/margins": 6.40625, "rewards_train/rejected": -6.78125, "sft_loss": 0.59375, "step": 4004 }, { "dpo_loss": 0.265625, "epoch": 0.64, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 2.911803891126888e-07, "loss": 0.1679, "projector_lr": 8.735411673380665e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2060546875, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.375, "sft_loss": 0.78125, "step": 4005 }, { "dpo_loss": 0.01904296875, "epoch": 0.64, "final_loss": 0.01904296875, "grad_norm": 0.0, "learning_rate": 2.9094973140200747e-07, "loss": 0.0316, "projector_lr": 8.728491942060224e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.8125, "sft_loss": 0.7421875, "step": 4006 }, { "dpo_loss": 0.375, "epoch": 0.64, "final_loss": 0.375, "grad_norm": 0.0, "learning_rate": 2.907191275915084e-07, "loss": 0.4475, "projector_lr": 8.721573827745253e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.484375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.15625, "sft_loss": 0.70703125, "step": 4007 }, { "dpo_loss": 0.224609375, "epoch": 0.64, "final_loss": 0.224609375, "grad_norm": 0.0, "learning_rate": 2.9048857774064876e-07, "loss": 0.152, "projector_lr": 8.714657332219464e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.75, "sft_loss": 1.25, "step": 4008 }, { "dpo_loss": 0.08203125, "epoch": 0.64, "final_loss": 0.08203125, "grad_norm": 0.0, "learning_rate": 2.902580819088726e-07, "loss": 0.1038, "projector_lr": 8.707742457266177e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.380859375, "rewards_train/margins": 7.25, "rewards_train/rejected": -7.625, "sft_loss": 0.77734375, "step": 4009 }, { "dpo_loss": 0.03515625, "epoch": 0.64, "final_loss": 0.03515625, "grad_norm": 0.0, "learning_rate": 2.900276401556091e-07, "loss": 0.0739, "projector_lr": 8.700829204668274e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32421875, "rewards_train/margins": 5.375, "rewards_train/rejected": -5.6875, "sft_loss": 0.71484375, "step": 4010 }, { "dpo_loss": 0.1162109375, "epoch": 0.64, "final_loss": 0.1162109375, "grad_norm": 0.0, "learning_rate": 2.897972525402739e-07, "loss": 0.0785, "projector_lr": 8.693917576208217e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.71875, "sft_loss": 0.71484375, "step": 4011 }, { "dpo_loss": 0.42578125, "epoch": 0.64, "final_loss": 0.42578125, "grad_norm": 0.0, "learning_rate": 2.8956691912226885e-07, "loss": 0.3294, "projector_lr": 8.687007573668065e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.765625, "sft_loss": 0.9453125, "step": 4012 }, { "dpo_loss": 0.330078125, "epoch": 0.64, "final_loss": 0.330078125, "grad_norm": 0.0, "learning_rate": 2.8933663996098154e-07, "loss": 0.2324, "projector_lr": 8.680099198829448e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 3.234375, "rewards_train/rejected": -4.09375, "sft_loss": 0.828125, "step": 4013 }, { "dpo_loss": 0.0341796875, "epoch": 0.64, "final_loss": 0.0341796875, "grad_norm": 0.0, "learning_rate": 2.8910641511578553e-07, "loss": 0.0437, "projector_lr": 8.673192453473566e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.4375, "sft_loss": 0.6953125, "step": 4014 }, { "dpo_loss": 0.1806640625, "epoch": 0.64, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 2.888762446460409e-07, "loss": 0.1636, "projector_lr": 8.666287339381227e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.25, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.65625, "sft_loss": 1.421875, "step": 4015 }, { "dpo_loss": 0.1552734375, "epoch": 0.64, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 2.88646128611093e-07, "loss": 0.186, "projector_lr": 8.659383858332791e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 6.9375, "rewards_train/rejected": -7.875, "sft_loss": 1.0234375, "step": 4016 }, { "dpo_loss": 0.0184326171875, "epoch": 0.64, "final_loss": 0.0184326171875, "grad_norm": 0.0, "learning_rate": 2.8841606707027346e-07, "loss": 0.085, "projector_lr": 8.652482012108204e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7265625, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.53125, "sft_loss": 0.59375, "step": 4017 }, { "dpo_loss": 0.09228515625, "epoch": 0.64, "final_loss": 0.09228515625, "grad_norm": 0.0, "learning_rate": 2.8818606008290016e-07, "loss": 0.1448, "projector_lr": 8.645581802487005e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34765625, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.5625, "sft_loss": 0.59375, "step": 4018 }, { "dpo_loss": 0.0498046875, "epoch": 0.64, "final_loss": 0.0498046875, "grad_norm": 0.0, "learning_rate": 2.879561077082764e-07, "loss": 0.052, "projector_lr": 8.638683231248292e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.15625, "sft_loss": 0.88671875, "step": 4019 }, { "dpo_loss": 0.1640625, "epoch": 0.64, "final_loss": 0.1640625, "grad_norm": 0.0, "learning_rate": 2.877262100056917e-07, "loss": 0.2876, "projector_lr": 8.631786300170752e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40625, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.3125, "sft_loss": 0.75390625, "step": 4020 }, { "dpo_loss": 0.01214599609375, "epoch": 0.64, "final_loss": 0.01214599609375, "grad_norm": 0.0, "learning_rate": 2.874963670344212e-07, "loss": 0.1587, "projector_lr": 8.624891011032636e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 7.375, "rewards_train/rejected": -8.6875, "sft_loss": 0.55078125, "step": 4021 }, { "dpo_loss": 0.072265625, "epoch": 0.64, "final_loss": 0.072265625, "grad_norm": 0.0, "learning_rate": 2.8726657885372655e-07, "loss": 0.0644, "projector_lr": 8.617997365611798e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.578125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.9375, "sft_loss": 0.7421875, "step": 4022 }, { "dpo_loss": 0.0299072265625, "epoch": 0.64, "final_loss": 0.0299072265625, "grad_norm": 0.0, "learning_rate": 2.8703684552285443e-07, "loss": 0.0718, "projector_lr": 8.611105365685635e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 7.0, "rewards_train/rejected": -7.6875, "sft_loss": 0.5625, "step": 4023 }, { "dpo_loss": 0.0771484375, "epoch": 0.64, "final_loss": 0.0771484375, "grad_norm": 0.0, "learning_rate": 2.868071671010385e-07, "loss": 0.2029, "projector_lr": 8.604215013031155e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.59375, "sft_loss": 1.046875, "step": 4024 }, { "dpo_loss": 0.0458984375, "epoch": 0.64, "final_loss": 0.0458984375, "grad_norm": 0.0, "learning_rate": 2.865775436474966e-07, "loss": 0.1415, "projector_lr": 8.597326309424898e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.09912109375, "rewards_train/margins": 6.28125, "rewards_train/rejected": -6.1875, "sft_loss": 0.55859375, "step": 4025 }, { "dpo_loss": 0.291015625, "epoch": 0.64, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 2.8634797522143403e-07, "loss": 0.1574, "projector_lr": 8.590439256643023e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.15625, "sft_loss": 0.8125, "step": 4026 }, { "dpo_loss": 0.142578125, "epoch": 0.64, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 2.8611846188204103e-07, "loss": 0.1029, "projector_lr": 8.583553856461231e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.90625, "sft_loss": 0.71875, "step": 4027 }, { "dpo_loss": 0.13671875, "epoch": 0.64, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 2.858890036884942e-07, "loss": 0.0776, "projector_lr": 8.576670110654828e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.25, "sft_loss": 0.6953125, "step": 4028 }, { "dpo_loss": 0.1201171875, "epoch": 0.64, "final_loss": 0.1201171875, "grad_norm": 0.0, "learning_rate": 2.856596006999549e-07, "loss": 0.1355, "projector_lr": 8.569788020998646e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.21875, "sft_loss": 0.92578125, "step": 4029 }, { "dpo_loss": 0.1474609375, "epoch": 0.64, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 2.8543025297557146e-07, "loss": 0.1149, "projector_lr": 8.562907589267144e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.12451171875, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.1875, "sft_loss": 0.796875, "step": 4030 }, { "dpo_loss": 0.0072021484375, "epoch": 0.64, "final_loss": 0.0072021484375, "grad_norm": 0.0, "learning_rate": 2.8520096057447715e-07, "loss": 0.1429, "projector_lr": 8.556028817234314e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 7.40625, "rewards_train/rejected": -8.0625, "sft_loss": 0.6328125, "step": 4031 }, { "dpo_loss": 0.037109375, "epoch": 0.65, "final_loss": 0.037109375, "grad_norm": 0.0, "learning_rate": 2.8497172355579146e-07, "loss": 0.0438, "projector_lr": 8.549151706673744e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48046875, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.21875, "sft_loss": 0.76953125, "step": 4032 }, { "dpo_loss": 0.232421875, "epoch": 0.65, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 2.8474254197861936e-07, "loss": 0.4029, "projector_lr": 8.542276259358582e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.09375, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.28125, "sft_loss": 0.9765625, "step": 4033 }, { "dpo_loss": 0.2021484375, "epoch": 0.65, "final_loss": 0.2021484375, "grad_norm": 0.0, "learning_rate": 2.845134159020516e-07, "loss": 0.1203, "projector_lr": 8.535402477061548e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 3.4375, "rewards_train/rejected": -4.9375, "sft_loss": 0.77734375, "step": 4034 }, { "dpo_loss": 0.30859375, "epoch": 0.65, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 2.842843453851641e-07, "loss": 0.256, "projector_lr": 8.528530361554924e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.515625, "rewards_train/margins": 5.375, "rewards_train/rejected": -5.875, "sft_loss": 0.72265625, "step": 4035 }, { "dpo_loss": 0.8046875, "epoch": 0.65, "final_loss": 0.8046875, "grad_norm": 0.0, "learning_rate": 2.8405533048701966e-07, "loss": 0.4436, "projector_lr": 8.521659914610591e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.203125, "rewards_train/margins": 2.46875, "rewards_train/rejected": -3.671875, "sft_loss": 0.859375, "step": 4036 }, { "dpo_loss": 0.28515625, "epoch": 0.65, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 2.8382637126666565e-07, "loss": 0.2495, "projector_lr": 8.51479113799997e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.46875, "sft_loss": 1.125, "step": 4037 }, { "dpo_loss": 0.31640625, "epoch": 0.65, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 2.835974677831354e-07, "loss": 0.2615, "projector_lr": 8.507924033494063e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.875, "rewards_train/margins": 3.65625, "rewards_train/rejected": -4.53125, "sft_loss": 0.79296875, "step": 4038 }, { "dpo_loss": 0.072265625, "epoch": 0.65, "final_loss": 0.072265625, "grad_norm": 0.0, "learning_rate": 2.833686200954479e-07, "loss": 0.064, "projector_lr": 8.501058602863438e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9375, "rewards_train/margins": 3.671875, "rewards_train/rejected": -4.625, "sft_loss": 0.6953125, "step": 4039 }, { "dpo_loss": 0.0145263671875, "epoch": 0.65, "final_loss": 0.0145263671875, "grad_norm": 0.0, "learning_rate": 2.83139828262608e-07, "loss": 0.3244, "projector_lr": 8.494194847878241e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.26171875, "rewards_train/margins": 6.84375, "rewards_train/rejected": -6.5625, "sft_loss": 0.7421875, "step": 4040 }, { "dpo_loss": 0.322265625, "epoch": 0.65, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 2.829110923436056e-07, "loss": 0.1744, "projector_lr": 8.487332770308169e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.34375, "sft_loss": 0.8046875, "step": 4041 }, { "dpo_loss": 0.007568359375, "epoch": 0.65, "final_loss": 0.007568359375, "grad_norm": 0.0, "learning_rate": 2.8268241239741706e-07, "loss": 0.0204, "projector_lr": 8.480472371922513e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 6.53125, "rewards_train/rejected": -7.46875, "sft_loss": 0.9453125, "step": 4042 }, { "dpo_loss": 0.15625, "epoch": 0.65, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 2.8245378848300304e-07, "loss": 0.2006, "projector_lr": 8.473613654490093e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2275390625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -4.21875, "sft_loss": 0.54296875, "step": 4043 }, { "dpo_loss": 0.09521484375, "epoch": 0.65, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 2.82225220659311e-07, "loss": 0.0857, "projector_lr": 8.46675661977933e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.25, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.0625, "sft_loss": 0.67578125, "step": 4044 }, { "dpo_loss": 0.01361083984375, "epoch": 0.65, "final_loss": 0.01361083984375, "grad_norm": 0.0, "learning_rate": 2.8199670898527294e-07, "loss": 0.0832, "projector_lr": 8.459901269558189e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 7.1875, "rewards_train/rejected": -7.84375, "sft_loss": 0.640625, "step": 4045 }, { "dpo_loss": 0.259765625, "epoch": 0.65, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 2.817682535198076e-07, "loss": 0.5367, "projector_lr": 8.453047605594229e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.09375, "sft_loss": 0.8984375, "step": 4046 }, { "dpo_loss": 0.07861328125, "epoch": 0.65, "final_loss": 0.07861328125, "grad_norm": 0.0, "learning_rate": 2.815398543218176e-07, "loss": 0.2824, "projector_lr": 8.446195629654529e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0133056640625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.21875, "sft_loss": 0.7578125, "step": 4047 }, { "dpo_loss": 0.5625, "epoch": 0.65, "final_loss": 0.5625, "grad_norm": 0.0, "learning_rate": 2.8131151145019245e-07, "loss": 0.3615, "projector_lr": 8.439345343505774e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.96875, "sft_loss": 0.80859375, "step": 4048 }, { "dpo_loss": 0.0250244140625, "epoch": 0.65, "final_loss": 0.0250244140625, "grad_norm": 0.0, "learning_rate": 2.810832249638063e-07, "loss": 0.0606, "projector_lr": 8.43249674891419e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2099609375, "rewards_train/margins": 6.125, "rewards_train/rejected": -6.34375, "sft_loss": 0.69921875, "step": 4049 }, { "dpo_loss": 0.06640625, "epoch": 0.65, "final_loss": 0.06640625, "grad_norm": 0.0, "learning_rate": 2.808549949215194e-07, "loss": 0.0952, "projector_lr": 8.425649847645583e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 7.71875, "rewards_train/rejected": -8.0, "sft_loss": 0.8203125, "step": 4050 }, { "dpo_loss": 0.01226806640625, "epoch": 0.65, "final_loss": 0.01226806640625, "grad_norm": 0.0, "learning_rate": 2.806268213821771e-07, "loss": 0.0461, "projector_lr": 8.418804641465313e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.515625, "rewards_train/margins": 7.875, "rewards_train/rejected": -8.375, "sft_loss": 0.5234375, "step": 4051 }, { "dpo_loss": 0.173828125, "epoch": 0.65, "final_loss": 0.173828125, "grad_norm": 0.0, "learning_rate": 2.8039870440461e-07, "loss": 0.276, "projector_lr": 8.4119611321383e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84765625, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.375, "sft_loss": 0.875, "step": 4052 }, { "dpo_loss": 0.024169921875, "epoch": 0.65, "final_loss": 0.024169921875, "grad_norm": 0.0, "learning_rate": 2.8017064404763423e-07, "loss": 0.1622, "projector_lr": 8.405119321429027e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26953125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.125, "sft_loss": 0.58203125, "step": 4053 }, { "dpo_loss": 0.27734375, "epoch": 0.65, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 2.7994264037005175e-07, "loss": 0.5134, "projector_lr": 8.398279211101554e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.255859375, "rewards_train/margins": 6.125, "rewards_train/rejected": -6.375, "sft_loss": 0.474609375, "step": 4054 }, { "dpo_loss": 0.0947265625, "epoch": 0.65, "final_loss": 0.0947265625, "grad_norm": 0.0, "learning_rate": 2.797146934306495e-07, "loss": 0.0939, "projector_lr": 8.391440802919484e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.83984375, "rewards_train/margins": 6.09375, "rewards_train/rejected": -6.9375, "sft_loss": 0.7890625, "step": 4055 }, { "dpo_loss": 0.10693359375, "epoch": 0.65, "final_loss": 0.10693359375, "grad_norm": 0.0, "learning_rate": 2.7948680328819954e-07, "loss": 0.0823, "projector_lr": 8.384604098645986e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1748046875, "rewards_train/margins": 3.546875, "rewards_train/rejected": -3.734375, "sft_loss": 0.7109375, "step": 4056 }, { "dpo_loss": 0.1259765625, "epoch": 0.65, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 2.792589700014599e-07, "loss": 0.1998, "projector_lr": 8.377769100043798e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.625, "sft_loss": 0.75390625, "step": 4057 }, { "dpo_loss": 0.333984375, "epoch": 0.65, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 2.790311936291737e-07, "loss": 0.1966, "projector_lr": 8.370935808875212e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.375, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.15625, "sft_loss": 1.1875, "step": 4058 }, { "dpo_loss": 0.15625, "epoch": 0.65, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 2.78803474230069e-07, "loss": 0.3465, "projector_lr": 8.364104226902071e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.25, "sft_loss": 0.8359375, "step": 4059 }, { "dpo_loss": 0.1357421875, "epoch": 0.65, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 2.7857581186285996e-07, "loss": 0.0694, "projector_lr": 8.357274355885799e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.96875, "sft_loss": 0.9453125, "step": 4060 }, { "dpo_loss": 0.1298828125, "epoch": 0.65, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 2.783482065862453e-07, "loss": 0.0774, "projector_lr": 8.350446197587359e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.25, "sft_loss": 0.703125, "step": 4061 }, { "dpo_loss": 0.470703125, "epoch": 0.65, "final_loss": 0.470703125, "grad_norm": 0.0, "learning_rate": 2.7812065845890917e-07, "loss": 0.2876, "projector_lr": 8.343619753767277e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 2.671875, "rewards_train/rejected": -3.8125, "sft_loss": 1.15625, "step": 4062 }, { "dpo_loss": 0.64453125, "epoch": 0.65, "final_loss": 0.64453125, "grad_norm": 0.0, "learning_rate": 2.7789316753952156e-07, "loss": 0.439, "projector_lr": 8.336795026185648e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 2.5625, "rewards_train/rejected": -3.1875, "sft_loss": 0.73828125, "step": 4063 }, { "dpo_loss": 0.049072265625, "epoch": 0.65, "final_loss": 0.049072265625, "grad_norm": 0.0, "learning_rate": 2.7766573388673704e-07, "loss": 0.0349, "projector_lr": 8.329972016602112e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.71875, "sft_loss": 0.703125, "step": 4064 }, { "dpo_loss": 0.09619140625, "epoch": 0.65, "final_loss": 0.09619140625, "grad_norm": 0.0, "learning_rate": 2.774383575591956e-07, "loss": 0.0645, "projector_lr": 8.323150726775869e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.921875, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.9375, "sft_loss": 0.78515625, "step": 4065 }, { "dpo_loss": 0.1484375, "epoch": 0.65, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 2.772110386155223e-07, "loss": 0.0841, "projector_lr": 8.316331158465669e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -7.125, "sft_loss": 0.8828125, "step": 4066 }, { "dpo_loss": 0.322265625, "epoch": 0.65, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 2.7698377711432814e-07, "loss": 0.1844, "projector_lr": 8.309513313429844e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.296875, "rewards_train/margins": 5.65625, "rewards_train/rejected": -5.9375, "sft_loss": 0.55859375, "step": 4067 }, { "dpo_loss": 0.006134033203125, "epoch": 0.65, "final_loss": 0.006134033203125, "grad_norm": 0.0, "learning_rate": 2.767565731142082e-07, "loss": 0.0164, "projector_lr": 8.302697193426246e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35546875, "rewards_train/margins": 6.3125, "rewards_train/rejected": -5.96875, "sft_loss": 0.455078125, "step": 4068 }, { "dpo_loss": 0.193359375, "epoch": 0.65, "final_loss": 0.193359375, "grad_norm": 0.0, "learning_rate": 2.7652942667374397e-07, "loss": 0.1136, "projector_lr": 8.29588280021232e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.00634765625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -5.65625, "sft_loss": 0.80078125, "step": 4069 }, { "dpo_loss": 0.271484375, "epoch": 0.65, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 2.763023378515006e-07, "loss": 0.3412, "projector_lr": 8.289070135545018e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.07763671875, "rewards_train/margins": 2.59375, "rewards_train/rejected": -2.65625, "sft_loss": 0.62890625, "step": 4070 }, { "dpo_loss": 0.392578125, "epoch": 0.65, "final_loss": 0.392578125, "grad_norm": 0.0, "learning_rate": 2.7607530670602985e-07, "loss": 0.241, "projector_lr": 8.282259201180896e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.380859375, "rewards_train/margins": 5.15625, "rewards_train/rejected": -4.78125, "sft_loss": 0.88671875, "step": 4071 }, { "dpo_loss": 0.263671875, "epoch": 0.65, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 2.7584833329586767e-07, "loss": 0.1413, "projector_lr": 8.275449998876031e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 6.09375, "rewards_train/rejected": -7.125, "sft_loss": 0.84765625, "step": 4072 }, { "dpo_loss": 0.08154296875, "epoch": 0.65, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 2.756214176795358e-07, "loss": 0.0559, "projector_lr": 8.268642530386076e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.78125, "sft_loss": 0.84765625, "step": 4073 }, { "dpo_loss": 0.1005859375, "epoch": 0.65, "final_loss": 0.1005859375, "grad_norm": 0.0, "learning_rate": 2.753945599155402e-07, "loss": 0.1106, "projector_lr": 8.261836797466206e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.953125, "rewards_train/margins": 3.28125, "rewards_train/rejected": -4.21875, "sft_loss": 1.0078125, "step": 4074 }, { "dpo_loss": 0.236328125, "epoch": 0.65, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 2.751677600623727e-07, "loss": 0.3555, "projector_lr": 8.255032801871183e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0400390625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.0625, "sft_loss": 0.69921875, "step": 4075 }, { "dpo_loss": 0.0301513671875, "epoch": 0.65, "final_loss": 0.0301513671875, "grad_norm": 0.0, "learning_rate": 2.749410181785098e-07, "loss": 0.064, "projector_lr": 8.248230545355294e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.03125, "rewards_train/margins": 6.875, "rewards_train/rejected": -7.90625, "sft_loss": 0.78125, "step": 4076 }, { "dpo_loss": 0.04541015625, "epoch": 0.65, "final_loss": 0.04541015625, "grad_norm": 0.0, "learning_rate": 2.747143343224134e-07, "loss": 0.0527, "projector_lr": 8.241430029672404e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 6.09375, "rewards_train/rejected": -7.09375, "sft_loss": 0.85546875, "step": 4077 }, { "dpo_loss": 0.06787109375, "epoch": 0.65, "final_loss": 0.06787109375, "grad_norm": 0.0, "learning_rate": 2.7448770855253014e-07, "loss": 0.1368, "projector_lr": 8.234631256575904e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.25390625, "rewards_train/margins": 6.09375, "rewards_train/rejected": -6.375, "sft_loss": 0.71484375, "step": 4078 }, { "dpo_loss": 0.12890625, "epoch": 0.65, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 2.742611409272917e-07, "loss": 0.1174, "projector_lr": 8.227834227818751e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 6.09375, "rewards_train/rejected": -6.78125, "sft_loss": 0.83203125, "step": 4079 }, { "dpo_loss": 0.28515625, "epoch": 0.65, "final_loss": 0.28515625, "grad_norm": 0.0, "learning_rate": 2.7403463150511454e-07, "loss": 0.4645, "projector_lr": 8.221038945153436e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.125, "sft_loss": 0.7265625, "step": 4080 }, { "dpo_loss": 0.5390625, "epoch": 0.65, "final_loss": 0.5390625, "grad_norm": 0.0, "learning_rate": 2.738081803444009e-07, "loss": 0.3034, "projector_lr": 8.214245410332027e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.25, "rewards_train/margins": 2.53125, "rewards_train/rejected": -3.78125, "sft_loss": 0.67578125, "step": 4081 }, { "dpo_loss": 0.12890625, "epoch": 0.65, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 2.7358178750353733e-07, "loss": 0.1394, "projector_lr": 8.20745362510612e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.48046875, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.4375, "sft_loss": 0.6796875, "step": 4082 }, { "dpo_loss": 0.27734375, "epoch": 0.65, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 2.7335545304089536e-07, "loss": 0.1749, "projector_lr": 8.200663591226861e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.703125, "rewards_train/margins": 3.84375, "rewards_train/rejected": -5.5625, "sft_loss": 0.55078125, "step": 4083 }, { "dpo_loss": 0.181640625, "epoch": 0.65, "final_loss": 0.181640625, "grad_norm": 0.0, "learning_rate": 2.7312917701483164e-07, "loss": 0.2462, "projector_lr": 8.19387531044495e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.10546875, "rewards_train/margins": 3.390625, "rewards_train/rejected": -3.28125, "sft_loss": 0.703125, "step": 4084 }, { "dpo_loss": 0.416015625, "epoch": 0.65, "final_loss": 0.416015625, "grad_norm": 0.0, "learning_rate": 2.729029594836879e-07, "loss": 0.2367, "projector_lr": 8.187088784510638e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.953125, "rewards_train/rejected": -4.96875, "sft_loss": 1.0078125, "step": 4085 }, { "dpo_loss": 0.126953125, "epoch": 0.65, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 2.7267680050579034e-07, "loss": 0.0886, "projector_lr": 8.180304015173711e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.90625, "sft_loss": 0.953125, "step": 4086 }, { "dpo_loss": 0.1083984375, "epoch": 0.65, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 2.7245070013945093e-07, "loss": 0.0813, "projector_lr": 8.173521004183528e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.014892578125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.1875, "sft_loss": 0.38671875, "step": 4087 }, { "dpo_loss": 0.060546875, "epoch": 0.65, "final_loss": 0.060546875, "grad_norm": 0.0, "learning_rate": 2.7222465844296514e-07, "loss": 0.0868, "projector_lr": 8.166739753288956e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.4296875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -4.78125, "sft_loss": 0.578125, "step": 4088 }, { "dpo_loss": 0.0035552978515625, "epoch": 0.65, "final_loss": 0.0035552978515625, "grad_norm": 0.0, "learning_rate": 2.7199867547461475e-07, "loss": 0.233, "projector_lr": 8.159960264238443e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.09375, "rewards_train/margins": 7.46875, "rewards_train/rejected": -8.5625, "sft_loss": 0.8359375, "step": 4089 }, { "dpo_loss": 0.6015625, "epoch": 0.65, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 2.717727512926653e-07, "loss": 0.4406, "projector_lr": 8.15318253877996e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 1.953125, "rewards_train/rejected": -2.703125, "sft_loss": 0.71875, "step": 4090 }, { "dpo_loss": 0.0296630859375, "epoch": 0.65, "final_loss": 0.0296630859375, "grad_norm": 0.0, "learning_rate": 2.7154688595536803e-07, "loss": 0.1184, "projector_lr": 8.146406578661041e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.453125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.78125, "sft_loss": 0.8828125, "step": 4091 }, { "dpo_loss": 0.14453125, "epoch": 0.65, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 2.7132107952095853e-07, "loss": 0.1363, "projector_lr": 8.139632385628755e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1845703125, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.40625, "sft_loss": 0.60546875, "step": 4092 }, { "dpo_loss": 0.123046875, "epoch": 0.65, "final_loss": 0.123046875, "grad_norm": 0.0, "learning_rate": 2.7109533204765703e-07, "loss": 0.2736, "projector_lr": 8.132859961429713e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.96875, "sft_loss": 0.8671875, "step": 4093 }, { "dpo_loss": 0.11865234375, "epoch": 0.66, "final_loss": 0.11865234375, "grad_norm": 0.0, "learning_rate": 2.708696435936689e-07, "loss": 0.1237, "projector_lr": 8.126089307810067e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.34375, "rewards_train/margins": 4.9375, "rewards_train/rejected": -4.59375, "sft_loss": 0.6171875, "step": 4094 }, { "dpo_loss": 0.4140625, "epoch": 0.66, "final_loss": 0.4140625, "grad_norm": 0.0, "learning_rate": 2.7064401421718443e-07, "loss": 0.3952, "projector_lr": 8.119320426515533e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.46875, "rewards_train/margins": 3.6875, "rewards_train/rejected": -5.15625, "sft_loss": 0.8984375, "step": 4095 }, { "dpo_loss": 0.083984375, "epoch": 0.66, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 2.7041844397637824e-07, "loss": 0.0872, "projector_lr": 8.112553319291348e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.6875, "sft_loss": 0.93359375, "step": 4096 }, { "dpo_loss": 0.2373046875, "epoch": 0.66, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 2.7019293292940993e-07, "loss": 0.1227, "projector_lr": 8.105787987882298e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 3.640625, "rewards_train/rejected": -4.625, "sft_loss": 0.828125, "step": 4097 }, { "dpo_loss": 0.546875, "epoch": 0.66, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 2.699674811344239e-07, "loss": 0.3425, "projector_lr": 8.099024434032718e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.625, "rewards_train/margins": 2.5625, "rewards_train/rejected": -3.1875, "sft_loss": 0.6015625, "step": 4098 }, { "dpo_loss": 0.53515625, "epoch": 0.66, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 2.6974208864954926e-07, "loss": 0.2703, "projector_lr": 8.092262659486479e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.0625, "sft_loss": 0.86328125, "step": 4099 }, { "dpo_loss": 0.158203125, "epoch": 0.66, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 2.695167555328997e-07, "loss": 0.2597, "projector_lr": 8.085502665986992e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.421875, "rewards_train/margins": 3.46875, "rewards_train/rejected": -3.890625, "sft_loss": 0.61328125, "step": 4100 }, { "dpo_loss": 0.2275390625, "epoch": 0.66, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 2.6929148184257345e-07, "loss": 0.2323, "projector_lr": 8.078744455277203e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91015625, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.78125, "sft_loss": 0.73828125, "step": 4101 }, { "dpo_loss": 0.271484375, "epoch": 0.66, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 2.6906626763665397e-07, "loss": 0.1757, "projector_lr": 8.07198802909962e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.232421875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.5625, "sft_loss": 0.69921875, "step": 4102 }, { "dpo_loss": 0.1728515625, "epoch": 0.66, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 2.688411129732087e-07, "loss": 0.166, "projector_lr": 8.065233389196262e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 3.09375, "rewards_train/rejected": -3.765625, "sft_loss": 0.96484375, "step": 4103 }, { "dpo_loss": 0.1953125, "epoch": 0.66, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 2.686160179102905e-07, "loss": 0.1538, "projector_lr": 8.058480537308717e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.671875, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.5625, "sft_loss": 0.76171875, "step": 4104 }, { "dpo_loss": 0.24609375, "epoch": 0.66, "final_loss": 0.24609375, "grad_norm": 0.0, "learning_rate": 2.6839098250593627e-07, "loss": 0.201, "projector_lr": 8.051729475178089e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.048828125, "rewards_train/margins": 5.53125, "rewards_train/rejected": -5.46875, "sft_loss": 0.7890625, "step": 4105 }, { "dpo_loss": 0.024169921875, "epoch": 0.66, "final_loss": 0.024169921875, "grad_norm": 0.0, "learning_rate": 2.681660068181676e-07, "loss": 0.1075, "projector_lr": 8.044980204545028e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16796875, "rewards_train/margins": 7.84375, "rewards_train/rejected": -8.0, "sft_loss": 0.82421875, "step": 4106 }, { "dpo_loss": 0.08740234375, "epoch": 0.66, "final_loss": 0.08740234375, "grad_norm": 0.0, "learning_rate": 2.6794109090499063e-07, "loss": 0.1196, "projector_lr": 8.038232727149719e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.34375, "sft_loss": 0.84375, "step": 4107 }, { "dpo_loss": 0.1552734375, "epoch": 0.66, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 2.6771623482439664e-07, "loss": 0.1584, "projector_lr": 8.0314870447319e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.90234375, "rewards_train/margins": 3.296875, "rewards_train/rejected": -4.1875, "sft_loss": 1.0859375, "step": 4108 }, { "dpo_loss": 0.162109375, "epoch": 0.66, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 2.6749143863436086e-07, "loss": 0.1501, "projector_lr": 8.024743159030826e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 6.103515625e-05, "rewards_train/margins": 4.78125, "rewards_train/rejected": -4.78125, "sft_loss": 0.64453125, "step": 4109 }, { "dpo_loss": 0.06298828125, "epoch": 0.66, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 2.6726670239284333e-07, "loss": 0.0803, "projector_lr": 8.0180010717853e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71875, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.03125, "sft_loss": 0.71484375, "step": 4110 }, { "dpo_loss": 0.30859375, "epoch": 0.66, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 2.670420261577884e-07, "loss": 0.1874, "projector_lr": 8.011260784733653e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 2.203125, "rewards_train/rejected": -2.9375, "sft_loss": 0.8203125, "step": 4111 }, { "dpo_loss": 0.2109375, "epoch": 0.66, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 2.6681740998712555e-07, "loss": 0.1572, "projector_lr": 8.004522299613767e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.3125, "sft_loss": 1.140625, "step": 4112 }, { "dpo_loss": 0.2578125, "epoch": 0.66, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 2.6659285393876797e-07, "loss": 0.1639, "projector_lr": 7.99778561816304e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.053466796875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.0625, "sft_loss": 0.59375, "step": 4113 }, { "dpo_loss": 0.055419921875, "epoch": 0.66, "final_loss": 0.055419921875, "grad_norm": 0.0, "learning_rate": 2.663683580706143e-07, "loss": 0.0736, "projector_lr": 7.99105074211843e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19140625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.15625, "sft_loss": 0.6796875, "step": 4114 }, { "dpo_loss": 0.03857421875, "epoch": 0.66, "final_loss": 0.03857421875, "grad_norm": 0.0, "learning_rate": 2.661439224405464e-07, "loss": 0.1185, "projector_lr": 7.984317673216393e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.71875, "sft_loss": 0.88671875, "step": 4115 }, { "dpo_loss": 0.0224609375, "epoch": 0.66, "final_loss": 0.0224609375, "grad_norm": 0.0, "learning_rate": 2.6591954710643195e-07, "loss": 0.1242, "projector_lr": 7.977586413192959e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.322265625, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.71875, "sft_loss": 0.78515625, "step": 4116 }, { "dpo_loss": 0.064453125, "epoch": 0.66, "final_loss": 0.064453125, "grad_norm": 0.0, "learning_rate": 2.656952321261219e-07, "loss": 0.1015, "projector_lr": 7.970856963783657e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.265625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.71875, "sft_loss": 0.94921875, "step": 4117 }, { "dpo_loss": 0.0289306640625, "epoch": 0.66, "final_loss": 0.0289306640625, "grad_norm": 0.0, "learning_rate": 2.654709775574528e-07, "loss": 0.0838, "projector_lr": 7.964129326723586e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 6.375, "rewards_train/rejected": -6.9375, "sft_loss": 0.73046875, "step": 4118 }, { "dpo_loss": 0.314453125, "epoch": 0.66, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 2.6524678345824445e-07, "loss": 0.3278, "projector_lr": 7.957403503747335e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.40625, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.34375, "sft_loss": 1.0390625, "step": 4119 }, { "dpo_loss": 0.318359375, "epoch": 0.66, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 2.65022649886302e-07, "loss": 0.3922, "projector_lr": 7.95067949658906e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.609375, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.59375, "sft_loss": 0.58984375, "step": 4120 }, { "dpo_loss": 0.0693359375, "epoch": 0.66, "final_loss": 0.0693359375, "grad_norm": 0.0, "learning_rate": 2.647985768994142e-07, "loss": 0.201, "projector_lr": 7.943957306982427e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1123046875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -4.4375, "sft_loss": 0.5625, "step": 4121 }, { "dpo_loss": 0.006378173828125, "epoch": 0.66, "final_loss": 0.006378173828125, "grad_norm": 0.0, "learning_rate": 2.645745645553551e-07, "loss": 0.1358, "projector_lr": 7.937236936660653e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.02490234375, "rewards_train/margins": 6.9375, "rewards_train/rejected": -6.96875, "sft_loss": 0.5625, "step": 4122 }, { "dpo_loss": 0.2275390625, "epoch": 0.66, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 2.6435061291188243e-07, "loss": 0.1419, "projector_lr": 7.930518387356473e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.65625, "sft_loss": 0.76171875, "step": 4123 }, { "dpo_loss": 0.021728515625, "epoch": 0.66, "final_loss": 0.021728515625, "grad_norm": 0.0, "learning_rate": 2.6412672202673823e-07, "loss": 0.0162, "projector_lr": 7.923801660802148e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1875, "rewards_train/margins": 6.375, "rewards_train/rejected": -6.5625, "sft_loss": 0.55859375, "step": 4124 }, { "dpo_loss": 0.0162353515625, "epoch": 0.66, "final_loss": 0.0162353515625, "grad_norm": 0.0, "learning_rate": 2.639028919576492e-07, "loss": 0.0393, "projector_lr": 7.917086758729476e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.4375, "sft_loss": 0.546875, "step": 4125 }, { "dpo_loss": 0.1865234375, "epoch": 0.66, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 2.636791227623264e-07, "loss": 0.1596, "projector_lr": 7.910373682869793e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 2.34375, "rewards_train/rejected": -3.34375, "sft_loss": 0.83984375, "step": 4126 }, { "dpo_loss": 0.1708984375, "epoch": 0.66, "final_loss": 0.1708984375, "grad_norm": 0.0, "learning_rate": 2.6345541449846496e-07, "loss": 0.1639, "projector_lr": 7.90366243495395e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.11669921875, "rewards_train/margins": 5.5, "rewards_train/rejected": -5.59375, "sft_loss": 0.82421875, "step": 4127 }, { "dpo_loss": 0.0693359375, "epoch": 0.66, "final_loss": 0.0693359375, "grad_norm": 0.0, "learning_rate": 2.6323176722374445e-07, "loss": 0.3166, "projector_lr": 7.896953016712334e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.484375, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.375, "sft_loss": 0.8828125, "step": 4128 }, { "dpo_loss": 0.08447265625, "epoch": 0.66, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 2.6300818099582835e-07, "loss": 0.2172, "projector_lr": 7.890245429874851e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.78125, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.25, "sft_loss": 0.91015625, "step": 4129 }, { "dpo_loss": 0.053466796875, "epoch": 0.66, "final_loss": 0.053466796875, "grad_norm": 0.0, "learning_rate": 2.627846558723651e-07, "loss": 0.36, "projector_lr": 7.883539676170953e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.5, "sft_loss": 0.76953125, "step": 4130 }, { "dpo_loss": 0.1513671875, "epoch": 0.66, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 2.625611919109866e-07, "loss": 0.2519, "projector_lr": 7.876835757329598e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.37109375, "rewards_train/margins": 8.125, "rewards_train/rejected": -8.5, "sft_loss": 0.59765625, "step": 4131 }, { "dpo_loss": 0.2734375, "epoch": 0.66, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 2.623377891693097e-07, "loss": 0.2966, "projector_lr": 7.870133675079292e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.078125, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.0, "sft_loss": 0.8046875, "step": 4132 }, { "dpo_loss": 0.08447265625, "epoch": 0.66, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 2.6211444770493506e-07, "loss": 0.1227, "projector_lr": 7.863433431148052e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.306640625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.15625, "sft_loss": 0.83203125, "step": 4133 }, { "dpo_loss": 0.044921875, "epoch": 0.66, "final_loss": 0.044921875, "grad_norm": 0.0, "learning_rate": 2.618911675754476e-07, "loss": 0.127, "projector_lr": 7.856735027263429e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.0625, "sft_loss": 0.7265625, "step": 4134 }, { "dpo_loss": 0.134765625, "epoch": 0.66, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 2.616679488384162e-07, "loss": 0.1878, "projector_lr": 7.850038465152486e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.25, "sft_loss": 0.77734375, "step": 4135 }, { "dpo_loss": 0.130859375, "epoch": 0.66, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 2.614447915513945e-07, "loss": 0.0941, "projector_lr": 7.843343746541837e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0751953125, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.3125, "sft_loss": 0.79296875, "step": 4136 }, { "dpo_loss": 0.240234375, "epoch": 0.66, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 2.612216957719199e-07, "loss": 0.1802, "projector_lr": 7.836650873157597e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.28125, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.34375, "sft_loss": 1.34375, "step": 4137 }, { "dpo_loss": 0.08544921875, "epoch": 0.66, "final_loss": 0.08544921875, "grad_norm": 0.0, "learning_rate": 2.609986615575135e-07, "loss": 0.0627, "projector_lr": 7.829959846725407e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.212890625, "rewards_train/margins": 4.59375, "rewards_train/rejected": -4.8125, "sft_loss": 0.61328125, "step": 4138 }, { "dpo_loss": 0.02294921875, "epoch": 0.66, "final_loss": 0.02294921875, "grad_norm": 0.0, "learning_rate": 2.6077568896568173e-07, "loss": 0.0358, "projector_lr": 7.823270668970453e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.1875, "sft_loss": 0.70703125, "step": 4139 }, { "dpo_loss": 0.10546875, "epoch": 0.66, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 2.605527780539141e-07, "loss": 0.0554, "projector_lr": 7.816583341617422e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.40625, "sft_loss": 0.62109375, "step": 4140 }, { "dpo_loss": 0.01904296875, "epoch": 0.66, "final_loss": 0.01904296875, "grad_norm": 0.0, "learning_rate": 2.6032992887968443e-07, "loss": 0.2323, "projector_lr": 7.809897866390532e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.08154296875, "rewards_train/margins": 5.5625, "rewards_train/rejected": -5.625, "sft_loss": 0.66796875, "step": 4141 }, { "dpo_loss": 0.8359375, "epoch": 0.66, "final_loss": 0.8359375, "grad_norm": 0.0, "learning_rate": 2.601071415004506e-07, "loss": 0.4648, "projector_lr": 7.803214245013519e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.671875, "rewards_train/margins": 2.25, "rewards_train/rejected": -3.921875, "sft_loss": 1.1484375, "step": 4142 }, { "dpo_loss": 0.291015625, "epoch": 0.66, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 2.5988441597365505e-07, "loss": 0.1866, "projector_lr": 7.796532479209653e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 5.875, "rewards_train/rejected": -7.15625, "sft_loss": 0.77734375, "step": 4143 }, { "dpo_loss": 0.0546875, "epoch": 0.66, "final_loss": 0.0546875, "grad_norm": 0.0, "learning_rate": 2.596617523567237e-07, "loss": 0.1058, "projector_lr": 7.78985257070171e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 6.40625, "rewards_train/rejected": -7.40625, "sft_loss": 0.78125, "step": 4144 }, { "dpo_loss": 0.029296875, "epoch": 0.66, "final_loss": 0.029296875, "grad_norm": 0.0, "learning_rate": 2.59439150707067e-07, "loss": 0.3393, "projector_lr": 7.783174521212011e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.953125, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.53125, "sft_loss": 0.953125, "step": 4145 }, { "dpo_loss": 0.09716796875, "epoch": 0.66, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 2.5921661108207855e-07, "loss": 0.0824, "projector_lr": 7.776498332462357e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.78125, "sft_loss": 0.76171875, "step": 4146 }, { "dpo_loss": 0.1611328125, "epoch": 0.66, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 2.5899413353913703e-07, "loss": 0.2257, "projector_lr": 7.769824006174111e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 3.109375, "rewards_train/rejected": -3.984375, "sft_loss": 0.79296875, "step": 4147 }, { "dpo_loss": 0.197265625, "epoch": 0.66, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 2.587717181356043e-07, "loss": 0.1304, "projector_lr": 7.763151544068129e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.09375, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.5625, "sft_loss": 0.5859375, "step": 4148 }, { "dpo_loss": 0.5078125, "epoch": 0.66, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 2.5854936492882686e-07, "loss": 0.4741, "projector_lr": 7.756480947864806e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.1875, "sft_loss": 0.890625, "step": 4149 }, { "dpo_loss": 0.357421875, "epoch": 0.66, "final_loss": 0.357421875, "grad_norm": 0.0, "learning_rate": 2.583270739761347e-07, "loss": 0.2607, "projector_lr": 7.749812219284043e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.875, "sft_loss": 0.44140625, "step": 4150 }, { "dpo_loss": 0.376953125, "epoch": 0.66, "final_loss": 0.376953125, "grad_norm": 0.0, "learning_rate": 2.581048453348419e-07, "loss": 0.3536, "projector_lr": 7.743145360045257e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.90625, "sft_loss": 0.828125, "step": 4151 }, { "dpo_loss": 0.158203125, "epoch": 0.66, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 2.5788267906224613e-07, "loss": 0.0993, "projector_lr": 7.736480371867384e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.40625, "sft_loss": 0.72265625, "step": 4152 }, { "dpo_loss": 0.0966796875, "epoch": 0.66, "final_loss": 0.0966796875, "grad_norm": 0.0, "learning_rate": 2.576605752156299e-07, "loss": 0.4827, "projector_lr": 7.729817256468897e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.486328125, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.5, "sft_loss": 1.359375, "step": 4153 }, { "dpo_loss": 0.1123046875, "epoch": 0.66, "final_loss": 0.1123046875, "grad_norm": 0.0, "learning_rate": 2.574385338522587e-07, "loss": 0.0975, "projector_lr": 7.723156015567762e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.228515625, "rewards_train/margins": 5.71875, "rewards_train/rejected": -5.9375, "sft_loss": 0.80859375, "step": 4154 }, { "dpo_loss": 0.0269775390625, "epoch": 0.66, "final_loss": 0.0269775390625, "grad_norm": 0.0, "learning_rate": 2.572165550293824e-07, "loss": 0.0424, "projector_lr": 7.716496650881472e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71875, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.71875, "sft_loss": 0.72265625, "step": 4155 }, { "dpo_loss": 0.287109375, "epoch": 0.66, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 2.569946388042343e-07, "loss": 0.1806, "projector_lr": 7.709839164127028e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.359375, "rewards_train/margins": 6.03125, "rewards_train/rejected": -7.40625, "sft_loss": 0.8359375, "step": 4156 }, { "dpo_loss": 0.06591796875, "epoch": 0.67, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 2.567727852340322e-07, "loss": 0.0761, "projector_lr": 7.703183557020968e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5625, "rewards_train/margins": 6.125, "rewards_train/rejected": -6.6875, "sft_loss": 0.61328125, "step": 4157 }, { "dpo_loss": 1.375, "epoch": 0.67, "final_loss": 1.375, "grad_norm": 0.0, "learning_rate": 2.565509943759772e-07, "loss": 0.7847, "projector_lr": 7.696529831279315e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.96875, "rewards_train/margins": 1.2578125, "rewards_train/rejected": -3.234375, "sft_loss": 1.5078125, "step": 4158 }, { "dpo_loss": 0.8046875, "epoch": 0.67, "final_loss": 0.8046875, "grad_norm": 0.0, "learning_rate": 2.563292662872548e-07, "loss": 0.4939, "projector_lr": 7.689877988617645e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 2.765625, "rewards_train/rejected": -3.984375, "sft_loss": 0.95703125, "step": 4159 }, { "dpo_loss": 0.07177734375, "epoch": 0.67, "final_loss": 0.07177734375, "grad_norm": 0.0, "learning_rate": 2.5610760102503333e-07, "loss": 0.0933, "projector_lr": 7.683228030751e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 5.9375, "rewards_train/rejected": -7.15625, "sft_loss": 0.78125, "step": 4160 }, { "dpo_loss": 0.330078125, "epoch": 0.67, "final_loss": 0.330078125, "grad_norm": 0.0, "learning_rate": 2.5588599864646596e-07, "loss": 0.4964, "projector_lr": 7.67657995939398e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.03125, "sft_loss": 0.80078125, "step": 4161 }, { "dpo_loss": 0.043701171875, "epoch": 0.67, "final_loss": 0.043701171875, "grad_norm": 0.0, "learning_rate": 2.5566445920868904e-07, "loss": 0.2132, "projector_lr": 7.669933776260673e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.796875, "rewards_train/margins": 6.96875, "rewards_train/rejected": -7.75, "sft_loss": 0.71484375, "step": 4162 }, { "dpo_loss": 0.16015625, "epoch": 0.67, "final_loss": 0.16015625, "grad_norm": 0.0, "learning_rate": 2.554429827688234e-07, "loss": 0.2783, "projector_lr": 7.663289483064703e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.026123046875, "rewards_train/margins": 3.875, "rewards_train/rejected": -3.90625, "sft_loss": 0.5703125, "step": 4163 }, { "dpo_loss": 0.431640625, "epoch": 0.67, "final_loss": 0.431640625, "grad_norm": 0.0, "learning_rate": 2.5522156938397227e-07, "loss": 0.3061, "projector_lr": 7.656647081519169e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 3.1875, "rewards_train/rejected": -4.34375, "sft_loss": 0.51953125, "step": 4164 }, { "dpo_loss": 0.41015625, "epoch": 0.67, "final_loss": 0.41015625, "grad_norm": 0.0, "learning_rate": 2.5500021911122405e-07, "loss": 0.3484, "projector_lr": 7.650006573336721e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 2.640625, "rewards_train/rejected": -4.25, "sft_loss": 0.78515625, "step": 4165 }, { "dpo_loss": 0.2578125, "epoch": 0.67, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 2.547789320076499e-07, "loss": 0.2847, "projector_lr": 7.643367960229498e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.0625, "sft_loss": 0.9453125, "step": 4166 }, { "dpo_loss": 0.11083984375, "epoch": 0.67, "final_loss": 0.11083984375, "grad_norm": 0.0, "learning_rate": 2.545577081303055e-07, "loss": 0.1842, "projector_lr": 7.636731243909166e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.09375, "sft_loss": 0.625, "step": 4167 }, { "dpo_loss": 0.546875, "epoch": 0.67, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 2.5433654753622957e-07, "loss": 0.4789, "projector_lr": 7.630096426086888e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.59375, "sft_loss": 1.1328125, "step": 4168 }, { "dpo_loss": 0.4375, "epoch": 0.67, "final_loss": 0.4375, "grad_norm": 0.0, "learning_rate": 2.5411545028244474e-07, "loss": 0.4217, "projector_lr": 7.623463508473343e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 3.8125, "rewards_train/rejected": -4.9375, "sft_loss": 0.921875, "step": 4169 }, { "dpo_loss": 0.44140625, "epoch": 0.67, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 2.538944164259571e-07, "loss": 0.349, "projector_lr": 7.616832492778713e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.140625, "rewards_train/margins": 1.734375, "rewards_train/rejected": -3.875, "sft_loss": 1.0703125, "step": 4170 }, { "dpo_loss": 0.02001953125, "epoch": 0.67, "final_loss": 0.02001953125, "grad_norm": 0.0, "learning_rate": 2.536734460237571e-07, "loss": 0.0147, "projector_lr": 7.610203380712712e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 7.46875, "rewards_train/rejected": -8.125, "sft_loss": 1.34375, "step": 4171 }, { "dpo_loss": 0.328125, "epoch": 0.67, "final_loss": 0.328125, "grad_norm": 0.0, "learning_rate": 2.534525391328179e-07, "loss": 0.3148, "projector_lr": 7.603576173984539e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 3.0625, "rewards_train/rejected": -4.21875, "sft_loss": 0.859375, "step": 4172 }, { "dpo_loss": 0.06494140625, "epoch": 0.67, "final_loss": 0.06494140625, "grad_norm": 0.0, "learning_rate": 2.5323169581009676e-07, "loss": 0.1184, "projector_lr": 7.596950874302903e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.3125, "sft_loss": 0.6953125, "step": 4173 }, { "dpo_loss": 0.1552734375, "epoch": 0.67, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 2.530109161125348e-07, "loss": 0.1017, "projector_lr": 7.590327483376044e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.46875, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.625, "sft_loss": 0.84375, "step": 4174 }, { "dpo_loss": 0.051513671875, "epoch": 0.67, "final_loss": 0.051513671875, "grad_norm": 0.0, "learning_rate": 2.527902000970562e-07, "loss": 0.0691, "projector_lr": 7.583706002911686e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32421875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.53125, "sft_loss": 0.92578125, "step": 4175 }, { "dpo_loss": 0.5234375, "epoch": 0.67, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 2.5256954782056893e-07, "loss": 0.3565, "projector_lr": 7.577086434617068e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 2.34375, "rewards_train/rejected": -3.53125, "sft_loss": 0.90625, "step": 4176 }, { "dpo_loss": 0.0185546875, "epoch": 0.67, "final_loss": 0.0185546875, "grad_norm": 0.0, "learning_rate": 2.5234895933996476e-07, "loss": 0.0413, "projector_lr": 7.570468780198943e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.53125, "sft_loss": 0.75, "step": 4177 }, { "dpo_loss": 0.1435546875, "epoch": 0.67, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 2.5212843471211876e-07, "loss": 0.1104, "projector_lr": 7.563853041363563e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.109375, "rewards_train/margins": 4.4375, "rewards_train/rejected": -4.53125, "sft_loss": 0.859375, "step": 4178 }, { "dpo_loss": 0.1611328125, "epoch": 0.67, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 2.519079739938893e-07, "loss": 0.1005, "projector_lr": 7.55723921981668e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.375, "sft_loss": 0.79296875, "step": 4179 }, { "dpo_loss": 0.2373046875, "epoch": 0.67, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 2.5168757724211897e-07, "loss": 0.176, "projector_lr": 7.55062731726357e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6875, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.46875, "sft_loss": 0.7421875, "step": 4180 }, { "dpo_loss": 0.474609375, "epoch": 0.67, "final_loss": 0.474609375, "grad_norm": 0.0, "learning_rate": 2.514672445136333e-07, "loss": 0.3258, "projector_lr": 7.544017335408999e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 3.3125, "rewards_train/rejected": -4.28125, "sft_loss": 0.88671875, "step": 4181 }, { "dpo_loss": 0.23046875, "epoch": 0.67, "final_loss": 0.23046875, "grad_norm": 0.0, "learning_rate": 2.512469758652414e-07, "loss": 0.1639, "projector_lr": 7.537409275957243e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.90625, "sft_loss": 0.82421875, "step": 4182 }, { "dpo_loss": 0.037841796875, "epoch": 0.67, "final_loss": 0.037841796875, "grad_norm": 0.0, "learning_rate": 2.5102677135373587e-07, "loss": 0.0286, "projector_lr": 7.530803140612075e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.203125, "rewards_train/margins": 6.40625, "rewards_train/rejected": -6.625, "sft_loss": 0.64453125, "step": 4183 }, { "dpo_loss": 0.419921875, "epoch": 0.67, "final_loss": 0.419921875, "grad_norm": 0.0, "learning_rate": 2.5080663103589304e-07, "loss": 0.3335, "projector_lr": 7.524198931076792e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84765625, "rewards_train/margins": 3.03125, "rewards_train/rejected": -3.875, "sft_loss": 0.7109375, "step": 4184 }, { "dpo_loss": 0.08447265625, "epoch": 0.67, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 2.505865549684722e-07, "loss": 0.3048, "projector_lr": 7.517596649054167e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19921875, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.0, "sft_loss": 0.74609375, "step": 4185 }, { "dpo_loss": 0.11328125, "epoch": 0.67, "final_loss": 0.11328125, "grad_norm": 0.0, "learning_rate": 2.5036654320821694e-07, "loss": 0.0832, "projector_lr": 7.510996296246508e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.71875, "sft_loss": 0.84375, "step": 4186 }, { "dpo_loss": 0.0218505859375, "epoch": 0.67, "final_loss": 0.0218505859375, "grad_norm": 0.0, "learning_rate": 2.5014659581185285e-07, "loss": 0.0685, "projector_lr": 7.504397874355585e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.162109375, "rewards_train/margins": 5.75, "rewards_train/rejected": -5.9375, "sft_loss": 0.7265625, "step": 4187 }, { "dpo_loss": 0.2060546875, "epoch": 0.67, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 2.499267128360904e-07, "loss": 0.2799, "projector_lr": 7.497801385082713e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -5.8125, "sft_loss": 0.796875, "step": 4188 }, { "dpo_loss": 0.1611328125, "epoch": 0.67, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 2.497068943376224e-07, "loss": 0.1089, "projector_lr": 7.491206830128672e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.400390625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -4.53125, "sft_loss": 0.67578125, "step": 4189 }, { "dpo_loss": 0.08447265625, "epoch": 0.67, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 2.4948714037312595e-07, "loss": 0.0576, "projector_lr": 7.484614211193779e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48828125, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.0, "sft_loss": 0.74609375, "step": 4190 }, { "dpo_loss": 0.275390625, "epoch": 0.67, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 2.492674509992604e-07, "loss": 0.231, "projector_lr": 7.478023529977812e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.4375, "sft_loss": 0.8125, "step": 4191 }, { "dpo_loss": 0.04931640625, "epoch": 0.67, "final_loss": 0.04931640625, "grad_norm": 0.0, "learning_rate": 2.4904782627266946e-07, "loss": 0.0478, "projector_lr": 7.471434788180084e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.90625, "sft_loss": 0.984375, "step": 4192 }, { "dpo_loss": 0.03271484375, "epoch": 0.67, "final_loss": 0.03271484375, "grad_norm": 0.0, "learning_rate": 2.4882826624997943e-07, "loss": 0.0898, "projector_lr": 7.464847987499384e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.3125, "sft_loss": 0.76171875, "step": 4193 }, { "dpo_loss": 0.0023193359375, "epoch": 0.67, "final_loss": 0.0023193359375, "grad_norm": 0.0, "learning_rate": 2.486087709878007e-07, "loss": 0.0681, "projector_lr": 7.458263129634021e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.140625, "rewards_train/margins": 7.8125, "rewards_train/rejected": -8.9375, "sft_loss": 0.73828125, "step": 4194 }, { "dpo_loss": 0.51171875, "epoch": 0.67, "final_loss": 0.51171875, "grad_norm": 0.0, "learning_rate": 2.4838934054272625e-07, "loss": 0.275, "projector_lr": 7.451680216281788e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.609375, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.15625, "sft_loss": 0.431640625, "step": 4195 }, { "dpo_loss": 0.060791015625, "epoch": 0.67, "final_loss": 0.060791015625, "grad_norm": 0.0, "learning_rate": 2.481699749713327e-07, "loss": 0.0479, "projector_lr": 7.445099249139981e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.71875, "sft_loss": 0.625, "step": 4196 }, { "dpo_loss": 0.1689453125, "epoch": 0.67, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 2.4795067433017964e-07, "loss": 0.2073, "projector_lr": 7.43852022990539e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.46875, "sft_loss": 0.7578125, "step": 4197 }, { "dpo_loss": 0.015869140625, "epoch": 0.67, "final_loss": 0.015869140625, "grad_norm": 0.0, "learning_rate": 2.4773143867581046e-07, "loss": 0.065, "projector_lr": 7.431943160274314e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.5625, "sft_loss": 0.64453125, "step": 4198 }, { "dpo_loss": 0.1689453125, "epoch": 0.67, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 2.4751226806475137e-07, "loss": 0.1521, "projector_lr": 7.425368041942543e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6328125, "rewards_train/margins": 3.828125, "rewards_train/rejected": -5.46875, "sft_loss": 1.0546875, "step": 4199 }, { "dpo_loss": 0.0947265625, "epoch": 0.67, "final_loss": 0.0947265625, "grad_norm": 0.0, "learning_rate": 2.4729316255351196e-07, "loss": 0.0555, "projector_lr": 7.41879487660536e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.083984375, "rewards_train/margins": 5.5625, "rewards_train/rejected": -5.46875, "sft_loss": 0.90625, "step": 4200 }, { "dpo_loss": 0.150390625, "epoch": 0.67, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 2.470741221985846e-07, "loss": 0.1261, "projector_lr": 7.41222366595754e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 7.5625, "rewards_train/rejected": -8.1875, "sft_loss": 0.65234375, "step": 4201 }, { "dpo_loss": 0.0177001953125, "epoch": 0.67, "final_loss": 0.0177001953125, "grad_norm": 0.0, "learning_rate": 2.468551470564459e-07, "loss": 0.0465, "projector_lr": 7.405654411693377e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.90625, "sft_loss": 0.890625, "step": 4202 }, { "dpo_loss": 0.18359375, "epoch": 0.67, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 2.466362371835544e-07, "loss": 0.1841, "projector_lr": 7.399087115506633e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 3.03125, "rewards_train/rejected": -4.03125, "sft_loss": 0.89453125, "step": 4203 }, { "dpo_loss": 0.197265625, "epoch": 0.67, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 2.4641739263635316e-07, "loss": 0.1064, "projector_lr": 7.392521779090595e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.15625, "sft_loss": 0.734375, "step": 4204 }, { "dpo_loss": 0.0810546875, "epoch": 0.67, "final_loss": 0.0810546875, "grad_norm": 0.0, "learning_rate": 2.4619861347126676e-07, "loss": 0.0437, "projector_lr": 7.385958404138003e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.625, "sft_loss": 0.6171875, "step": 4205 }, { "dpo_loss": 0.07373046875, "epoch": 0.67, "final_loss": 0.07373046875, "grad_norm": 0.0, "learning_rate": 2.4597989974470447e-07, "loss": 0.1542, "projector_lr": 7.379396992341134e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.29296875, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.15625, "sft_loss": 0.6796875, "step": 4206 }, { "dpo_loss": 0.0220947265625, "epoch": 0.67, "final_loss": 0.0220947265625, "grad_norm": 0.0, "learning_rate": 2.457612515130576e-07, "loss": 0.173, "projector_lr": 7.372837545391728e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.419921875, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.21875, "sft_loss": 0.64453125, "step": 4207 }, { "dpo_loss": 0.0673828125, "epoch": 0.67, "final_loss": 0.0673828125, "grad_norm": 0.0, "learning_rate": 2.4554266883270147e-07, "loss": 0.0995, "projector_lr": 7.366280064981045e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.49609375, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.25, "sft_loss": 0.69921875, "step": 4208 }, { "dpo_loss": 0.005706787109375, "epoch": 0.67, "final_loss": 0.005706787109375, "grad_norm": 0.0, "learning_rate": 2.4532415175999375e-07, "loss": 0.0902, "projector_lr": 7.359724552799814e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 6.71875, "rewards_train/rejected": -7.21875, "sft_loss": 0.6484375, "step": 4209 }, { "dpo_loss": 0.00994873046875, "epoch": 0.67, "final_loss": 0.00994873046875, "grad_norm": 0.0, "learning_rate": 2.451057003512756e-07, "loss": 0.1374, "projector_lr": 7.353171010538268e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.14453125, "rewards_train/margins": 7.1875, "rewards_train/rejected": -7.34375, "sft_loss": 0.6796875, "step": 4210 }, { "dpo_loss": 0.166015625, "epoch": 0.67, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 2.4488731466287085e-07, "loss": 0.2367, "projector_lr": 7.346619439886125e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.609375, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.8125, "sft_loss": 1.046875, "step": 4211 }, { "dpo_loss": 0.0380859375, "epoch": 0.67, "final_loss": 0.0380859375, "grad_norm": 0.0, "learning_rate": 2.44668994751087e-07, "loss": 0.1441, "projector_lr": 7.340069842532612e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 6.40625, "rewards_train/rejected": -7.375, "sft_loss": 0.67578125, "step": 4212 }, { "dpo_loss": 0.35546875, "epoch": 0.67, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 2.444507406722142e-07, "loss": 0.2162, "projector_lr": 7.333522220166427e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.03125, "rewards_train/margins": 2.0, "rewards_train/rejected": -3.03125, "sft_loss": 0.77734375, "step": 4213 }, { "dpo_loss": 0.1005859375, "epoch": 0.67, "final_loss": 0.1005859375, "grad_norm": 0.0, "learning_rate": 2.442325524825255e-07, "loss": 0.0545, "projector_lr": 7.326976574475765e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.275390625, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.0, "sft_loss": 0.68359375, "step": 4214 }, { "dpo_loss": 0.0216064453125, "epoch": 0.67, "final_loss": 0.0216064453125, "grad_norm": 0.0, "learning_rate": 2.440144302382774e-07, "loss": 0.1464, "projector_lr": 7.320432907148322e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.21875, "sft_loss": 0.6484375, "step": 4215 }, { "dpo_loss": 0.11376953125, "epoch": 0.67, "final_loss": 0.11376953125, "grad_norm": 0.0, "learning_rate": 2.43796373995709e-07, "loss": 0.3142, "projector_lr": 7.313891219871271e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.453125, "rewards_train/margins": 3.640625, "rewards_train/rejected": -4.09375, "sft_loss": 0.703125, "step": 4216 }, { "dpo_loss": 0.1572265625, "epoch": 0.67, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 2.435783838110425e-07, "loss": 0.1366, "projector_lr": 7.307351514331276e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.125, "sft_loss": 0.80078125, "step": 4217 }, { "dpo_loss": 0.322265625, "epoch": 0.67, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 2.4336045974048303e-07, "loss": 0.2381, "projector_lr": 7.300813792214492e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.212890625, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.203125, "sft_loss": 0.79296875, "step": 4218 }, { "dpo_loss": 0.302734375, "epoch": 0.68, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 2.4314260184021894e-07, "loss": 0.1817, "projector_lr": 7.294278055206569e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.75, "sft_loss": 0.578125, "step": 4219 }, { "dpo_loss": 0.263671875, "epoch": 0.68, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 2.429248101664211e-07, "loss": 0.2051, "projector_lr": 7.287744304992633e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 3.625, "rewards_train/rejected": -5.21875, "sft_loss": 0.8046875, "step": 4220 }, { "dpo_loss": 0.015380859375, "epoch": 0.68, "final_loss": 0.015380859375, "grad_norm": 0.0, "learning_rate": 2.4270708477524377e-07, "loss": 0.2133, "projector_lr": 7.281212543257314e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.125, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.0, "sft_loss": 0.75390625, "step": 4221 }, { "dpo_loss": 0.1376953125, "epoch": 0.68, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 2.4248942572282375e-07, "loss": 0.1728, "projector_lr": 7.274682771684714e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.3125, "sft_loss": 0.625, "step": 4222 }, { "dpo_loss": 0.2060546875, "epoch": 0.68, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 2.4227183306528087e-07, "loss": 0.1626, "projector_lr": 7.268154991958427e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.09375, "sft_loss": 0.71875, "step": 4223 }, { "dpo_loss": 0.07080078125, "epoch": 0.68, "final_loss": 0.07080078125, "grad_norm": 0.0, "learning_rate": 2.420543068587176e-07, "loss": 0.0704, "projector_lr": 7.261629205761528e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.234375, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.9375, "sft_loss": 0.9765625, "step": 4224 }, { "dpo_loss": 0.09228515625, "epoch": 0.68, "final_loss": 0.09228515625, "grad_norm": 0.0, "learning_rate": 2.418368471592198e-07, "loss": 0.1767, "projector_lr": 7.255105414776594e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.1875, "sft_loss": 0.83984375, "step": 4225 }, { "dpo_loss": 1.0390625, "epoch": 0.68, "final_loss": 1.0390625, "grad_norm": 0.0, "learning_rate": 2.416194540228559e-07, "loss": 0.6816, "projector_lr": 7.248583620685677e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.265625, "rewards_train/margins": 3.15625, "rewards_train/rejected": -5.4375, "sft_loss": 0.73828125, "step": 4226 }, { "dpo_loss": 0.12255859375, "epoch": 0.68, "final_loss": 0.12255859375, "grad_norm": 0.0, "learning_rate": 2.4140212750567694e-07, "loss": 0.1512, "projector_lr": 7.242063825170309e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.109375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.5, "sft_loss": 0.70703125, "step": 4227 }, { "dpo_loss": 0.2177734375, "epoch": 0.68, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 2.41184867663717e-07, "loss": 0.3004, "projector_lr": 7.23554602991151e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.640625, "rewards_train/margins": 3.3125, "rewards_train/rejected": -4.96875, "sft_loss": 0.79296875, "step": 4228 }, { "dpo_loss": 0.041748046875, "epoch": 0.68, "final_loss": 0.041748046875, "grad_norm": 0.0, "learning_rate": 2.4096767455299324e-07, "loss": 0.0412, "projector_lr": 7.229030236589797e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 5.75, "rewards_train/rejected": -7.3125, "sft_loss": 0.56640625, "step": 4229 }, { "dpo_loss": 0.0164794921875, "epoch": 0.68, "final_loss": 0.0164794921875, "grad_norm": 0.0, "learning_rate": 2.407505482295049e-07, "loss": 0.0236, "projector_lr": 7.222516446885148e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84375, "rewards_train/margins": 6.28125, "rewards_train/rejected": -7.125, "sft_loss": 0.5859375, "step": 4230 }, { "dpo_loss": 0.259765625, "epoch": 0.68, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 2.405334887492351e-07, "loss": 0.2846, "projector_lr": 7.216004662477055e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.125, "sft_loss": 0.7734375, "step": 4231 }, { "dpo_loss": 0.32421875, "epoch": 0.68, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 2.4031649616814834e-07, "loss": 0.2074, "projector_lr": 7.209494885044452e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 3.296875, "rewards_train/rejected": -4.15625, "sft_loss": 0.7109375, "step": 4232 }, { "dpo_loss": 0.7578125, "epoch": 0.68, "final_loss": 0.7578125, "grad_norm": 0.0, "learning_rate": 2.400995705421931e-07, "loss": 0.4648, "projector_lr": 7.202987116265794e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.59375, "sft_loss": 0.81640625, "step": 4233 }, { "dpo_loss": 0.1171875, "epoch": 0.68, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 2.3988271192729974e-07, "loss": 0.0859, "projector_lr": 7.196481357818993e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2275390625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -4.84375, "sft_loss": 0.6796875, "step": 4234 }, { "dpo_loss": 0.353515625, "epoch": 0.68, "final_loss": 0.353515625, "grad_norm": 0.0, "learning_rate": 2.3966592037938224e-07, "loss": 0.2173, "projector_lr": 7.189977611381469e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 2.453125, "rewards_train/rejected": -3.734375, "sft_loss": 1.015625, "step": 4235 }, { "dpo_loss": 0.208984375, "epoch": 0.68, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 2.39449195954336e-07, "loss": 0.3104, "projector_lr": 7.18347587863008e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 3.125, "rewards_train/rejected": -4.03125, "sft_loss": 0.58984375, "step": 4236 }, { "dpo_loss": 0.060546875, "epoch": 0.68, "final_loss": 0.060546875, "grad_norm": 0.0, "learning_rate": 2.3923253870804045e-07, "loss": 0.0575, "projector_lr": 7.176976161241214e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.0625, "sft_loss": 0.68359375, "step": 4237 }, { "dpo_loss": 0.038818359375, "epoch": 0.68, "final_loss": 0.038818359375, "grad_norm": 0.0, "learning_rate": 2.3901594869635674e-07, "loss": 0.054, "projector_lr": 7.170478460890702e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 6.34375, "rewards_train/rejected": -6.71875, "sft_loss": 0.67578125, "step": 4238 }, { "dpo_loss": 0.279296875, "epoch": 0.68, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 2.387994259751293e-07, "loss": 0.2788, "projector_lr": 7.16398277925388e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.640625, "rewards_train/margins": 3.734375, "rewards_train/rejected": -5.375, "sft_loss": 1.0625, "step": 4239 }, { "dpo_loss": 0.07958984375, "epoch": 0.68, "final_loss": 0.07958984375, "grad_norm": 0.0, "learning_rate": 2.3858297060018487e-07, "loss": 0.0847, "projector_lr": 7.157489118005547e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.78125, "sft_loss": 0.64453125, "step": 4240 }, { "dpo_loss": 0.2353515625, "epoch": 0.68, "final_loss": 0.2353515625, "grad_norm": 0.0, "learning_rate": 2.38366582627333e-07, "loss": 0.1769, "projector_lr": 7.150997478819991e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 3.796875, "rewards_train/rejected": -4.75, "sft_loss": 0.5546875, "step": 4241 }, { "dpo_loss": 0.162109375, "epoch": 0.68, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 2.381502621123655e-07, "loss": 0.1125, "projector_lr": 7.144507863370965e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.53125, "sft_loss": 0.82421875, "step": 4242 }, { "dpo_loss": 0.275390625, "epoch": 0.68, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 2.3793400911105743e-07, "loss": 0.1954, "projector_lr": 7.138020273331724e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.859375, "rewards_train/margins": 3.640625, "rewards_train/rejected": -4.5, "sft_loss": 0.69921875, "step": 4243 }, { "dpo_loss": 0.028076171875, "epoch": 0.68, "final_loss": 0.028076171875, "grad_norm": 0.0, "learning_rate": 2.3771782367916587e-07, "loss": 0.0466, "projector_lr": 7.131534710374977e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.90234375, "rewards_train/margins": 6.125, "rewards_train/rejected": -7.03125, "sft_loss": 1.15625, "step": 4244 }, { "dpo_loss": 0.33203125, "epoch": 0.68, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 2.375017058724308e-07, "loss": 0.1924, "projector_lr": 7.125051176172924e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 3.359375, "rewards_train/rejected": -3.984375, "sft_loss": 0.6484375, "step": 4245 }, { "dpo_loss": 0.322265625, "epoch": 0.68, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 2.3728565574657433e-07, "loss": 0.1961, "projector_lr": 7.11856967239723e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.49609375, "rewards_train/margins": 3.53125, "rewards_train/rejected": -4.03125, "sft_loss": 0.73828125, "step": 4246 }, { "dpo_loss": 0.05517578125, "epoch": 0.68, "final_loss": 0.05517578125, "grad_norm": 0.0, "learning_rate": 2.370696733573019e-07, "loss": 0.0287, "projector_lr": 7.112090200719057e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.875, "sft_loss": 0.59375, "step": 4247 }, { "dpo_loss": 0.023193359375, "epoch": 0.68, "final_loss": 0.023193359375, "grad_norm": 0.0, "learning_rate": 2.3685375876030062e-07, "loss": 0.0881, "projector_lr": 7.105612762809019e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.65625, "sft_loss": 0.61328125, "step": 4248 }, { "dpo_loss": 0.0458984375, "epoch": 0.68, "final_loss": 0.0458984375, "grad_norm": 0.0, "learning_rate": 2.366379120112409e-07, "loss": 0.1986, "projector_lr": 7.099137360337227e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.03125, "sft_loss": 0.6875, "step": 4249 }, { "dpo_loss": 0.125, "epoch": 0.68, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 2.3642213316577515e-07, "loss": 0.282, "projector_lr": 7.092663994973255e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.146484375, "rewards_train/margins": 5.6875, "rewards_train/rejected": -5.84375, "sft_loss": 0.6328125, "step": 4250 }, { "dpo_loss": 0.275390625, "epoch": 0.68, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 2.362064222795383e-07, "loss": 0.6573, "projector_lr": 7.086192668386149e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.109375, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.21875, "sft_loss": 0.8125, "step": 4251 }, { "dpo_loss": 0.109375, "epoch": 0.68, "final_loss": 0.109375, "grad_norm": 0.0, "learning_rate": 2.3599077940814775e-07, "loss": 0.1219, "projector_lr": 7.079723382244434e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1513671875, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.21875, "sft_loss": 1.4375, "step": 4252 }, { "dpo_loss": 0.119140625, "epoch": 0.68, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 2.357752046072039e-07, "loss": 0.2721, "projector_lr": 7.073256138216117e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.375, "sft_loss": 0.78125, "step": 4253 }, { "dpo_loss": 0.1748046875, "epoch": 0.68, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 2.3555969793228886e-07, "loss": 0.1544, "projector_lr": 7.066790937968666e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0625, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.8125, "sft_loss": 0.7890625, "step": 4254 }, { "dpo_loss": 0.26953125, "epoch": 0.68, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 2.3534425943896742e-07, "loss": 0.2235, "projector_lr": 7.060327783169022e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.09375, "sft_loss": 0.65625, "step": 4255 }, { "dpo_loss": 0.123046875, "epoch": 0.68, "final_loss": 0.123046875, "grad_norm": 0.0, "learning_rate": 2.3512888918278712e-07, "loss": 0.0733, "projector_lr": 7.053866675483614e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -3.71875, "sft_loss": 0.6640625, "step": 4256 }, { "dpo_loss": 0.04052734375, "epoch": 0.68, "final_loss": 0.04052734375, "grad_norm": 0.0, "learning_rate": 2.3491358721927755e-07, "loss": 0.1738, "projector_lr": 7.047407616578327e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 6.65625, "rewards_train/rejected": -8.0625, "sft_loss": 0.6328125, "step": 4257 }, { "dpo_loss": 0.09375, "epoch": 0.68, "final_loss": 0.09375, "grad_norm": 0.0, "learning_rate": 2.3469835360395078e-07, "loss": 0.0911, "projector_lr": 7.040950608118524e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53125, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.46875, "sft_loss": 0.70703125, "step": 4258 }, { "dpo_loss": 0.1494140625, "epoch": 0.68, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 2.3448318839230108e-07, "loss": 0.147, "projector_lr": 7.034495651769033e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.3125, "sft_loss": 0.85546875, "step": 4259 }, { "dpo_loss": 0.333984375, "epoch": 0.68, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 2.342680916398056e-07, "loss": 0.2075, "projector_lr": 7.028042749194168e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.015625, "rewards_train/margins": 2.265625, "rewards_train/rejected": -3.28125, "sft_loss": 0.9375, "step": 4260 }, { "dpo_loss": 0.2099609375, "epoch": 0.68, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 2.3405306340192322e-07, "loss": 0.196, "projector_lr": 7.021591902057697e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.71875, "sft_loss": 1.2578125, "step": 4261 }, { "dpo_loss": 0.04931640625, "epoch": 0.68, "final_loss": 0.04931640625, "grad_norm": 0.0, "learning_rate": 2.338381037340959e-07, "loss": 0.1058, "projector_lr": 7.015143112022878e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6171875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.84375, "sft_loss": 0.74609375, "step": 4262 }, { "dpo_loss": 0.005950927734375, "epoch": 0.68, "final_loss": 0.005950927734375, "grad_norm": 0.0, "learning_rate": 2.3362321269174672e-07, "loss": 0.0082, "projector_lr": 7.008696380752403e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2255859375, "rewards_train/margins": 7.375, "rewards_train/rejected": -7.59375, "sft_loss": 0.625, "step": 4263 }, { "dpo_loss": 0.00390625, "epoch": 0.68, "final_loss": 0.00390625, "grad_norm": 0.0, "learning_rate": 2.3340839033028248e-07, "loss": 0.0444, "projector_lr": 7.002251709908475e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1845703125, "rewards_train/margins": 6.40625, "rewards_train/rejected": -6.21875, "sft_loss": 0.51953125, "step": 4264 }, { "dpo_loss": 0.314453125, "epoch": 0.68, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 2.3319363670509118e-07, "loss": 0.295, "projector_lr": 6.995809101152736e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.03125, "sft_loss": 0.81640625, "step": 4265 }, { "dpo_loss": 0.1298828125, "epoch": 0.68, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 2.3297895187154382e-07, "loss": 0.0868, "projector_lr": 6.989368556146315e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.036865234375, "rewards_train/margins": 6.125, "rewards_train/rejected": -6.15625, "sft_loss": 0.85546875, "step": 4266 }, { "dpo_loss": 0.017578125, "epoch": 0.68, "final_loss": 0.017578125, "grad_norm": 0.0, "learning_rate": 2.327643358849933e-07, "loss": 0.1767, "projector_lr": 6.982930076549799e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -7.0, "sft_loss": 0.9609375, "step": 4267 }, { "dpo_loss": 0.228515625, "epoch": 0.68, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 2.325497888007747e-07, "loss": 0.1845, "projector_lr": 6.976493664023242e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.546875, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.28125, "sft_loss": 0.65625, "step": 4268 }, { "dpo_loss": 0.287109375, "epoch": 0.68, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 2.323353106742053e-07, "loss": 0.2581, "projector_lr": 6.97005932022616e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.26171875, "rewards_train/margins": 3.65625, "rewards_train/rejected": -3.921875, "sft_loss": 1.0234375, "step": 4269 }, { "dpo_loss": 0.2470703125, "epoch": 0.68, "final_loss": 0.2470703125, "grad_norm": 0.0, "learning_rate": 2.3212090156058528e-07, "loss": 0.1511, "projector_lr": 6.963627046817559e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4375, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.59375, "sft_loss": 0.9140625, "step": 4270 }, { "dpo_loss": 0.1435546875, "epoch": 0.68, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 2.3190656151519615e-07, "loss": 0.1551, "projector_lr": 6.957196845455885e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.96875, "sft_loss": 0.890625, "step": 4271 }, { "dpo_loss": 0.087890625, "epoch": 0.68, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 2.3169229059330214e-07, "loss": 0.148, "projector_lr": 6.950768717799065e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.75390625, "rewards_train/margins": 5.9375, "rewards_train/rejected": -5.15625, "sft_loss": 0.54296875, "step": 4272 }, { "dpo_loss": 0.0145263671875, "epoch": 0.68, "final_loss": 0.0145263671875, "grad_norm": 0.0, "learning_rate": 2.3147808885014913e-07, "loss": 0.1772, "projector_lr": 6.944342665504474e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.91015625, "rewards_train/margins": 6.59375, "rewards_train/rejected": -7.5, "sft_loss": 0.6484375, "step": 4273 }, { "dpo_loss": 0.12890625, "epoch": 0.68, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 2.3126395634096607e-07, "loss": 0.2824, "projector_lr": 6.937918690228982e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.609375, "rewards_train/margins": 4.8125, "rewards_train/rejected": -6.4375, "sft_loss": 1.0703125, "step": 4274 }, { "dpo_loss": 0.3125, "epoch": 0.68, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 2.31049893120963e-07, "loss": 0.2169, "projector_lr": 6.931496793628891e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.25, "rewards_train/margins": 3.234375, "rewards_train/rejected": -3.484375, "sft_loss": 0.8125, "step": 4275 }, { "dpo_loss": 0.169921875, "epoch": 0.68, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 2.3083589924533327e-07, "loss": 0.1952, "projector_lr": 6.925076977359999e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.046875, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.40625, "sft_loss": 1.3359375, "step": 4276 }, { "dpo_loss": 0.240234375, "epoch": 0.68, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 2.3062197476925084e-07, "loss": 0.1284, "projector_lr": 6.918659243077525e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.46875, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.9375, "sft_loss": 1.3125, "step": 4277 }, { "dpo_loss": 0.21875, "epoch": 0.68, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 2.3040811974787328e-07, "loss": 0.1924, "projector_lr": 6.912243592436199e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84375, "rewards_train/margins": 3.671875, "rewards_train/rejected": -4.5, "sft_loss": 1.140625, "step": 4278 }, { "dpo_loss": 0.01373291015625, "epoch": 0.68, "final_loss": 0.01373291015625, "grad_norm": 0.0, "learning_rate": 2.301943342363392e-07, "loss": 0.0706, "projector_lr": 6.905830027090177e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2734375, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.28125, "sft_loss": 0.77734375, "step": 4279 }, { "dpo_loss": 0.07763671875, "epoch": 0.68, "final_loss": 0.07763671875, "grad_norm": 0.0, "learning_rate": 2.2998061828977023e-07, "loss": 0.1011, "projector_lr": 6.899418548693107e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 3.859375, "rewards_train/rejected": -4.625, "sft_loss": 0.7578125, "step": 4280 }, { "dpo_loss": 0.1005859375, "epoch": 0.68, "final_loss": 0.1005859375, "grad_norm": 0.0, "learning_rate": 2.2976697196326884e-07, "loss": 0.1243, "projector_lr": 6.893009158898066e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1162109375, "rewards_train/margins": 5.53125, "rewards_train/rejected": -5.65625, "sft_loss": 0.43359375, "step": 4281 }, { "dpo_loss": 0.1669921875, "epoch": 0.69, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 2.2955339531192064e-07, "loss": 0.2742, "projector_lr": 6.886601859357619e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.0625, "sft_loss": 0.66796875, "step": 4282 }, { "dpo_loss": 0.91015625, "epoch": 0.69, "final_loss": 0.91015625, "grad_norm": 0.0, "learning_rate": 2.2933988839079266e-07, "loss": 0.8712, "projector_lr": 6.88019665172378e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 1.7265625, "rewards_train/rejected": -3.078125, "sft_loss": 0.58203125, "step": 4283 }, { "dpo_loss": 0.208984375, "epoch": 0.69, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 2.291264512549344e-07, "loss": 0.1132, "projector_lr": 6.873793537648032e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.28125, "sft_loss": 0.80859375, "step": 4284 }, { "dpo_loss": 0.11376953125, "epoch": 0.69, "final_loss": 0.11376953125, "grad_norm": 0.0, "learning_rate": 2.2891308395937703e-07, "loss": 0.1897, "projector_lr": 6.867392518781311e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.09375, "sft_loss": 0.75, "step": 4285 }, { "dpo_loss": 0.15625, "epoch": 0.69, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 2.286997865591338e-07, "loss": 0.1719, "projector_lr": 6.860993596774015e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 6.5, "rewards_train/rejected": -7.21875, "sft_loss": 0.70703125, "step": 4286 }, { "dpo_loss": 0.0086669921875, "epoch": 0.69, "final_loss": 0.0086669921875, "grad_norm": 0.0, "learning_rate": 2.2848655910919966e-07, "loss": 0.0699, "projector_lr": 6.854596773275991e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 6.34375, "rewards_train/rejected": -6.90625, "sft_loss": 0.72265625, "step": 4287 }, { "dpo_loss": 0.002685546875, "epoch": 0.69, "final_loss": 0.002685546875, "grad_norm": 0.0, "learning_rate": 2.2827340166455235e-07, "loss": 0.0419, "projector_lr": 6.84820204993657e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4453125, "rewards_train/margins": 7.34375, "rewards_train/rejected": -7.8125, "sft_loss": 0.8125, "step": 4288 }, { "dpo_loss": 0.00616455078125, "epoch": 0.69, "final_loss": 0.00616455078125, "grad_norm": 0.0, "learning_rate": 2.2806031428015066e-07, "loss": 0.273, "projector_lr": 6.84180942840452e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.296875, "rewards_train/margins": 8.3125, "rewards_train/rejected": -8.625, "sft_loss": 0.68359375, "step": 4289 }, { "dpo_loss": 0.51953125, "epoch": 0.69, "final_loss": 0.51953125, "grad_norm": 0.0, "learning_rate": 2.2784729701093557e-07, "loss": 0.3317, "projector_lr": 6.835418910328068e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.84375, "rewards_train/margins": 3.09375, "rewards_train/rejected": -4.9375, "sft_loss": 0.76953125, "step": 4290 }, { "dpo_loss": 0.1953125, "epoch": 0.69, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 2.276343499118305e-07, "loss": 0.2071, "projector_lr": 6.829030497354915e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.0, "sft_loss": 0.9453125, "step": 4291 }, { "dpo_loss": 0.3671875, "epoch": 0.69, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 2.274214730377401e-07, "loss": 0.3443, "projector_lr": 6.822644191132204e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.25, "sft_loss": 0.73046875, "step": 4292 }, { "dpo_loss": 0.82421875, "epoch": 0.69, "final_loss": 0.82421875, "grad_norm": 0.0, "learning_rate": 2.2720866644355107e-07, "loss": 0.5016, "projector_lr": 6.816259993306533e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 2.5, "rewards_train/rejected": -3.96875, "sft_loss": 0.8671875, "step": 4293 }, { "dpo_loss": 0.12158203125, "epoch": 0.69, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 2.2699593018413238e-07, "loss": 0.1679, "projector_lr": 6.809877905523972e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4140625, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.625, "sft_loss": 0.7578125, "step": 4294 }, { "dpo_loss": 0.150390625, "epoch": 0.69, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 2.2678326431433454e-07, "loss": 0.2451, "projector_lr": 6.803497929430036e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75, "rewards_train/margins": 6.03125, "rewards_train/rejected": -6.78125, "sft_loss": 0.51953125, "step": 4295 }, { "dpo_loss": 0.345703125, "epoch": 0.69, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 2.2657066888898957e-07, "loss": 0.2268, "projector_lr": 6.797120066669687e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.65625, "sft_loss": 0.83203125, "step": 4296 }, { "dpo_loss": 0.4140625, "epoch": 0.69, "final_loss": 0.4140625, "grad_norm": 0.0, "learning_rate": 2.263581439629123e-07, "loss": 0.2248, "projector_lr": 6.790744318887369e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6171875, "rewards_train/margins": 3.25, "rewards_train/rejected": -3.859375, "sft_loss": 0.62890625, "step": 4297 }, { "dpo_loss": 0.0830078125, "epoch": 0.69, "final_loss": 0.0830078125, "grad_norm": 0.0, "learning_rate": 2.2614568959089842e-07, "loss": 0.0774, "projector_lr": 6.784370687726953e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.328125, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.21875, "sft_loss": 0.56640625, "step": 4298 }, { "dpo_loss": 0.04443359375, "epoch": 0.69, "final_loss": 0.04443359375, "grad_norm": 0.0, "learning_rate": 2.2593330582772595e-07, "loss": 0.1194, "projector_lr": 6.777999174831779e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2373046875, "rewards_train/margins": 6.1875, "rewards_train/rejected": -5.9375, "sft_loss": 0.7421875, "step": 4299 }, { "dpo_loss": 0.298828125, "epoch": 0.69, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 2.2572099272815432e-07, "loss": 0.2834, "projector_lr": 6.77162978184463e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.5625, "sft_loss": 0.5078125, "step": 4300 }, { "dpo_loss": 0.0908203125, "epoch": 0.69, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 2.255087503469253e-07, "loss": 0.1329, "projector_lr": 6.765262510407759e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 4.25, "rewards_train/rejected": -6.125, "sft_loss": 0.8984375, "step": 4301 }, { "dpo_loss": 0.0014801025390625, "epoch": 0.69, "final_loss": 0.0014801025390625, "grad_norm": 0.0, "learning_rate": 2.252965787387618e-07, "loss": 0.0962, "projector_lr": 6.758897362162854e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.90234375, "rewards_train/margins": 8.625, "rewards_train/rejected": -9.5, "sft_loss": 0.73828125, "step": 4302 }, { "dpo_loss": 0.1591796875, "epoch": 0.69, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 2.2508447795836933e-07, "loss": 0.2044, "projector_lr": 6.75253433875108e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.828125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.6875, "sft_loss": 0.91015625, "step": 4303 }, { "dpo_loss": 0.052001953125, "epoch": 0.69, "final_loss": 0.052001953125, "grad_norm": 0.0, "learning_rate": 2.2487244806043388e-07, "loss": 0.0473, "projector_lr": 6.746173441813017e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 6.125, "rewards_train/rejected": -6.78125, "sft_loss": 0.640625, "step": 4304 }, { "dpo_loss": 0.44921875, "epoch": 0.69, "final_loss": 0.44921875, "grad_norm": 0.0, "learning_rate": 2.246604890996243e-07, "loss": 0.2307, "projector_lr": 6.73981467298873e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.078125, "rewards_train/margins": 2.28125, "rewards_train/rejected": -3.359375, "sft_loss": 0.63671875, "step": 4305 }, { "dpo_loss": 0.10498046875, "epoch": 0.69, "final_loss": 0.10498046875, "grad_norm": 0.0, "learning_rate": 2.244486011305906e-07, "loss": 0.1189, "projector_lr": 6.733458033917719e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 7.03125, "rewards_train/rejected": -7.6875, "sft_loss": 1.1328125, "step": 4306 }, { "dpo_loss": 0.0032806396484375, "epoch": 0.69, "final_loss": 0.0032806396484375, "grad_norm": 0.0, "learning_rate": 2.24236784207965e-07, "loss": 0.0477, "projector_lr": 6.72710352623895e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38671875, "rewards_train/margins": 8.1875, "rewards_train/rejected": -8.5625, "sft_loss": 0.51953125, "step": 4307 }, { "dpo_loss": 0.21484375, "epoch": 0.69, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 2.240250383863603e-07, "loss": 0.1235, "projector_lr": 6.720751151590809e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3203125, "rewards_train/margins": 3.609375, "rewards_train/rejected": -3.921875, "sft_loss": 0.796875, "step": 4308 }, { "dpo_loss": 0.08837890625, "epoch": 0.69, "final_loss": 0.08837890625, "grad_norm": 0.0, "learning_rate": 2.2381336372037223e-07, "loss": 0.2871, "projector_lr": 6.714400911611168e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.515625, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.28125, "sft_loss": 0.734375, "step": 4309 }, { "dpo_loss": 0.2373046875, "epoch": 0.69, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 2.2360176026457733e-07, "loss": 0.1879, "projector_lr": 6.70805280793732e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 2.84375, "rewards_train/rejected": -4.0625, "sft_loss": 0.84375, "step": 4310 }, { "dpo_loss": 0.15625, "epoch": 0.69, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 2.2339022807353442e-07, "loss": 0.1436, "projector_lr": 6.701706842206033e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.09375, "sft_loss": 0.65625, "step": 4311 }, { "dpo_loss": 0.15625, "epoch": 0.69, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 2.2317876720178336e-07, "loss": 0.3004, "projector_lr": 6.695363016053501e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -4.84375, "sft_loss": 0.97265625, "step": 4312 }, { "dpo_loss": 0.09814453125, "epoch": 0.69, "final_loss": 0.09814453125, "grad_norm": 0.0, "learning_rate": 2.2296737770384595e-07, "loss": 0.2189, "projector_lr": 6.689021331115379e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48828125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.1875, "sft_loss": 0.92578125, "step": 4313 }, { "dpo_loss": 0.32421875, "epoch": 0.69, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 2.2275605963422528e-07, "loss": 0.3572, "projector_lr": 6.682681789026759e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.703125, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.4375, "sft_loss": 0.6484375, "step": 4314 }, { "dpo_loss": 0.12158203125, "epoch": 0.69, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 2.2254481304740663e-07, "loss": 0.1316, "projector_lr": 6.6763443914222e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84375, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.28125, "sft_loss": 0.72265625, "step": 4315 }, { "dpo_loss": 0.412109375, "epoch": 0.69, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 2.2233363799785626e-07, "loss": 0.2993, "projector_lr": 6.670009139935689e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 3.375, "rewards_train/rejected": -4.53125, "sft_loss": 0.63671875, "step": 4316 }, { "dpo_loss": 0.5546875, "epoch": 0.69, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 2.2212253454002222e-07, "loss": 0.3467, "projector_lr": 6.663676036200668e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.72265625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.03125, "sft_loss": 0.98828125, "step": 4317 }, { "dpo_loss": 0.21484375, "epoch": 0.69, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 2.2191150272833386e-07, "loss": 0.2365, "projector_lr": 6.657345081850017e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.28125, "sft_loss": 0.71875, "step": 4318 }, { "dpo_loss": 0.38671875, "epoch": 0.69, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 2.2170054261720267e-07, "loss": 0.4361, "projector_lr": 6.651016278516081e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.3125, "rewards_train/margins": 3.875, "rewards_train/rejected": -6.1875, "sft_loss": 0.6796875, "step": 4319 }, { "dpo_loss": 0.1123046875, "epoch": 0.69, "final_loss": 0.1123046875, "grad_norm": 0.0, "learning_rate": 2.214896542610209e-07, "loss": 0.0783, "projector_lr": 6.644689627830627e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3671875, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.1875, "sft_loss": 0.703125, "step": 4320 }, { "dpo_loss": 0.392578125, "epoch": 0.69, "final_loss": 0.392578125, "grad_norm": 0.0, "learning_rate": 2.2127883771416312e-07, "loss": 0.2755, "projector_lr": 6.638365131424894e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 2.53125, "rewards_train/rejected": -4.0, "sft_loss": 0.75390625, "step": 4321 }, { "dpo_loss": 0.058349609375, "epoch": 0.69, "final_loss": 0.058349609375, "grad_norm": 0.0, "learning_rate": 2.2106809303098428e-07, "loss": 0.0673, "projector_lr": 6.632042790929529e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.5, "sft_loss": 0.8203125, "step": 4322 }, { "dpo_loss": 0.078125, "epoch": 0.69, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 2.2085742026582193e-07, "loss": 0.2312, "projector_lr": 6.625722607974658e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.234375, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.53125, "sft_loss": 0.8125, "step": 4323 }, { "dpo_loss": 0.0703125, "epoch": 0.69, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 2.2064681947299424e-07, "loss": 0.0395, "projector_lr": 6.619404584189828e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.4375, "sft_loss": 0.765625, "step": 4324 }, { "dpo_loss": 0.52734375, "epoch": 0.69, "final_loss": 0.52734375, "grad_norm": 0.0, "learning_rate": 2.2043629070680153e-07, "loss": 0.3741, "projector_lr": 6.613088721204046e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.46875, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.9375, "sft_loss": 0.921875, "step": 4325 }, { "dpo_loss": 0.1826171875, "epoch": 0.69, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 2.20225834021525e-07, "loss": 0.0945, "projector_lr": 6.60677502064575e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 5.375, "rewards_train/rejected": -5.8125, "sft_loss": 0.5234375, "step": 4326 }, { "dpo_loss": 0.43359375, "epoch": 0.69, "final_loss": 0.43359375, "grad_norm": 0.0, "learning_rate": 2.2001544947142753e-07, "loss": 0.2781, "projector_lr": 6.600463484142827e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.53125, "sft_loss": 0.6796875, "step": 4327 }, { "dpo_loss": 0.1884765625, "epoch": 0.69, "final_loss": 0.1884765625, "grad_norm": 0.0, "learning_rate": 2.19805137110753e-07, "loss": 0.0951, "projector_lr": 6.59415411332259e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.6875, "sft_loss": 0.55078125, "step": 4328 }, { "dpo_loss": 0.1474609375, "epoch": 0.69, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 2.1959489699372746e-07, "loss": 0.2093, "projector_lr": 6.587846909811824e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3828125, "rewards_train/margins": 6.34375, "rewards_train/rejected": -6.71875, "sft_loss": 0.73828125, "step": 4329 }, { "dpo_loss": 0.45703125, "epoch": 0.69, "final_loss": 0.45703125, "grad_norm": 0.0, "learning_rate": 2.1938472917455769e-07, "loss": 0.4665, "projector_lr": 6.581541875236731e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.234375, "rewards_train/rejected": -4.25, "sft_loss": 0.75390625, "step": 4330 }, { "dpo_loss": 0.083984375, "epoch": 0.69, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 2.19174633707432e-07, "loss": 0.2337, "projector_lr": 6.57523901122296e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.75, "sft_loss": 0.80859375, "step": 4331 }, { "dpo_loss": 0.369140625, "epoch": 0.69, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 2.1896461064651988e-07, "loss": 0.3308, "projector_lr": 6.568938319395597e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.359375, "rewards_train/margins": 2.71875, "rewards_train/rejected": -4.0625, "sft_loss": 0.94921875, "step": 4332 }, { "dpo_loss": 0.1142578125, "epoch": 0.69, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 2.187546600459727e-07, "loss": 0.1646, "projector_lr": 6.562639801379182e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.35546875, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.40625, "sft_loss": 0.6484375, "step": 4333 }, { "dpo_loss": 0.12890625, "epoch": 0.69, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 2.1854478195992243e-07, "loss": 0.1439, "projector_lr": 6.556343458797674e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 3.3125, "rewards_train/rejected": -4.28125, "sft_loss": 0.73828125, "step": 4334 }, { "dpo_loss": 0.0703125, "epoch": 0.69, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 2.18334976442483e-07, "loss": 0.0362, "projector_lr": 6.55004929327449e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84375, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.3125, "sft_loss": 0.76171875, "step": 4335 }, { "dpo_loss": 0.07275390625, "epoch": 0.69, "final_loss": 0.07275390625, "grad_norm": 0.0, "learning_rate": 2.1812524354774914e-07, "loss": 0.1594, "projector_lr": 6.543757306432475e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 6.5625, "rewards_train/rejected": -7.34375, "sft_loss": 0.578125, "step": 4336 }, { "dpo_loss": 0.05908203125, "epoch": 0.69, "final_loss": 0.05908203125, "grad_norm": 0.0, "learning_rate": 2.1791558332979694e-07, "loss": 0.0829, "projector_lr": 6.537467499893909e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.0, "sft_loss": 0.8359375, "step": 4337 }, { "dpo_loss": 0.2890625, "epoch": 0.69, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 2.177059958426841e-07, "loss": 0.1537, "projector_lr": 6.531179875280524e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 3.34375, "rewards_train/rejected": -4.8125, "sft_loss": 0.8671875, "step": 4338 }, { "dpo_loss": 0.0208740234375, "epoch": 0.69, "final_loss": 0.0208740234375, "grad_norm": 0.0, "learning_rate": 2.174964811404492e-07, "loss": 0.1012, "projector_lr": 6.524894434213477e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.59375, "sft_loss": 0.703125, "step": 4339 }, { "dpo_loss": 0.057861328125, "epoch": 0.69, "final_loss": 0.057861328125, "grad_norm": 0.0, "learning_rate": 2.1728703927711222e-07, "loss": 0.2716, "projector_lr": 6.518611178313367e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9375, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.96875, "sft_loss": 0.7421875, "step": 4340 }, { "dpo_loss": 0.302734375, "epoch": 0.69, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 2.1707767030667402e-07, "loss": 0.2849, "projector_lr": 6.512330109200221e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.49609375, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.75, "sft_loss": 0.640625, "step": 4341 }, { "dpo_loss": 0.396484375, "epoch": 0.69, "final_loss": 0.396484375, "grad_norm": 0.0, "learning_rate": 2.168683742831174e-07, "loss": 0.2542, "projector_lr": 6.506051228493522e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.71875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.0, "sft_loss": 0.6171875, "step": 4342 }, { "dpo_loss": 0.158203125, "epoch": 0.69, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 2.1665915126040546e-07, "loss": 0.1572, "projector_lr": 6.499774537812164e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.125, "sft_loss": 0.6796875, "step": 4343 }, { "dpo_loss": 0.12158203125, "epoch": 0.7, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 2.1645000129248353e-07, "loss": 0.1479, "projector_lr": 6.493500038774506e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.25, "sft_loss": 0.6015625, "step": 4344 }, { "dpo_loss": 0.12109375, "epoch": 0.7, "final_loss": 0.12109375, "grad_norm": 0.0, "learning_rate": 2.162409244332768e-07, "loss": 0.1129, "projector_lr": 6.487227732998304e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.625, "sft_loss": 0.84375, "step": 4345 }, { "dpo_loss": 0.1494140625, "epoch": 0.7, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 2.1603192073669273e-07, "loss": 0.0988, "projector_lr": 6.480957622100782e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3125, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.71875, "sft_loss": 0.5234375, "step": 4346 }, { "dpo_loss": 0.0419921875, "epoch": 0.7, "final_loss": 0.0419921875, "grad_norm": 0.0, "learning_rate": 2.1582299025661928e-07, "loss": 0.1752, "projector_lr": 6.474689707698578e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.5, "sft_loss": 0.7265625, "step": 4347 }, { "dpo_loss": 0.251953125, "epoch": 0.7, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 2.1561413304692623e-07, "loss": 0.1554, "projector_lr": 6.468423991407787e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.609375, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.1875, "sft_loss": 0.72265625, "step": 4348 }, { "dpo_loss": 0.0233154296875, "epoch": 0.7, "final_loss": 0.0233154296875, "grad_norm": 0.0, "learning_rate": 2.1540534916146318e-07, "loss": 0.0185, "projector_lr": 6.462160474843896e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.31640625, "rewards_train/margins": 6.96875, "rewards_train/rejected": -7.28125, "sft_loss": 0.59375, "step": 4349 }, { "dpo_loss": 0.158203125, "epoch": 0.7, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 2.151966386540623e-07, "loss": 0.0884, "projector_lr": 6.455899159621869e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.53125, "sft_loss": 0.76171875, "step": 4350 }, { "dpo_loss": 0.349609375, "epoch": 0.7, "final_loss": 0.349609375, "grad_norm": 0.0, "learning_rate": 2.1498800157853575e-07, "loss": 0.2151, "projector_lr": 6.449640047356073e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 3.90625, "rewards_train/rejected": -5.375, "sft_loss": 0.8828125, "step": 4351 }, { "dpo_loss": 0.05322265625, "epoch": 0.7, "final_loss": 0.05322265625, "grad_norm": 0.0, "learning_rate": 2.1477943798867755e-07, "loss": 0.1507, "projector_lr": 6.443383139660327e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.068359375, "rewards_train/margins": 4.875, "rewards_train/rejected": -4.78125, "sft_loss": 0.72265625, "step": 4352 }, { "dpo_loss": 0.2333984375, "epoch": 0.7, "final_loss": 0.2333984375, "grad_norm": 0.0, "learning_rate": 2.145709479382622e-07, "loss": 0.6511, "projector_lr": 6.437128438147866e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.65625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.84375, "sft_loss": 0.859375, "step": 4353 }, { "dpo_loss": 0.1083984375, "epoch": 0.7, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 2.1436253148104543e-07, "loss": 0.0917, "projector_lr": 6.430875944431363e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.392578125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.40625, "sft_loss": 0.875, "step": 4354 }, { "dpo_loss": 0.609375, "epoch": 0.7, "final_loss": 0.609375, "grad_norm": 0.0, "learning_rate": 2.1415418867076385e-07, "loss": 0.3271, "projector_lr": 6.424625660122915e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.75, "rewards_train/margins": 2.921875, "rewards_train/rejected": -3.671875, "sft_loss": 0.77734375, "step": 4355 }, { "dpo_loss": 0.068359375, "epoch": 0.7, "final_loss": 0.068359375, "grad_norm": 0.0, "learning_rate": 2.139459195611356e-07, "loss": 0.0631, "projector_lr": 6.418377586834069e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.72265625, "rewards_train/margins": 6.375, "rewards_train/rejected": -7.09375, "sft_loss": 0.5390625, "step": 4356 }, { "dpo_loss": 0.1357421875, "epoch": 0.7, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 2.1373772420585927e-07, "loss": 0.1869, "projector_lr": 6.412131726175779e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 3.28125, "rewards_train/rejected": -4.40625, "sft_loss": 0.76171875, "step": 4357 }, { "dpo_loss": 0.166015625, "epoch": 0.7, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 2.1352960265861464e-07, "loss": 0.1596, "projector_lr": 6.40588807975844e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.78125, "rewards_train/margins": 3.0625, "rewards_train/rejected": -3.84375, "sft_loss": 0.83984375, "step": 4358 }, { "dpo_loss": 0.052001953125, "epoch": 0.7, "final_loss": 0.052001953125, "grad_norm": 0.0, "learning_rate": 2.133215549730622e-07, "loss": 0.0484, "projector_lr": 6.399646649191867e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.455078125, "rewards_train/margins": 4.5, "rewards_train/rejected": -4.9375, "sft_loss": 0.73046875, "step": 4359 }, { "dpo_loss": 0.431640625, "epoch": 0.7, "final_loss": 0.431640625, "grad_norm": 0.0, "learning_rate": 2.13113581202844e-07, "loss": 0.2986, "projector_lr": 6.39340743608532e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 2.75, "rewards_train/rejected": -4.59375, "sft_loss": 0.91796875, "step": 4360 }, { "dpo_loss": 0.392578125, "epoch": 0.7, "final_loss": 0.392578125, "grad_norm": 0.0, "learning_rate": 2.129056814015824e-07, "loss": 0.2429, "projector_lr": 6.387170442047471e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 3.015625, "rewards_train/rejected": -4.40625, "sft_loss": 0.88671875, "step": 4361 }, { "dpo_loss": 0.07470703125, "epoch": 0.7, "final_loss": 0.07470703125, "grad_norm": 0.0, "learning_rate": 2.1269785562288134e-07, "loss": 0.0546, "projector_lr": 6.380935668686441e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06005859375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.0, "sft_loss": 0.77734375, "step": 4362 }, { "dpo_loss": 0.0751953125, "epoch": 0.7, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 2.124901039203247e-07, "loss": 0.2258, "projector_lr": 6.374703117609741e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.8125, "sft_loss": 0.7578125, "step": 4363 }, { "dpo_loss": 0.041748046875, "epoch": 0.7, "final_loss": 0.041748046875, "grad_norm": 0.0, "learning_rate": 2.1228242634747833e-07, "loss": 0.0949, "projector_lr": 6.36847279042435e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.78125, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.90625, "sft_loss": 1.0703125, "step": 4364 }, { "dpo_loss": 0.10400390625, "epoch": 0.7, "final_loss": 0.10400390625, "grad_norm": 0.0, "learning_rate": 2.1207482295788808e-07, "loss": 0.0868, "projector_lr": 6.362244688736643e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.3125, "sft_loss": 0.87109375, "step": 4365 }, { "dpo_loss": 0.3828125, "epoch": 0.7, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 2.118672938050817e-07, "loss": 0.268, "projector_lr": 6.356018814152452e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.09375, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.6875, "sft_loss": 0.65234375, "step": 4366 }, { "dpo_loss": 0.01519775390625, "epoch": 0.7, "final_loss": 0.01519775390625, "grad_norm": 0.0, "learning_rate": 2.1165983894256646e-07, "loss": 0.0785, "projector_lr": 6.349795168276994e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.59375, "sft_loss": 0.63671875, "step": 4367 }, { "dpo_loss": 0.1259765625, "epoch": 0.7, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 2.114524584238317e-07, "loss": 0.2914, "projector_lr": 6.343573752714951e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.384765625, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.6875, "sft_loss": 0.8203125, "step": 4368 }, { "dpo_loss": 0.29296875, "epoch": 0.7, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 2.112451523023467e-07, "loss": 0.2666, "projector_lr": 6.337354569070401e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.3125, "sft_loss": 0.66796875, "step": 4369 }, { "dpo_loss": 0.2392578125, "epoch": 0.7, "final_loss": 0.2392578125, "grad_norm": 0.0, "learning_rate": 2.1103792063156229e-07, "loss": 0.1592, "projector_lr": 6.331137618946869e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.78125, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.5, "sft_loss": 0.80078125, "step": 4370 }, { "dpo_loss": 0.1171875, "epoch": 0.7, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 2.1083076346490968e-07, "loss": 0.3004, "projector_lr": 6.324922903947291e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.0, "sft_loss": 0.69140625, "step": 4371 }, { "dpo_loss": 0.7109375, "epoch": 0.7, "final_loss": 0.7109375, "grad_norm": 0.0, "learning_rate": 2.1062368085580085e-07, "loss": 0.3662, "projector_lr": 6.318710425674026e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.6875, "sft_loss": 0.6875, "step": 4372 }, { "dpo_loss": 0.10595703125, "epoch": 0.7, "final_loss": 0.10595703125, "grad_norm": 0.0, "learning_rate": 2.1041667285762848e-07, "loss": 0.1435, "projector_lr": 6.312500185728855e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 3.515625, "rewards_train/rejected": -4.8125, "sft_loss": 0.5859375, "step": 4373 }, { "dpo_loss": 0.205078125, "epoch": 0.7, "final_loss": 0.205078125, "grad_norm": 0.0, "learning_rate": 2.102097395237666e-07, "loss": 0.4124, "projector_lr": 6.306292185712997e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.78125, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.9375, "sft_loss": 0.79296875, "step": 4374 }, { "dpo_loss": 0.359375, "epoch": 0.7, "final_loss": 0.359375, "grad_norm": 0.0, "learning_rate": 2.1000288090756935e-07, "loss": 0.3048, "projector_lr": 6.300086427227081e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.71875, "rewards_train/margins": 2.390625, "rewards_train/rejected": -4.09375, "sft_loss": 0.859375, "step": 4375 }, { "dpo_loss": 0.390625, "epoch": 0.7, "final_loss": 0.390625, "grad_norm": 0.0, "learning_rate": 2.0979609706237166e-07, "loss": 0.2425, "projector_lr": 6.29388291187115e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.9375, "sft_loss": 0.828125, "step": 4376 }, { "dpo_loss": 0.07958984375, "epoch": 0.7, "final_loss": 0.07958984375, "grad_norm": 0.0, "learning_rate": 2.0958938804148978e-07, "loss": 0.1754, "projector_lr": 6.287681641244694e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.78125, "sft_loss": 0.609375, "step": 4377 }, { "dpo_loss": 0.41796875, "epoch": 0.7, "final_loss": 0.41796875, "grad_norm": 0.0, "learning_rate": 2.0938275389821996e-07, "loss": 0.3939, "projector_lr": 6.2814826169466e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.25, "sft_loss": 0.84375, "step": 4378 }, { "dpo_loss": 0.0341796875, "epoch": 0.7, "final_loss": 0.0341796875, "grad_norm": 0.0, "learning_rate": 2.0917619468583925e-07, "loss": 0.106, "projector_lr": 6.275285840575178e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.40625, "sft_loss": 0.796875, "step": 4379 }, { "dpo_loss": 0.056640625, "epoch": 0.7, "final_loss": 0.056640625, "grad_norm": 0.0, "learning_rate": 2.0896971045760598e-07, "loss": 0.1589, "projector_lr": 6.269091313728179e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 6.46875, "rewards_train/rejected": -7.1875, "sft_loss": 0.7890625, "step": 4380 }, { "dpo_loss": 0.0703125, "epoch": 0.7, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 2.087633012667585e-07, "loss": 0.1361, "projector_lr": 6.262899038002756e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.28125, "sft_loss": 1.2265625, "step": 4381 }, { "dpo_loss": 0.03955078125, "epoch": 0.7, "final_loss": 0.03955078125, "grad_norm": 0.0, "learning_rate": 2.085569671665159e-07, "loss": 0.3467, "projector_lr": 6.256709014995477e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34375, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.125, "sft_loss": 0.53515625, "step": 4382 }, { "dpo_loss": 0.09326171875, "epoch": 0.7, "final_loss": 0.09326171875, "grad_norm": 0.0, "learning_rate": 2.0835070821007834e-07, "loss": 0.2567, "projector_lr": 6.25052124630235e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.59375, "sft_loss": 1.1484375, "step": 4383 }, { "dpo_loss": 0.00994873046875, "epoch": 0.7, "final_loss": 0.00994873046875, "grad_norm": 0.0, "learning_rate": 2.0814452445062614e-07, "loss": 0.1193, "projector_lr": 6.244335733518785e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.625, "sft_loss": 1.03125, "step": 4384 }, { "dpo_loss": 0.38671875, "epoch": 0.7, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 2.0793841594132038e-07, "loss": 0.1965, "projector_lr": 6.238152478239613e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 3.375, "rewards_train/rejected": -5.25, "sft_loss": 0.734375, "step": 4385 }, { "dpo_loss": 0.0093994140625, "epoch": 0.7, "final_loss": 0.0093994140625, "grad_norm": 0.0, "learning_rate": 2.0773238273530264e-07, "loss": 0.0531, "projector_lr": 6.231971482059079e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 7.0625, "rewards_train/rejected": -8.0, "sft_loss": 1.0703125, "step": 4386 }, { "dpo_loss": 0.06298828125, "epoch": 0.7, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 2.0752642488569556e-07, "loss": 0.0702, "projector_lr": 6.225792746570867e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.265625, "rewards_train/margins": 5.65625, "rewards_train/rejected": -5.9375, "sft_loss": 0.53515625, "step": 4387 }, { "dpo_loss": 0.205078125, "epoch": 0.7, "final_loss": 0.205078125, "grad_norm": 0.0, "learning_rate": 2.073205424456016e-07, "loss": 0.2769, "projector_lr": 6.219616273368048e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.609375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -6.15625, "sft_loss": 0.95703125, "step": 4388 }, { "dpo_loss": 0.03759765625, "epoch": 0.7, "final_loss": 0.03759765625, "grad_norm": 0.0, "learning_rate": 2.0711473546810466e-07, "loss": 0.2278, "projector_lr": 6.21344206404314e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.0, "sft_loss": 0.6328125, "step": 4389 }, { "dpo_loss": 0.115234375, "epoch": 0.7, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 2.069090040062681e-07, "loss": 0.075, "projector_lr": 6.207270120188043e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.21875, "sft_loss": 0.88671875, "step": 4390 }, { "dpo_loss": 0.1396484375, "epoch": 0.7, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 2.0670334811313683e-07, "loss": 0.257, "projector_lr": 6.201100443394106e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.74609375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.125, "sft_loss": 0.62890625, "step": 4391 }, { "dpo_loss": 0.37890625, "epoch": 0.7, "final_loss": 0.37890625, "grad_norm": 0.0, "learning_rate": 2.064977678417355e-07, "loss": 0.2007, "projector_lr": 6.194933035252066e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.875, "sft_loss": 0.703125, "step": 4392 }, { "dpo_loss": 0.490234375, "epoch": 0.7, "final_loss": 0.490234375, "grad_norm": 0.0, "learning_rate": 2.0629226324507028e-07, "loss": 0.2628, "projector_lr": 6.188767897352108e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.4375, "sft_loss": 0.83984375, "step": 4393 }, { "dpo_loss": 0.0810546875, "epoch": 0.7, "final_loss": 0.0810546875, "grad_norm": 0.0, "learning_rate": 2.0608683437612622e-07, "loss": 0.0579, "projector_lr": 6.182605031283787e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 6.03125, "rewards_train/rejected": -6.5, "sft_loss": 0.68359375, "step": 4394 }, { "dpo_loss": 0.04638671875, "epoch": 0.7, "final_loss": 0.04638671875, "grad_norm": 0.0, "learning_rate": 2.0588148128787054e-07, "loss": 0.0348, "projector_lr": 6.176444438636116e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 7.1875, "rewards_train/rejected": -8.125, "sft_loss": 0.6015625, "step": 4395 }, { "dpo_loss": 0.1923828125, "epoch": 0.7, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 2.0567620403324964e-07, "loss": 0.1397, "projector_lr": 6.170286120997489e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.109375, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.09375, "sft_loss": 0.84765625, "step": 4396 }, { "dpo_loss": 0.01806640625, "epoch": 0.7, "final_loss": 0.01806640625, "grad_norm": 0.0, "learning_rate": 2.0547100266519134e-07, "loss": 0.1415, "projector_lr": 6.16413007995574e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 6.375, "rewards_train/rejected": -7.28125, "sft_loss": 0.57421875, "step": 4397 }, { "dpo_loss": 0.3515625, "epoch": 0.7, "final_loss": 0.3515625, "grad_norm": 0.0, "learning_rate": 2.052658772366032e-07, "loss": 0.3942, "projector_lr": 6.157976317098097e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.078125, "rewards_train/margins": 3.09375, "rewards_train/rejected": -5.1875, "sft_loss": 0.6328125, "step": 4398 }, { "dpo_loss": 0.1328125, "epoch": 0.7, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 2.050608278003736e-07, "loss": 0.214, "projector_lr": 6.151824834011209e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 3.390625, "rewards_train/rejected": -5.03125, "sft_loss": 0.890625, "step": 4399 }, { "dpo_loss": 0.3359375, "epoch": 0.7, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 2.048558544093708e-07, "loss": 0.3392, "projector_lr": 6.145675632281125e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.03125, "sft_loss": 0.56640625, "step": 4400 }, { "dpo_loss": 0.2216796875, "epoch": 0.7, "final_loss": 0.2216796875, "grad_norm": 0.0, "learning_rate": 2.0465095711644432e-07, "loss": 0.1158, "projector_lr": 6.13952871349333e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5625, "rewards_train/margins": 3.40625, "rewards_train/rejected": -3.96875, "sft_loss": 0.8515625, "step": 4401 }, { "dpo_loss": 0.060302734375, "epoch": 0.7, "final_loss": 0.060302734375, "grad_norm": 0.0, "learning_rate": 2.0444613597442328e-07, "loss": 0.2567, "projector_lr": 6.133384079232699e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53515625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.1875, "sft_loss": 0.78125, "step": 4402 }, { "dpo_loss": 0.2578125, "epoch": 0.7, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 2.0424139103611754e-07, "loss": 0.1594, "projector_lr": 6.127241731083527e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.046875, "rewards_train/margins": 4.09375, "rewards_train/rejected": -6.125, "sft_loss": 1.203125, "step": 4403 }, { "dpo_loss": 0.0208740234375, "epoch": 0.7, "final_loss": 0.0208740234375, "grad_norm": 0.0, "learning_rate": 2.0403672235431707e-07, "loss": 0.215, "projector_lr": 6.121101670629512e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.03125, "rewards_train/margins": 6.4375, "rewards_train/rejected": -7.46875, "sft_loss": 0.5625, "step": 4404 }, { "dpo_loss": 0.0859375, "epoch": 0.7, "final_loss": 0.0859375, "grad_norm": 0.0, "learning_rate": 2.0383212998179257e-07, "loss": 0.0629, "projector_lr": 6.114963899453777e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.15625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.84375, "sft_loss": 0.494140625, "step": 4405 }, { "dpo_loss": 0.3046875, "epoch": 0.7, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 2.036276139712945e-07, "loss": 0.1776, "projector_lr": 6.108828419138836e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 2.96875, "rewards_train/rejected": -4.25, "sft_loss": 0.734375, "step": 4406 }, { "dpo_loss": 0.25390625, "epoch": 0.71, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 2.034231743755546e-07, "loss": 0.1711, "projector_lr": 6.102695231266639e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.484375, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.65625, "sft_loss": 0.7109375, "step": 4407 }, { "dpo_loss": 0.12890625, "epoch": 0.71, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 2.0321881124728345e-07, "loss": 0.3929, "projector_lr": 6.096564337418504e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 3.03125, "rewards_train/rejected": -3.625, "sft_loss": 0.56640625, "step": 4408 }, { "dpo_loss": 0.1708984375, "epoch": 0.71, "final_loss": 0.1708984375, "grad_norm": 0.0, "learning_rate": 2.030145246391733e-07, "loss": 0.1282, "projector_lr": 6.0904357391752e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.1875, "sft_loss": 0.80859375, "step": 4409 }, { "dpo_loss": 0.09130859375, "epoch": 0.71, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 2.0281031460389574e-07, "loss": 0.0485, "projector_lr": 6.084309438116873e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.4375, "sft_loss": 0.74609375, "step": 4410 }, { "dpo_loss": 0.27734375, "epoch": 0.71, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 2.0260618119410332e-07, "loss": 0.4498, "projector_lr": 6.0781854358231e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.96875, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.6875, "sft_loss": 0.7109375, "step": 4411 }, { "dpo_loss": 0.341796875, "epoch": 0.71, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 2.0240212446242833e-07, "loss": 0.3448, "projector_lr": 6.07206373387285e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.078125, "rewards_train/margins": 3.234375, "rewards_train/rejected": -5.3125, "sft_loss": 1.4453125, "step": 4412 }, { "dpo_loss": 0.0106201171875, "epoch": 0.71, "final_loss": 0.0106201171875, "grad_norm": 0.0, "learning_rate": 2.0219814446148342e-07, "loss": 0.035, "projector_lr": 6.065944333844503e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 6.21875, "rewards_train/rejected": -7.09375, "sft_loss": 0.6953125, "step": 4413 }, { "dpo_loss": 0.045166015625, "epoch": 0.71, "final_loss": 0.045166015625, "grad_norm": 0.0, "learning_rate": 2.019942412438614e-07, "loss": 0.1907, "projector_lr": 6.059827237315842e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5, "rewards_train/margins": 7.15625, "rewards_train/rejected": -7.6875, "sft_loss": 0.7421875, "step": 4414 }, { "dpo_loss": 0.01214599609375, "epoch": 0.71, "final_loss": 0.01214599609375, "grad_norm": 0.0, "learning_rate": 2.017904148621356e-07, "loss": 0.0501, "projector_lr": 6.053712445864069e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3125, "rewards_train/margins": 6.1875, "rewards_train/rejected": -7.5, "sft_loss": 0.69921875, "step": 4415 }, { "dpo_loss": 0.0322265625, "epoch": 0.71, "final_loss": 0.0322265625, "grad_norm": 0.0, "learning_rate": 2.015866653688592e-07, "loss": 0.1726, "projector_lr": 6.047599961065777e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.244140625, "rewards_train/margins": 6.625, "rewards_train/rejected": -6.84375, "sft_loss": 0.6953125, "step": 4416 }, { "dpo_loss": 0.048583984375, "epoch": 0.71, "final_loss": 0.048583984375, "grad_norm": 0.0, "learning_rate": 2.013829928165655e-07, "loss": 0.073, "projector_lr": 6.041489784496965e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.8125, "sft_loss": 0.94140625, "step": 4417 }, { "dpo_loss": 0.2099609375, "epoch": 0.71, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 2.0117939725776845e-07, "loss": 0.2206, "projector_lr": 6.035381917733054e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84375, "rewards_train/margins": 2.765625, "rewards_train/rejected": -3.625, "sft_loss": 0.8359375, "step": 4418 }, { "dpo_loss": 0.00970458984375, "epoch": 0.71, "final_loss": 0.00970458984375, "grad_norm": 0.0, "learning_rate": 2.0097587874496163e-07, "loss": 0.2072, "projector_lr": 6.02927636234885e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.17578125, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.375, "sft_loss": 0.6796875, "step": 4419 }, { "dpo_loss": 0.08984375, "epoch": 0.71, "final_loss": 0.08984375, "grad_norm": 0.0, "learning_rate": 2.00772437330619e-07, "loss": 0.0596, "projector_lr": 6.02317311991857e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.375, "sft_loss": 0.76953125, "step": 4420 }, { "dpo_loss": 0.06982421875, "epoch": 0.71, "final_loss": 0.06982421875, "grad_norm": 0.0, "learning_rate": 2.0056907306719438e-07, "loss": 0.1229, "projector_lr": 6.017072192015832e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2138671875, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.03125, "sft_loss": 0.88671875, "step": 4421 }, { "dpo_loss": 0.1748046875, "epoch": 0.71, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 2.0036578600712228e-07, "loss": 0.2456, "projector_lr": 6.010973580213669e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.40625, "sft_loss": 1.0, "step": 4422 }, { "dpo_loss": 0.087890625, "epoch": 0.71, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 2.0016257620281657e-07, "loss": 0.0572, "projector_lr": 6.004877286084498e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1435546875, "rewards_train/margins": 5.5, "rewards_train/rejected": -5.65625, "sft_loss": 0.6640625, "step": 4423 }, { "dpo_loss": 0.193359375, "epoch": 0.71, "final_loss": 0.193359375, "grad_norm": 0.0, "learning_rate": 1.9995944370667195e-07, "loss": 0.2709, "projector_lr": 5.998783311200159e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.875, "sft_loss": 0.93359375, "step": 4424 }, { "dpo_loss": 0.015380859375, "epoch": 0.71, "final_loss": 0.015380859375, "grad_norm": 0.0, "learning_rate": 1.9975638857106258e-07, "loss": 0.0457, "projector_lr": 5.992691657131877e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -7.0625, "sft_loss": 0.70703125, "step": 4425 }, { "dpo_loss": 0.376953125, "epoch": 0.71, "final_loss": 0.376953125, "grad_norm": 0.0, "learning_rate": 1.9955341084834288e-07, "loss": 0.3464, "projector_lr": 5.986602325450287e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 2.625, "rewards_train/rejected": -4.28125, "sft_loss": 0.91796875, "step": 4426 }, { "dpo_loss": 0.1533203125, "epoch": 0.71, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 1.9935051059084724e-07, "loss": 0.1269, "projector_lr": 5.980515317725418e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.8125, "sft_loss": 0.796875, "step": 4427 }, { "dpo_loss": 0.2451171875, "epoch": 0.71, "final_loss": 0.2451171875, "grad_norm": 0.0, "learning_rate": 1.9914768785089042e-07, "loss": 0.1575, "projector_lr": 5.974430635526713e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 3.6875, "rewards_train/rejected": -5.0, "sft_loss": 0.95703125, "step": 4428 }, { "dpo_loss": 0.12890625, "epoch": 0.71, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 1.9894494268076677e-07, "loss": 0.1785, "projector_lr": 5.968348280423004e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2236328125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -4.53125, "sft_loss": 0.51953125, "step": 4429 }, { "dpo_loss": 0.19140625, "epoch": 0.71, "final_loss": 0.19140625, "grad_norm": 0.0, "learning_rate": 1.9874227513275088e-07, "loss": 0.1455, "projector_lr": 5.962268253982527e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1953125, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.1875, "sft_loss": 0.76953125, "step": 4430 }, { "dpo_loss": 0.3671875, "epoch": 0.71, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 1.9853968525909697e-07, "loss": 0.2341, "projector_lr": 5.95619055777291e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.03125, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.625, "sft_loss": 1.0625, "step": 4431 }, { "dpo_loss": 0.6484375, "epoch": 0.71, "final_loss": 0.6484375, "grad_norm": 0.0, "learning_rate": 1.9833717311204002e-07, "loss": 0.5822, "projector_lr": 5.950115193361201e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.53125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.625, "sft_loss": 0.6484375, "step": 4432 }, { "dpo_loss": 0.36328125, "epoch": 0.71, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 1.9813473874379395e-07, "loss": 0.2199, "projector_lr": 5.944042162313819e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 2.375, "rewards_train/rejected": -3.6875, "sft_loss": 0.88671875, "step": 4433 }, { "dpo_loss": 0.138671875, "epoch": 0.71, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 1.9793238220655373e-07, "loss": 0.2978, "projector_lr": 5.937971466196612e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96875, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.21875, "sft_loss": 0.58984375, "step": 4434 }, { "dpo_loss": 0.3671875, "epoch": 0.71, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 1.9773010355249303e-07, "loss": 0.4379, "projector_lr": 5.931903106574791e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.109375, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.3125, "sft_loss": 0.6328125, "step": 4435 }, { "dpo_loss": 0.038818359375, "epoch": 0.71, "final_loss": 0.038818359375, "grad_norm": 0.0, "learning_rate": 1.9752790283376651e-07, "loss": 0.0503, "projector_lr": 5.925837085012996e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.859375, "rewards_train/margins": 7.75, "rewards_train/rejected": -8.625, "sft_loss": 0.6484375, "step": 4436 }, { "dpo_loss": 0.251953125, "epoch": 0.71, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 1.97325780102508e-07, "loss": 0.1891, "projector_lr": 5.919773403075241e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.390625, "rewards_train/margins": 3.1875, "rewards_train/rejected": -4.5625, "sft_loss": 0.95703125, "step": 4437 }, { "dpo_loss": 0.045654296875, "epoch": 0.71, "final_loss": 0.045654296875, "grad_norm": 0.0, "learning_rate": 1.971237354108321e-07, "loss": 0.1248, "projector_lr": 5.913712062324964e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -7.28125, "sft_loss": 1.2578125, "step": 4438 }, { "dpo_loss": 0.035400390625, "epoch": 0.71, "final_loss": 0.035400390625, "grad_norm": 0.0, "learning_rate": 1.96921768810832e-07, "loss": 0.137, "projector_lr": 5.90765306432496e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.78125, "sft_loss": 0.65234375, "step": 4439 }, { "dpo_loss": 0.25390625, "epoch": 0.71, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 1.9671988035458193e-07, "loss": 0.1603, "projector_lr": 5.901596410637458e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9375, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.0, "sft_loss": 0.71484375, "step": 4440 }, { "dpo_loss": 0.51171875, "epoch": 0.71, "final_loss": 0.51171875, "grad_norm": 0.0, "learning_rate": 1.9651807009413525e-07, "loss": 0.3202, "projector_lr": 5.895542102824057e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 3.4375, "rewards_train/rejected": -4.5625, "sft_loss": 0.6953125, "step": 4441 }, { "dpo_loss": 0.1923828125, "epoch": 0.71, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 1.963163380815257e-07, "loss": 0.114, "projector_lr": 5.889490142445772e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.875, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.3125, "sft_loss": 0.609375, "step": 4442 }, { "dpo_loss": 0.609375, "epoch": 0.71, "final_loss": 0.609375, "grad_norm": 0.0, "learning_rate": 1.9611468436876643e-07, "loss": 0.3172, "projector_lr": 5.883440531062993e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.140625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.4375, "sft_loss": 0.81640625, "step": 4443 }, { "dpo_loss": 0.0244140625, "epoch": 0.71, "final_loss": 0.0244140625, "grad_norm": 0.0, "learning_rate": 1.9591310900785063e-07, "loss": 0.6591, "projector_lr": 5.877393270235519e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.78125, "sft_loss": 0.6640625, "step": 4444 }, { "dpo_loss": 0.1201171875, "epoch": 0.71, "final_loss": 0.1201171875, "grad_norm": 0.0, "learning_rate": 1.9571161205075087e-07, "loss": 0.1282, "projector_lr": 5.871348361522527e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.984375, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.625, "sft_loss": 0.75390625, "step": 4445 }, { "dpo_loss": 0.03466796875, "epoch": 0.71, "final_loss": 0.03466796875, "grad_norm": 0.0, "learning_rate": 1.9551019354942027e-07, "loss": 0.052, "projector_lr": 5.865305806482609e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.8125, "sft_loss": 0.7109375, "step": 4446 }, { "dpo_loss": 0.10400390625, "epoch": 0.71, "final_loss": 0.10400390625, "grad_norm": 0.0, "learning_rate": 1.9530885355579114e-07, "loss": 0.2209, "projector_lr": 5.859265606673734e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.25, "sft_loss": 0.5078125, "step": 4447 }, { "dpo_loss": 0.140625, "epoch": 0.71, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 1.9510759212177558e-07, "loss": 0.2403, "projector_lr": 5.853227763653268e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.71875, "sft_loss": 0.79296875, "step": 4448 }, { "dpo_loss": 0.0634765625, "epoch": 0.71, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 1.949064092992655e-07, "loss": 0.0419, "projector_lr": 5.847192278977965e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.59375, "sft_loss": 0.8203125, "step": 4449 }, { "dpo_loss": 0.0299072265625, "epoch": 0.71, "final_loss": 0.0299072265625, "grad_norm": 0.0, "learning_rate": 1.9470530514013288e-07, "loss": 0.0493, "projector_lr": 5.841159154203987e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.125, "sft_loss": 0.77734375, "step": 4450 }, { "dpo_loss": 0.65234375, "epoch": 0.71, "final_loss": 0.65234375, "grad_norm": 0.0, "learning_rate": 1.945042796962288e-07, "loss": 0.4671, "projector_lr": 5.835128390886865e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5078125, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.84375, "sft_loss": 0.609375, "step": 4451 }, { "dpo_loss": 0.09375, "epoch": 0.71, "final_loss": 0.09375, "grad_norm": 0.0, "learning_rate": 1.9430333301938467e-07, "loss": 0.2196, "projector_lr": 5.82909999058154e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.90625, "rewards_train/margins": 2.796875, "rewards_train/rejected": -3.703125, "sft_loss": 0.93359375, "step": 4452 }, { "dpo_loss": 0.01416015625, "epoch": 0.71, "final_loss": 0.01416015625, "grad_norm": 0.0, "learning_rate": 1.9410246516141115e-07, "loss": 0.1008, "projector_lr": 5.823073954842335e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4609375, "rewards_train/margins": 6.875, "rewards_train/rejected": -7.34375, "sft_loss": 0.62890625, "step": 4453 }, { "dpo_loss": 0.6015625, "epoch": 0.71, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 1.939016761740988e-07, "loss": 0.377, "projector_lr": 5.817050285222965e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.09375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -6.65625, "sft_loss": 0.71484375, "step": 4454 }, { "dpo_loss": 0.482421875, "epoch": 0.71, "final_loss": 0.482421875, "grad_norm": 0.0, "learning_rate": 1.937009661092175e-07, "loss": 0.2487, "projector_lr": 5.811028983276525e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.875, "sft_loss": 0.578125, "step": 4455 }, { "dpo_loss": 0.1943359375, "epoch": 0.71, "final_loss": 0.1943359375, "grad_norm": 0.0, "learning_rate": 1.9350033501851736e-07, "loss": 0.1212, "projector_lr": 5.805010050555522e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.15625, "sft_loss": 0.64453125, "step": 4456 }, { "dpo_loss": 0.107421875, "epoch": 0.71, "final_loss": 0.107421875, "grad_norm": 0.0, "learning_rate": 1.9329978295372773e-07, "loss": 0.1058, "projector_lr": 5.798993488611833e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.03125, "sft_loss": 0.71484375, "step": 4457 }, { "dpo_loss": 0.33984375, "epoch": 0.71, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 1.9309930996655749e-07, "loss": 0.2603, "projector_lr": 5.792979298996725e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.71875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.25, "sft_loss": 0.9609375, "step": 4458 }, { "dpo_loss": 0.1318359375, "epoch": 0.71, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 1.9289891610869563e-07, "loss": 0.1461, "projector_lr": 5.78696748326087e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.625, "sft_loss": 0.80078125, "step": 4459 }, { "dpo_loss": 0.408203125, "epoch": 0.71, "final_loss": 0.408203125, "grad_norm": 0.0, "learning_rate": 1.926986014318102e-07, "loss": 0.2073, "projector_lr": 5.780958042954306e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.265625, "rewards_train/margins": 3.859375, "rewards_train/rejected": -5.125, "sft_loss": 0.89453125, "step": 4460 }, { "dpo_loss": 0.013671875, "epoch": 0.71, "final_loss": 0.013671875, "grad_norm": 0.0, "learning_rate": 1.9249836598754916e-07, "loss": 0.1573, "projector_lr": 5.774950979626475e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34765625, "rewards_train/margins": 6.96875, "rewards_train/rejected": -7.34375, "sft_loss": 0.66015625, "step": 4461 }, { "dpo_loss": 0.16796875, "epoch": 0.71, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 1.9229820982753968e-07, "loss": 0.2823, "projector_lr": 5.768946294826191e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.421875, "rewards_train/margins": 6.03125, "rewards_train/rejected": -7.46875, "sft_loss": 0.7578125, "step": 4462 }, { "dpo_loss": 0.0966796875, "epoch": 0.71, "final_loss": 0.0966796875, "grad_norm": 0.0, "learning_rate": 1.9209813300338912e-07, "loss": 0.1516, "projector_lr": 5.762943990101674e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.125, "sft_loss": 1.1796875, "step": 4463 }, { "dpo_loss": 0.052734375, "epoch": 0.71, "final_loss": 0.052734375, "grad_norm": 0.0, "learning_rate": 1.9189813556668365e-07, "loss": 0.0558, "projector_lr": 5.75694406700051e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.921875, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.65625, "sft_loss": 0.671875, "step": 4464 }, { "dpo_loss": 0.058349609375, "epoch": 0.71, "final_loss": 0.058349609375, "grad_norm": 0.0, "learning_rate": 1.9169821756898985e-07, "loss": 0.0967, "projector_lr": 5.750946527069696e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1875, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.15625, "sft_loss": 0.87890625, "step": 4465 }, { "dpo_loss": 0.08447265625, "epoch": 0.71, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 1.9149837906185263e-07, "loss": 0.1031, "projector_lr": 5.744951371855579e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.9375, "sft_loss": 0.56640625, "step": 4466 }, { "dpo_loss": 0.228515625, "epoch": 0.71, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 1.912986200967975e-07, "loss": 0.127, "projector_lr": 5.738958602903925e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.047119140625, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.21875, "sft_loss": 0.5625, "step": 4467 }, { "dpo_loss": 0.62890625, "epoch": 0.71, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 1.910989407253288e-07, "loss": 0.3373, "projector_lr": 5.732968221759864e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.75, "sft_loss": 0.59765625, "step": 4468 }, { "dpo_loss": 0.06982421875, "epoch": 0.72, "final_loss": 0.06982421875, "grad_norm": 0.0, "learning_rate": 1.9089934099893085e-07, "loss": 0.4477, "projector_lr": 5.726980229967926e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.25, "sft_loss": 0.99609375, "step": 4469 }, { "dpo_loss": 0.060302734375, "epoch": 0.72, "final_loss": 0.060302734375, "grad_norm": 0.0, "learning_rate": 1.90699820969067e-07, "loss": 0.0356, "projector_lr": 5.72099462907201e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75390625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.4375, "sft_loss": 0.546875, "step": 4470 }, { "dpo_loss": 0.10888671875, "epoch": 0.72, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 1.9050038068718027e-07, "loss": 0.0702, "projector_lr": 5.715011420615408e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4140625, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.4375, "sft_loss": 0.80859375, "step": 4471 }, { "dpo_loss": 0.07470703125, "epoch": 0.72, "final_loss": 0.07470703125, "grad_norm": 0.0, "learning_rate": 1.9030102020469284e-07, "loss": 0.1813, "projector_lr": 5.709030606140786e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 4.5625, "rewards_train/rejected": -6.03125, "sft_loss": 0.8515625, "step": 4472 }, { "dpo_loss": 0.244140625, "epoch": 0.72, "final_loss": 0.244140625, "grad_norm": 0.0, "learning_rate": 1.9010173957300696e-07, "loss": 0.1502, "projector_lr": 5.703052187190209e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -6.4375, "sft_loss": 0.7734375, "step": 4473 }, { "dpo_loss": 0.037109375, "epoch": 0.72, "final_loss": 0.037109375, "grad_norm": 0.0, "learning_rate": 1.899025388435036e-07, "loss": 0.1491, "projector_lr": 5.697076165305108e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.25, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.84375, "sft_loss": 0.84375, "step": 4474 }, { "dpo_loss": 0.515625, "epoch": 0.72, "final_loss": 0.515625, "grad_norm": 0.0, "learning_rate": 1.897034180675434e-07, "loss": 0.299, "projector_lr": 5.691102542026303e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.359375, "rewards_train/margins": 2.71875, "rewards_train/rejected": -4.0625, "sft_loss": 0.73046875, "step": 4475 }, { "dpo_loss": 0.146484375, "epoch": 0.72, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 1.895043772964663e-07, "loss": 0.4809, "projector_lr": 5.685131318893989e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.046875, "rewards_train/margins": 4.5, "rewards_train/rejected": -6.5625, "sft_loss": 0.875, "step": 4476 }, { "dpo_loss": 0.341796875, "epoch": 0.72, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 1.89305416581592e-07, "loss": 0.2167, "projector_lr": 5.67916249744776e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.40625, "sft_loss": 0.7265625, "step": 4477 }, { "dpo_loss": 0.232421875, "epoch": 0.72, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 1.891065359742189e-07, "loss": 0.1779, "projector_lr": 5.673196079226567e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6171875, "rewards_train/margins": 3.953125, "rewards_train/rejected": -4.59375, "sft_loss": 0.76953125, "step": 4478 }, { "dpo_loss": 0.0927734375, "epoch": 0.72, "final_loss": 0.0927734375, "grad_norm": 0.0, "learning_rate": 1.889077355256256e-07, "loss": 0.3401, "projector_lr": 5.667232065768769e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.21875, "rewards_train/margins": 3.109375, "rewards_train/rejected": -4.34375, "sft_loss": 0.671875, "step": 4479 }, { "dpo_loss": 0.1337890625, "epoch": 0.72, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 1.887090152870689e-07, "loss": 0.1933, "projector_lr": 5.661270458612067e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 2.890625, "rewards_train/rejected": -3.625, "sft_loss": 0.703125, "step": 4480 }, { "dpo_loss": 0.48828125, "epoch": 0.72, "final_loss": 0.48828125, "grad_norm": 0.0, "learning_rate": 1.8851037530978602e-07, "loss": 0.451, "projector_lr": 5.655311259293582e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.046875, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.8125, "sft_loss": 0.72265625, "step": 4481 }, { "dpo_loss": 0.490234375, "epoch": 0.72, "final_loss": 0.490234375, "grad_norm": 0.0, "learning_rate": 1.883118156449927e-07, "loss": 0.2483, "projector_lr": 5.649354469349782e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.515625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.65625, "sft_loss": 1.046875, "step": 4482 }, { "dpo_loss": 0.0191650390625, "epoch": 0.72, "final_loss": 0.0191650390625, "grad_norm": 0.0, "learning_rate": 1.8811333634388483e-07, "loss": 0.1465, "projector_lr": 5.643400090316545e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.291015625, "rewards_train/margins": 6.84375, "rewards_train/rejected": -7.125, "sft_loss": 0.72265625, "step": 4483 }, { "dpo_loss": 0.29296875, "epoch": 0.72, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 1.8791493745763632e-07, "loss": 0.1964, "projector_lr": 5.637448123729089e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.71875, "sft_loss": 0.6640625, "step": 4484 }, { "dpo_loss": 0.1650390625, "epoch": 0.72, "final_loss": 0.1650390625, "grad_norm": 0.0, "learning_rate": 1.877166190374016e-07, "loss": 0.2974, "projector_lr": 5.631498571122048e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 3.34375, "rewards_train/rejected": -4.65625, "sft_loss": 0.77734375, "step": 4485 }, { "dpo_loss": 0.2021484375, "epoch": 0.72, "final_loss": 0.2021484375, "grad_norm": 0.0, "learning_rate": 1.8751838113431346e-07, "loss": 0.186, "projector_lr": 5.625551434029404e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8125, "rewards_train/margins": 3.46875, "rewards_train/rejected": -4.28125, "sft_loss": 0.72265625, "step": 4486 }, { "dpo_loss": 0.064453125, "epoch": 0.72, "final_loss": 0.064453125, "grad_norm": 0.0, "learning_rate": 1.8732022379948465e-07, "loss": 0.169, "projector_lr": 5.61960671398454e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.53125, "sft_loss": 0.9765625, "step": 4487 }, { "dpo_loss": 0.109375, "epoch": 0.72, "final_loss": 0.109375, "grad_norm": 0.0, "learning_rate": 1.8712214708400664e-07, "loss": 0.1035, "projector_lr": 5.6136644125202e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.21875, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.375, "sft_loss": 0.87109375, "step": 4488 }, { "dpo_loss": 0.125, "epoch": 0.72, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 1.869241510389502e-07, "loss": 0.3988, "projector_lr": 5.607724531168506e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2021484375, "rewards_train/margins": 3.59375, "rewards_train/rejected": -3.390625, "sft_loss": 0.6953125, "step": 4489 }, { "dpo_loss": 0.203125, "epoch": 0.72, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 1.8672623571536523e-07, "loss": 0.344, "projector_lr": 5.601787071460957e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.203125, "rewards_train/margins": 2.796875, "rewards_train/rejected": -4.0, "sft_loss": 0.75, "step": 4490 }, { "dpo_loss": 0.1943359375, "epoch": 0.72, "final_loss": 0.1943359375, "grad_norm": 0.0, "learning_rate": 1.8652840116428126e-07, "loss": 0.2707, "projector_lr": 5.595852034928438e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.1875, "sft_loss": 0.6953125, "step": 4491 }, { "dpo_loss": 0.095703125, "epoch": 0.72, "final_loss": 0.095703125, "grad_norm": 0.0, "learning_rate": 1.8633064743670657e-07, "loss": 0.0568, "projector_lr": 5.589919423101198e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.78125, "sft_loss": 0.69921875, "step": 4492 }, { "dpo_loss": 0.07080078125, "epoch": 0.72, "final_loss": 0.07080078125, "grad_norm": 0.0, "learning_rate": 1.8613297458362855e-07, "loss": 0.1659, "projector_lr": 5.583989237508856e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80859375, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.4375, "sft_loss": 0.79296875, "step": 4493 }, { "dpo_loss": 0.2119140625, "epoch": 0.72, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 1.8593538265601411e-07, "loss": 0.1517, "projector_lr": 5.578061479680423e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9140625, "rewards_train/margins": 3.484375, "rewards_train/rejected": -5.40625, "sft_loss": 0.98828125, "step": 4494 }, { "dpo_loss": 0.12353515625, "epoch": 0.72, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 1.857378717048091e-07, "loss": 0.0808, "projector_lr": 5.572136151144274e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.3125, "sft_loss": 1.0078125, "step": 4495 }, { "dpo_loss": 0.25, "epoch": 0.72, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 1.855404417809382e-07, "loss": 0.4227, "projector_lr": 5.566213253428146e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8125, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.09375, "sft_loss": 0.62109375, "step": 4496 }, { "dpo_loss": 0.06201171875, "epoch": 0.72, "final_loss": 0.06201171875, "grad_norm": 0.0, "learning_rate": 1.853430929353058e-07, "loss": 0.0791, "projector_lr": 5.560292788059175e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5625, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.625, "sft_loss": 0.74609375, "step": 4497 }, { "dpo_loss": 0.06787109375, "epoch": 0.72, "final_loss": 0.06787109375, "grad_norm": 0.0, "learning_rate": 1.8514582521879497e-07, "loss": 0.1317, "projector_lr": 5.554374756563849e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.875, "sft_loss": 1.03125, "step": 4498 }, { "dpo_loss": 0.125, "epoch": 0.72, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 1.8494863868226774e-07, "loss": 0.1281, "projector_lr": 5.548459160468032e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 5.28125, "rewards_train/rejected": -5.71875, "sft_loss": 0.82421875, "step": 4499 }, { "dpo_loss": 0.291015625, "epoch": 0.72, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 1.8475153337656574e-07, "loss": 0.2617, "projector_lr": 5.542546001296972e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 3.109375, "rewards_train/rejected": -4.03125, "sft_loss": 0.482421875, "step": 4500 }, { "dpo_loss": 0.048828125, "epoch": 0.72, "final_loss": 0.048828125, "grad_norm": 0.0, "learning_rate": 1.8455450935250922e-07, "loss": 0.0291, "projector_lr": 5.536635280575277e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.34375, "sft_loss": 0.76953125, "step": 4501 }, { "dpo_loss": 0.5234375, "epoch": 0.72, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 1.8435756666089757e-07, "loss": 0.3443, "projector_lr": 5.530726999826928e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.078125, "rewards_train/margins": 3.03125, "rewards_train/rejected": -5.125, "sft_loss": 0.65234375, "step": 4502 }, { "dpo_loss": 0.1611328125, "epoch": 0.72, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 1.841607053525091e-07, "loss": 0.1109, "projector_lr": 5.524821160575273e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.03125, "sft_loss": 0.78125, "step": 4503 }, { "dpo_loss": 0.05029296875, "epoch": 0.72, "final_loss": 0.05029296875, "grad_norm": 0.0, "learning_rate": 1.839639254781015e-07, "loss": 0.0798, "projector_lr": 5.518917764343046e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.65625, "sft_loss": 0.6640625, "step": 4504 }, { "dpo_loss": 0.21484375, "epoch": 0.72, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 1.8376722708841103e-07, "loss": 0.2707, "projector_lr": 5.513016812652331e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 3.859375, "rewards_train/rejected": -5.15625, "sft_loss": 1.28125, "step": 4505 }, { "dpo_loss": 0.029541015625, "epoch": 0.72, "final_loss": 0.029541015625, "grad_norm": 0.0, "learning_rate": 1.8357061023415353e-07, "loss": 0.2337, "projector_lr": 5.507118307024606e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0164794921875, "rewards_train/margins": 6.5625, "rewards_train/rejected": -6.53125, "sft_loss": 0.8359375, "step": 4506 }, { "dpo_loss": 0.47265625, "epoch": 0.72, "final_loss": 0.47265625, "grad_norm": 0.0, "learning_rate": 1.8337407496602285e-07, "loss": 0.3035, "projector_lr": 5.501222248980687e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 3.5, "rewards_train/rejected": -3.96875, "sft_loss": 1.0234375, "step": 4507 }, { "dpo_loss": 0.54296875, "epoch": 0.72, "final_loss": 0.54296875, "grad_norm": 0.0, "learning_rate": 1.8317762133469284e-07, "loss": 0.2742, "projector_lr": 5.495328640040786e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 2.65625, "rewards_train/rejected": -4.125, "sft_loss": 0.96484375, "step": 4508 }, { "dpo_loss": 0.2001953125, "epoch": 0.72, "final_loss": 0.2001953125, "grad_norm": 0.0, "learning_rate": 1.829812493908155e-07, "loss": 0.2393, "projector_lr": 5.489437481724465e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.71875, "sft_loss": 0.7734375, "step": 4509 }, { "dpo_loss": 0.10498046875, "epoch": 0.72, "final_loss": 0.10498046875, "grad_norm": 0.0, "learning_rate": 1.827849591850227e-07, "loss": 0.2952, "projector_lr": 5.483548775550681e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 6.28125, "rewards_train/rejected": -7.09375, "sft_loss": 0.60546875, "step": 4510 }, { "dpo_loss": 0.099609375, "epoch": 0.72, "final_loss": 0.099609375, "grad_norm": 0.0, "learning_rate": 1.8258875076792384e-07, "loss": 0.0559, "projector_lr": 5.477662523037716e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.46875, "sft_loss": 0.94140625, "step": 4511 }, { "dpo_loss": 0.03173828125, "epoch": 0.72, "final_loss": 0.03173828125, "grad_norm": 0.0, "learning_rate": 1.8239262419010865e-07, "loss": 0.0627, "projector_lr": 5.471778725703259e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.53125, "sft_loss": 0.6015625, "step": 4512 }, { "dpo_loss": 0.2412109375, "epoch": 0.72, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 1.8219657950214478e-07, "loss": 0.1316, "projector_lr": 5.465897385064344e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.6875, "sft_loss": 0.75390625, "step": 4513 }, { "dpo_loss": 0.1259765625, "epoch": 0.72, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 1.820006167545795e-07, "loss": 0.0678, "projector_lr": 5.460018502637385e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.181640625, "rewards_train/margins": 3.90625, "rewards_train/rejected": -4.09375, "sft_loss": 0.53125, "step": 4514 }, { "dpo_loss": 0.006591796875, "epoch": 0.72, "final_loss": 0.006591796875, "grad_norm": 0.0, "learning_rate": 1.8180473599793838e-07, "loss": 0.0797, "projector_lr": 5.454142079938152e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.5, "sft_loss": 0.734375, "step": 4515 }, { "dpo_loss": 0.466796875, "epoch": 0.72, "final_loss": 0.466796875, "grad_norm": 0.0, "learning_rate": 1.8160893728272613e-07, "loss": 0.581, "projector_lr": 5.448268118481785e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.484375, "rewards_train/margins": 5.625, "rewards_train/rejected": -7.09375, "sft_loss": 1.015625, "step": 4516 }, { "dpo_loss": 0.05322265625, "epoch": 0.72, "final_loss": 0.05322265625, "grad_norm": 0.0, "learning_rate": 1.8141322065942594e-07, "loss": 0.2582, "projector_lr": 5.442396619782778e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.9375, "sft_loss": 0.8203125, "step": 4517 }, { "dpo_loss": 0.083984375, "epoch": 0.72, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 1.8121758617850058e-07, "loss": 0.0992, "projector_lr": 5.436527585355018e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.96875, "sft_loss": 0.671875, "step": 4518 }, { "dpo_loss": 0.11474609375, "epoch": 0.72, "final_loss": 0.11474609375, "grad_norm": 0.0, "learning_rate": 1.81022033890391e-07, "loss": 0.1546, "projector_lr": 5.43066101671173e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.53125, "sft_loss": 0.74609375, "step": 4519 }, { "dpo_loss": 0.341796875, "epoch": 0.72, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 1.8082656384551714e-07, "loss": 0.1968, "projector_lr": 5.424796915365514e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.109375, "rewards_train/margins": 2.671875, "rewards_train/rejected": -3.78125, "sft_loss": 0.703125, "step": 4520 }, { "dpo_loss": 0.10205078125, "epoch": 0.72, "final_loss": 0.10205078125, "grad_norm": 0.0, "learning_rate": 1.8063117609427757e-07, "loss": 0.1146, "projector_lr": 5.418935282828327e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.5, "sft_loss": 0.77734375, "step": 4521 }, { "dpo_loss": 0.16015625, "epoch": 0.72, "final_loss": 0.16015625, "grad_norm": 0.0, "learning_rate": 1.8043587068705018e-07, "loss": 0.2028, "projector_lr": 5.413076120611506e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.5, "sft_loss": 1.140625, "step": 4522 }, { "dpo_loss": 0.076171875, "epoch": 0.72, "final_loss": 0.076171875, "grad_norm": 0.0, "learning_rate": 1.8024064767419084e-07, "loss": 0.11, "projector_lr": 5.407219430225725e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2373046875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -4.65625, "sft_loss": 0.66015625, "step": 4523 }, { "dpo_loss": 0.21484375, "epoch": 0.72, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 1.8004550710603522e-07, "loss": 0.1596, "projector_lr": 5.401365213181057e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 2.59375, "rewards_train/rejected": -3.90625, "sft_loss": 0.8046875, "step": 4524 }, { "dpo_loss": 0.0625, "epoch": 0.72, "final_loss": 0.0625, "grad_norm": 0.0, "learning_rate": 1.7985044903289643e-07, "loss": 0.0876, "projector_lr": 5.395513470986893e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.546875, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.1875, "sft_loss": 0.79296875, "step": 4525 }, { "dpo_loss": 0.1904296875, "epoch": 0.72, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 1.7965547350506737e-07, "loss": 0.1443, "projector_lr": 5.389664205152022e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 5.8125, "rewards_train/rejected": -7.21875, "sft_loss": 0.59765625, "step": 4526 }, { "dpo_loss": 0.1552734375, "epoch": 0.72, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 1.7946058057281909e-07, "loss": 0.1065, "projector_lr": 5.383817417184573e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.40625, "sft_loss": 0.875, "step": 4527 }, { "dpo_loss": 0.1650390625, "epoch": 0.72, "final_loss": 0.1650390625, "grad_norm": 0.0, "learning_rate": 1.7926577028640182e-07, "loss": 0.1146, "projector_lr": 5.377973108592055e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.203125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.5, "sft_loss": 0.69140625, "step": 4528 }, { "dpo_loss": 0.039306640625, "epoch": 0.72, "final_loss": 0.039306640625, "grad_norm": 0.0, "learning_rate": 1.79071042696044e-07, "loss": 0.074, "projector_lr": 5.372131280881321e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.0, "sft_loss": 0.59765625, "step": 4529 }, { "dpo_loss": 0.058349609375, "epoch": 0.72, "final_loss": 0.058349609375, "grad_norm": 0.0, "learning_rate": 1.7887639785195302e-07, "loss": 0.1115, "projector_lr": 5.366291935558591e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.640625, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.625, "sft_loss": 0.71484375, "step": 4530 }, { "dpo_loss": 0.0218505859375, "epoch": 0.72, "final_loss": 0.0218505859375, "grad_norm": 0.0, "learning_rate": 1.786818358043146e-07, "loss": 0.0527, "projector_lr": 5.360455074129439e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 6.46875, "rewards_train/rejected": -7.53125, "sft_loss": 0.91015625, "step": 4531 }, { "dpo_loss": 0.0250244140625, "epoch": 0.73, "final_loss": 0.0250244140625, "grad_norm": 0.0, "learning_rate": 1.784873566032938e-07, "loss": 0.1498, "projector_lr": 5.354620698098814e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.1875, "sft_loss": 0.59375, "step": 4532 }, { "dpo_loss": 0.2177734375, "epoch": 0.73, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 1.7829296029903363e-07, "loss": 0.2054, "projector_lr": 5.34878880897101e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.921875, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.5625, "sft_loss": 0.67578125, "step": 4533 }, { "dpo_loss": 0.0263671875, "epoch": 0.73, "final_loss": 0.0263671875, "grad_norm": 0.0, "learning_rate": 1.780986469416559e-07, "loss": 0.0643, "projector_lr": 5.342959408249676e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.515625, "rewards_train/margins": 6.5625, "rewards_train/rejected": -8.0625, "sft_loss": 0.859375, "step": 4534 }, { "dpo_loss": 0.3203125, "epoch": 0.73, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 1.7790441658126138e-07, "loss": 0.1893, "projector_lr": 5.337132497437841e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.25, "sft_loss": 0.51171875, "step": 4535 }, { "dpo_loss": 0.55859375, "epoch": 0.73, "final_loss": 0.55859375, "grad_norm": 0.0, "learning_rate": 1.7771026926792909e-07, "loss": 0.315, "projector_lr": 5.331308078037873e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.71875, "rewards_train/margins": 3.546875, "rewards_train/rejected": -4.28125, "sft_loss": 0.63671875, "step": 4536 }, { "dpo_loss": 0.31640625, "epoch": 0.73, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 1.7751620505171667e-07, "loss": 0.2113, "projector_lr": 5.3254861515515e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.46875, "rewards_train/margins": 3.046875, "rewards_train/rejected": -4.5, "sft_loss": 1.0625, "step": 4537 }, { "dpo_loss": 0.042236328125, "epoch": 0.73, "final_loss": 0.042236328125, "grad_norm": 0.0, "learning_rate": 1.7732222398266028e-07, "loss": 0.1843, "projector_lr": 5.319666719479809e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.5625, "sft_loss": 0.671875, "step": 4538 }, { "dpo_loss": 0.0498046875, "epoch": 0.73, "final_loss": 0.0498046875, "grad_norm": 0.0, "learning_rate": 1.7712832611077506e-07, "loss": 0.3333, "projector_lr": 5.313849783323252e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.25, "sft_loss": 0.65234375, "step": 4539 }, { "dpo_loss": 0.21875, "epoch": 0.73, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 1.7693451148605404e-07, "loss": 0.1615, "projector_lr": 5.308035344581621e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.5, "sft_loss": 0.8984375, "step": 4540 }, { "dpo_loss": 0.1044921875, "epoch": 0.73, "final_loss": 0.1044921875, "grad_norm": 0.0, "learning_rate": 1.767407801584695e-07, "loss": 0.1701, "projector_lr": 5.302223404754085e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53125, "rewards_train/margins": 2.96875, "rewards_train/rejected": -3.5, "sft_loss": 0.62109375, "step": 4541 }, { "dpo_loss": 0.1015625, "epoch": 0.73, "final_loss": 0.1015625, "grad_norm": 0.0, "learning_rate": 1.7654713217797174e-07, "loss": 0.0737, "projector_lr": 5.296413965339153e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46875, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.71875, "sft_loss": 0.5703125, "step": 4542 }, { "dpo_loss": 0.1865234375, "epoch": 0.73, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 1.763535675944897e-07, "loss": 0.4589, "projector_lr": 5.290607027834692e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.4375, "sft_loss": 0.7265625, "step": 4543 }, { "dpo_loss": 0.265625, "epoch": 0.73, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 1.7616008645793069e-07, "loss": 0.2095, "projector_lr": 5.284802593737921e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.5, "sft_loss": 0.8203125, "step": 4544 }, { "dpo_loss": 0.2451171875, "epoch": 0.73, "final_loss": 0.2451171875, "grad_norm": 0.0, "learning_rate": 1.759666888181809e-07, "loss": 0.278, "projector_lr": 5.279000664545427e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 3.421875, "rewards_train/rejected": -4.375, "sft_loss": 0.78125, "step": 4545 }, { "dpo_loss": 0.2314453125, "epoch": 0.73, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 1.7577337472510462e-07, "loss": 0.228, "projector_lr": 5.273201241753139e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.96875, "sft_loss": 1.328125, "step": 4546 }, { "dpo_loss": 0.1494140625, "epoch": 0.73, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 1.7558014422854472e-07, "loss": 0.0861, "projector_lr": 5.267404326856342e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.59375, "sft_loss": 0.87109375, "step": 4547 }, { "dpo_loss": 0.171875, "epoch": 0.73, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 1.7538699737832237e-07, "loss": 0.1368, "projector_lr": 5.261609921349671e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.34375, "sft_loss": 0.73828125, "step": 4548 }, { "dpo_loss": 0.134765625, "epoch": 0.73, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 1.7519393422423763e-07, "loss": 0.1144, "projector_lr": 5.255818026727129e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.40625, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.46875, "sft_loss": 0.59375, "step": 4549 }, { "dpo_loss": 0.0615234375, "epoch": 0.73, "final_loss": 0.0615234375, "grad_norm": 0.0, "learning_rate": 1.750009548160683e-07, "loss": 0.0418, "projector_lr": 5.250028644482049e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7265625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.40625, "sft_loss": 0.75390625, "step": 4550 }, { "dpo_loss": 0.0654296875, "epoch": 0.73, "final_loss": 0.0654296875, "grad_norm": 0.0, "learning_rate": 1.748080592035715e-07, "loss": 0.0601, "projector_lr": 5.244241776107145e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.75, "sft_loss": 0.9921875, "step": 4551 }, { "dpo_loss": 0.0927734375, "epoch": 0.73, "final_loss": 0.0927734375, "grad_norm": 0.0, "learning_rate": 1.7461524743648153e-07, "loss": 0.0697, "projector_lr": 5.238457423094446e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.59375, "sft_loss": 0.8203125, "step": 4552 }, { "dpo_loss": 0.365234375, "epoch": 0.73, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 1.7442251956451226e-07, "loss": 0.331, "projector_lr": 5.232675586935368e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 3.296875, "rewards_train/rejected": -4.3125, "sft_loss": 0.8203125, "step": 4553 }, { "dpo_loss": 0.01385498046875, "epoch": 0.73, "final_loss": 0.01385498046875, "grad_norm": 0.0, "learning_rate": 1.7422987563735499e-07, "loss": 0.0947, "projector_lr": 5.226896269120649e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 6.1875, "rewards_train/rejected": -7.15625, "sft_loss": 0.83203125, "step": 4554 }, { "dpo_loss": 0.1083984375, "epoch": 0.73, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 1.7403731570468034e-07, "loss": 0.2956, "projector_lr": 5.22111947114041e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.25, "sft_loss": 0.65625, "step": 4555 }, { "dpo_loss": 0.00982666015625, "epoch": 0.73, "final_loss": 0.00982666015625, "grad_norm": 0.0, "learning_rate": 1.7384483981613613e-07, "loss": 0.0128, "projector_lr": 5.215345194484084e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.458984375, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.09375, "sft_loss": 0.73046875, "step": 4556 }, { "dpo_loss": 0.0228271484375, "epoch": 0.73, "final_loss": 0.0228271484375, "grad_norm": 0.0, "learning_rate": 1.7365244802134953e-07, "loss": 0.174, "projector_lr": 5.209573440640486e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.84375, "sft_loss": 0.796875, "step": 4557 }, { "dpo_loss": 0.03271484375, "epoch": 0.73, "final_loss": 0.03271484375, "grad_norm": 0.0, "learning_rate": 1.7346014036992528e-07, "loss": 0.1608, "projector_lr": 5.203804211097759e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.5, "sft_loss": 0.921875, "step": 4558 }, { "dpo_loss": 0.25390625, "epoch": 0.73, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 1.7326791691144716e-07, "loss": 0.1731, "projector_lr": 5.198037507343415e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6015625, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.125, "sft_loss": 0.69921875, "step": 4559 }, { "dpo_loss": 0.263671875, "epoch": 0.73, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 1.7307577769547654e-07, "loss": 0.1873, "projector_lr": 5.192273330864296e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.0625, "sft_loss": 0.640625, "step": 4560 }, { "dpo_loss": 0.011474609375, "epoch": 0.73, "final_loss": 0.011474609375, "grad_norm": 0.0, "learning_rate": 1.728837227715535e-07, "loss": 0.1728, "projector_lr": 5.186511683146606e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.71875, "sft_loss": 0.796875, "step": 4561 }, { "dpo_loss": 0.2236328125, "epoch": 0.73, "final_loss": 0.2236328125, "grad_norm": 0.0, "learning_rate": 1.7269175218919595e-07, "loss": 0.1537, "projector_lr": 5.180752565675879e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.59375, "sft_loss": 0.87109375, "step": 4562 }, { "dpo_loss": 0.2177734375, "epoch": 0.73, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 1.7249986599790085e-07, "loss": 0.2975, "projector_lr": 5.174995979937026e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.78515625, "rewards_train/margins": 3.390625, "rewards_train/rejected": -4.1875, "sft_loss": 0.59375, "step": 4563 }, { "dpo_loss": 0.029296875, "epoch": 0.73, "final_loss": 0.029296875, "grad_norm": 0.0, "learning_rate": 1.7230806424714262e-07, "loss": 0.2576, "projector_lr": 5.169241927414279e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2255859375, "rewards_train/margins": 7.03125, "rewards_train/rejected": -7.25, "sft_loss": 0.6328125, "step": 4564 }, { "dpo_loss": 0.20703125, "epoch": 0.73, "final_loss": 0.20703125, "grad_norm": 0.0, "learning_rate": 1.7211634698637422e-07, "loss": 0.3122, "projector_lr": 5.163490409591227e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.84375, "sft_loss": 0.92578125, "step": 4565 }, { "dpo_loss": 0.396484375, "epoch": 0.73, "final_loss": 0.396484375, "grad_norm": 0.0, "learning_rate": 1.7192471426502665e-07, "loss": 0.3193, "projector_lr": 5.1577414279508e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.875, "sft_loss": 0.90625, "step": 4566 }, { "dpo_loss": 0.0625, "epoch": 0.73, "final_loss": 0.0625, "grad_norm": 0.0, "learning_rate": 1.717331661325096e-07, "loss": 0.1298, "projector_lr": 5.151994983975288e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.75, "sft_loss": 0.7734375, "step": 4567 }, { "dpo_loss": 0.048828125, "epoch": 0.73, "final_loss": 0.048828125, "grad_norm": 0.0, "learning_rate": 1.7154170263821028e-07, "loss": 0.1756, "projector_lr": 5.146251079146309e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.15625, "sft_loss": 0.9453125, "step": 4568 }, { "dpo_loss": 0.1474609375, "epoch": 0.73, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 1.7135032383149467e-07, "loss": 0.1719, "projector_lr": 5.140509714944841e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.25, "sft_loss": 0.75390625, "step": 4569 }, { "dpo_loss": 0.314453125, "epoch": 0.73, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 1.7115902976170658e-07, "loss": 0.333, "projector_lr": 5.134770892851198e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.6875, "sft_loss": 0.7421875, "step": 4570 }, { "dpo_loss": 0.061279296875, "epoch": 0.73, "final_loss": 0.061279296875, "grad_norm": 0.0, "learning_rate": 1.7096782047816805e-07, "loss": 0.0758, "projector_lr": 5.129034614345041e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 6.53125, "rewards_train/rejected": -7.59375, "sft_loss": 0.703125, "step": 4571 }, { "dpo_loss": 0.041259765625, "epoch": 0.73, "final_loss": 0.041259765625, "grad_norm": 0.0, "learning_rate": 1.7077669603017902e-07, "loss": 0.2383, "projector_lr": 5.123300880905371e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 6.15625, "rewards_train/rejected": -7.34375, "sft_loss": 0.79296875, "step": 4572 }, { "dpo_loss": 0.2099609375, "epoch": 0.73, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 1.7058565646701822e-07, "loss": 0.2187, "projector_lr": 5.117569694010547e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.84375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -6.15625, "sft_loss": 0.87890625, "step": 4573 }, { "dpo_loss": 0.318359375, "epoch": 0.73, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 1.703947018379418e-07, "loss": 0.2058, "projector_lr": 5.111841055138254e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.3125, "sft_loss": 0.5859375, "step": 4574 }, { "dpo_loss": 0.314453125, "epoch": 0.73, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 1.702038321921842e-07, "loss": 0.2259, "projector_lr": 5.106114965765527e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.53125, "sft_loss": 0.6328125, "step": 4575 }, { "dpo_loss": 0.09912109375, "epoch": 0.73, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 1.7001304757895828e-07, "loss": 0.1064, "projector_lr": 5.100391427368749e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.478515625, "rewards_train/margins": 5.125, "rewards_train/rejected": -4.65625, "sft_loss": 0.373046875, "step": 4576 }, { "dpo_loss": 0.0517578125, "epoch": 0.73, "final_loss": 0.0517578125, "grad_norm": 0.0, "learning_rate": 1.698223480474546e-07, "loss": 0.2162, "projector_lr": 5.094670441423638e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6328125, "rewards_train/margins": 5.6875, "rewards_train/rejected": -7.34375, "sft_loss": 0.9140625, "step": 4577 }, { "dpo_loss": 0.166015625, "epoch": 0.73, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 1.696317336468419e-07, "loss": 0.1339, "projector_lr": 5.088952009405258e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.953125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -6.0, "sft_loss": 0.84765625, "step": 4578 }, { "dpo_loss": 0.1728515625, "epoch": 0.73, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 1.6944120442626686e-07, "loss": 0.2105, "projector_lr": 5.083236132788006e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.625, "sft_loss": 0.65234375, "step": 4579 }, { "dpo_loss": 0.0269775390625, "epoch": 0.73, "final_loss": 0.0269775390625, "grad_norm": 0.0, "learning_rate": 1.6925076043485458e-07, "loss": 0.0906, "projector_lr": 5.077522813045638e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53515625, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.65625, "sft_loss": 0.5234375, "step": 4580 }, { "dpo_loss": 0.166015625, "epoch": 0.73, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 1.690604017217076e-07, "loss": 0.1058, "projector_lr": 5.071812051651228e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.1875, "sft_loss": 0.859375, "step": 4581 }, { "dpo_loss": 0.2001953125, "epoch": 0.73, "final_loss": 0.2001953125, "grad_norm": 0.0, "learning_rate": 1.6887012833590724e-07, "loss": 0.1044, "projector_lr": 5.066103850077218e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.53125, "sft_loss": 0.765625, "step": 4582 }, { "dpo_loss": 0.1826171875, "epoch": 0.73, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 1.6867994032651184e-07, "loss": 0.2253, "projector_lr": 5.060398209795355e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 3.046875, "rewards_train/rejected": -3.671875, "sft_loss": 0.72265625, "step": 4583 }, { "dpo_loss": 0.50390625, "epoch": 0.73, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 1.6848983774255865e-07, "loss": 0.3061, "projector_lr": 5.05469513227676e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.46875, "sft_loss": 0.66796875, "step": 4584 }, { "dpo_loss": 0.06689453125, "epoch": 0.73, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 1.682998206330622e-07, "loss": 0.0787, "projector_lr": 5.048994618991867e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.0, "sft_loss": 0.88671875, "step": 4585 }, { "dpo_loss": 0.0908203125, "epoch": 0.73, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 1.6810988904701555e-07, "loss": 0.2171, "projector_lr": 5.043296671410467e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.40625, "sft_loss": 0.8125, "step": 4586 }, { "dpo_loss": 0.06201171875, "epoch": 0.73, "final_loss": 0.06201171875, "grad_norm": 0.0, "learning_rate": 1.679200430333894e-07, "loss": 0.0591, "projector_lr": 5.037601291001683e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.375, "sft_loss": 0.80078125, "step": 4587 }, { "dpo_loss": 0.67578125, "epoch": 0.73, "final_loss": 0.67578125, "grad_norm": 0.0, "learning_rate": 1.6773028264113236e-07, "loss": 0.3985, "projector_lr": 5.031908479233971e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.140625, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.90625, "sft_loss": 0.66015625, "step": 4588 }, { "dpo_loss": 0.033447265625, "epoch": 0.73, "final_loss": 0.033447265625, "grad_norm": 0.0, "learning_rate": 1.6754060791917091e-07, "loss": 0.1402, "projector_lr": 5.026218237575127e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.375, "sft_loss": 0.71484375, "step": 4589 }, { "dpo_loss": 0.177734375, "epoch": 0.73, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 1.6735101891640986e-07, "loss": 0.1689, "projector_lr": 5.020530567492295e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4375, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.71875, "sft_loss": 0.71875, "step": 4590 }, { "dpo_loss": 0.1640625, "epoch": 0.73, "final_loss": 0.1640625, "grad_norm": 0.0, "learning_rate": 1.6716151568173144e-07, "loss": 0.1703, "projector_lr": 5.014845470451943e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.09375, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.4375, "sft_loss": 0.609375, "step": 4591 }, { "dpo_loss": 0.0986328125, "epoch": 0.73, "final_loss": 0.0986328125, "grad_norm": 0.0, "learning_rate": 1.66972098263996e-07, "loss": 0.0995, "projector_lr": 5.00916294791988e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.25, "sft_loss": 0.6796875, "step": 4592 }, { "dpo_loss": 0.09326171875, "epoch": 0.73, "final_loss": 0.09326171875, "grad_norm": 0.0, "learning_rate": 1.667827667120415e-07, "loss": 0.1225, "projector_lr": 5.003483001361245e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.625, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.78125, "sft_loss": 0.76953125, "step": 4593 }, { "dpo_loss": 0.08935546875, "epoch": 0.74, "final_loss": 0.08935546875, "grad_norm": 0.0, "learning_rate": 1.6659352107468438e-07, "loss": 0.1311, "projector_lr": 4.997805632240532e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.72265625, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.4375, "sft_loss": 0.890625, "step": 4594 }, { "dpo_loss": 0.4765625, "epoch": 0.74, "final_loss": 0.4765625, "grad_norm": 0.0, "learning_rate": 1.6640436140071805e-07, "loss": 0.3273, "projector_lr": 4.992130842021542e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.72265625, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.75, "sft_loss": 0.6328125, "step": 4595 }, { "dpo_loss": 0.189453125, "epoch": 0.74, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 1.662152877389149e-07, "loss": 0.1191, "projector_lr": 4.986458632167447e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.65625, "sft_loss": 0.8359375, "step": 4596 }, { "dpo_loss": 0.08984375, "epoch": 0.74, "final_loss": 0.08984375, "grad_norm": 0.0, "learning_rate": 1.660263001380237e-07, "loss": 0.0604, "projector_lr": 4.980789004140712e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.5, "sft_loss": 0.9921875, "step": 4597 }, { "dpo_loss": 0.2236328125, "epoch": 0.74, "final_loss": 0.2236328125, "grad_norm": 0.0, "learning_rate": 1.6583739864677233e-07, "loss": 0.1304, "projector_lr": 4.97512195940317e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 7.25, "rewards_train/rejected": -7.9375, "sft_loss": 0.51953125, "step": 4598 }, { "dpo_loss": 0.0167236328125, "epoch": 0.74, "final_loss": 0.0167236328125, "grad_norm": 0.0, "learning_rate": 1.656485833138656e-07, "loss": 0.0698, "projector_lr": 4.969457499415969e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.0, "sft_loss": 0.984375, "step": 4599 }, { "dpo_loss": 0.083984375, "epoch": 0.74, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 1.6545985418798697e-07, "loss": 0.0568, "projector_lr": 4.963795625639609e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.5625, "sft_loss": 1.921875, "step": 4600 }, { "dpo_loss": 0.0517578125, "epoch": 0.74, "final_loss": 0.0517578125, "grad_norm": 0.0, "learning_rate": 1.6527121131779643e-07, "loss": 0.1387, "projector_lr": 4.958136339533893e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26171875, "rewards_train/margins": 6.375, "rewards_train/rejected": -6.625, "sft_loss": 0.85546875, "step": 4601 }, { "dpo_loss": 0.1611328125, "epoch": 0.74, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 1.65082654751933e-07, "loss": 0.1327, "projector_lr": 4.95247964255799e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.734375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.6875, "sft_loss": 0.87890625, "step": 4602 }, { "dpo_loss": 0.2294921875, "epoch": 0.74, "final_loss": 0.2294921875, "grad_norm": 0.0, "learning_rate": 1.6489418453901254e-07, "loss": 0.3203, "projector_lr": 4.946825536170377e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.9375, "sft_loss": 0.984375, "step": 4603 }, { "dpo_loss": 0.14453125, "epoch": 0.74, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 1.6470580072762934e-07, "loss": 0.0981, "projector_lr": 4.94117402182888e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.875, "sft_loss": 0.5703125, "step": 4604 }, { "dpo_loss": 0.6875, "epoch": 0.74, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 1.6451750336635495e-07, "loss": 0.3561, "projector_lr": 4.935525100990649e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 1.671875, "rewards_train/rejected": -2.765625, "sft_loss": 0.75, "step": 4605 }, { "dpo_loss": 0.2041015625, "epoch": 0.74, "final_loss": 0.2041015625, "grad_norm": 0.0, "learning_rate": 1.6432929250373866e-07, "loss": 0.2928, "projector_lr": 4.929878775112161e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.46875, "rewards_train/margins": 3.859375, "rewards_train/rejected": -5.34375, "sft_loss": 1.1796875, "step": 4606 }, { "dpo_loss": 0.58984375, "epoch": 0.74, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 1.6414116818830748e-07, "loss": 0.3915, "projector_lr": 4.924235045649224e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.03125, "rewards_train/margins": 5.375, "rewards_train/rejected": -7.40625, "sft_loss": 0.89453125, "step": 4607 }, { "dpo_loss": 0.271484375, "epoch": 0.74, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 1.6395313046856646e-07, "loss": 0.4071, "projector_lr": 4.918593914056994e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.78125, "sft_loss": 0.76953125, "step": 4608 }, { "dpo_loss": 0.28125, "epoch": 0.74, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 1.6376517939299795e-07, "loss": 0.1742, "projector_lr": 4.912955381789939e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 3.265625, "rewards_train/rejected": -4.65625, "sft_loss": 0.82421875, "step": 4609 }, { "dpo_loss": 0.1767578125, "epoch": 0.74, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 1.6357731501006173e-07, "loss": 0.1366, "projector_lr": 4.907319450301853e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 5.875, "rewards_train/rejected": -7.15625, "sft_loss": 0.54296875, "step": 4610 }, { "dpo_loss": 0.0247802734375, "epoch": 0.74, "final_loss": 0.0247802734375, "grad_norm": 0.0, "learning_rate": 1.6338953736819606e-07, "loss": 0.1005, "projector_lr": 4.901686121045882e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.75, "sft_loss": 0.71484375, "step": 4611 }, { "dpo_loss": 0.0026702880859375, "epoch": 0.74, "final_loss": 0.0026702880859375, "grad_norm": 0.0, "learning_rate": 1.6320184651581596e-07, "loss": 0.0102, "projector_lr": 4.89605539547448e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 7.125, "rewards_train/rejected": -7.84375, "sft_loss": 0.8828125, "step": 4612 }, { "dpo_loss": 0.046142578125, "epoch": 0.74, "final_loss": 0.046142578125, "grad_norm": 0.0, "learning_rate": 1.630142425013145e-07, "loss": 0.0737, "projector_lr": 4.890427275039436e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.21875, "sft_loss": 0.81640625, "step": 4613 }, { "dpo_loss": 0.333984375, "epoch": 0.74, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 1.6282672537306247e-07, "loss": 0.1834, "projector_lr": 4.884801761191874e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.515625, "rewards_train/margins": 3.59375, "rewards_train/rejected": -5.125, "sft_loss": 1.4765625, "step": 4614 }, { "dpo_loss": 0.05517578125, "epoch": 0.74, "final_loss": 0.05517578125, "grad_norm": 0.0, "learning_rate": 1.6263929517940795e-07, "loss": 0.0421, "projector_lr": 4.879178855382239e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4375, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.75, "sft_loss": 0.9375, "step": 4615 }, { "dpo_loss": 0.033447265625, "epoch": 0.74, "final_loss": 0.033447265625, "grad_norm": 0.0, "learning_rate": 1.624519519686766e-07, "loss": 0.2032, "projector_lr": 4.873558559060299e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.9375, "sft_loss": 0.80078125, "step": 4616 }, { "dpo_loss": 0.1728515625, "epoch": 0.74, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 1.622646957891722e-07, "loss": 0.2086, "projector_lr": 4.867940873675166e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1875, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.9375, "sft_loss": 0.890625, "step": 4617 }, { "dpo_loss": 0.38671875, "epoch": 0.74, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 1.6207752668917529e-07, "loss": 0.2425, "projector_lr": 4.862325800675259e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.40625, "sft_loss": 0.69140625, "step": 4618 }, { "dpo_loss": 0.212890625, "epoch": 0.74, "final_loss": 0.212890625, "grad_norm": 0.0, "learning_rate": 1.6189044471694457e-07, "loss": 0.2331, "projector_lr": 4.856713341508338e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7421875, "rewards_train/margins": 6.28125, "rewards_train/rejected": -8.0, "sft_loss": 0.8671875, "step": 4619 }, { "dpo_loss": 0.04931640625, "epoch": 0.74, "final_loss": 0.04931640625, "grad_norm": 0.0, "learning_rate": 1.6170344992071583e-07, "loss": 0.0299, "projector_lr": 4.851103497621475e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.361328125, "rewards_train/margins": 5.46875, "rewards_train/rejected": -5.84375, "sft_loss": 0.59765625, "step": 4620 }, { "dpo_loss": 0.1318359375, "epoch": 0.74, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 1.6151654234870288e-07, "loss": 0.1539, "projector_lr": 4.845496270461087e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.72265625, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.34375, "sft_loss": 1.03125, "step": 4621 }, { "dpo_loss": 0.3984375, "epoch": 0.74, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 1.6132972204909657e-07, "loss": 0.2364, "projector_lr": 4.839891661472897e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.25, "rewards_train/margins": 2.5, "rewards_train/rejected": -4.75, "sft_loss": 1.09375, "step": 4622 }, { "dpo_loss": 0.35546875, "epoch": 0.74, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 1.6114298907006579e-07, "loss": 0.2621, "projector_lr": 4.834289672101974e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.109375, "rewards_train/margins": 2.40625, "rewards_train/rejected": -4.53125, "sft_loss": 0.8671875, "step": 4623 }, { "dpo_loss": 0.0693359375, "epoch": 0.74, "final_loss": 0.0693359375, "grad_norm": 0.0, "learning_rate": 1.60956343459756e-07, "loss": 0.1706, "projector_lr": 4.828690303792681e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.5, "sft_loss": 0.921875, "step": 4624 }, { "dpo_loss": 0.12158203125, "epoch": 0.74, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 1.6076978526629121e-07, "loss": 0.1959, "projector_lr": 4.823093557988736e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.0693359375, "rewards_train/margins": 6.78125, "rewards_train/rejected": -6.875, "sft_loss": 0.62890625, "step": 4625 }, { "dpo_loss": 0.265625, "epoch": 0.74, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 1.60583314537772e-07, "loss": 0.172, "projector_lr": 4.817499436133161e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.09375, "rewards_train/margins": 2.890625, "rewards_train/rejected": -3.984375, "sft_loss": 0.71484375, "step": 4626 }, { "dpo_loss": 0.1533203125, "epoch": 0.74, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 1.603969313222774e-07, "loss": 0.1418, "projector_lr": 4.811907939668322e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.46875, "sft_loss": 0.8125, "step": 4627 }, { "dpo_loss": 0.3046875, "epoch": 0.74, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 1.6021063566786248e-07, "loss": 0.2824, "projector_lr": 4.806319070035874e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 3.078125, "rewards_train/rejected": -4.1875, "sft_loss": 0.9296875, "step": 4628 }, { "dpo_loss": 0.45703125, "epoch": 0.74, "final_loss": 0.45703125, "grad_norm": 0.0, "learning_rate": 1.6002442762256108e-07, "loss": 0.2845, "projector_lr": 4.800732828676832e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.59375, "sft_loss": 0.859375, "step": 4629 }, { "dpo_loss": 0.205078125, "epoch": 0.74, "final_loss": 0.205078125, "grad_norm": 0.0, "learning_rate": 1.5983830723438353e-07, "loss": 0.2258, "projector_lr": 4.795149217031506e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.390625, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.9375, "sft_loss": 0.71484375, "step": 4630 }, { "dpo_loss": 0.15625, "epoch": 0.74, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 1.5965227455131825e-07, "loss": 0.1001, "projector_lr": 4.789568236539548e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.703125, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.84375, "sft_loss": 0.54296875, "step": 4631 }, { "dpo_loss": 0.01519775390625, "epoch": 0.74, "final_loss": 0.01519775390625, "grad_norm": 0.0, "learning_rate": 1.594663296213306e-07, "loss": 0.0383, "projector_lr": 4.783989888639919e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 6.03125, "rewards_train/rejected": -7.0625, "sft_loss": 0.80078125, "step": 4632 }, { "dpo_loss": 0.1748046875, "epoch": 0.74, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 1.5928047249236336e-07, "loss": 0.2096, "projector_lr": 4.778414174770901e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -6.03125, "sft_loss": 1.109375, "step": 4633 }, { "dpo_loss": 0.00799560546875, "epoch": 0.74, "final_loss": 0.00799560546875, "grad_norm": 0.0, "learning_rate": 1.5909470321233665e-07, "loss": 0.1512, "projector_lr": 4.7728410963701e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.65625, "sft_loss": 0.88671875, "step": 4634 }, { "dpo_loss": 0.048828125, "epoch": 0.74, "final_loss": 0.048828125, "grad_norm": 0.0, "learning_rate": 1.5890902182914828e-07, "loss": 0.0391, "projector_lr": 4.7672706548744486e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.84375, "sft_loss": 1.015625, "step": 4635 }, { "dpo_loss": 0.345703125, "epoch": 0.74, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 1.5872342839067304e-07, "loss": 0.2264, "projector_lr": 4.7617028517201914e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.546875, "rewards_train/margins": 3.984375, "rewards_train/rejected": -5.53125, "sft_loss": 0.9765625, "step": 4636 }, { "dpo_loss": 0.2265625, "epoch": 0.74, "final_loss": 0.2265625, "grad_norm": 0.0, "learning_rate": 1.585379229447631e-07, "loss": 0.1168, "projector_lr": 4.756137688342893e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.8125, "sft_loss": 0.6328125, "step": 4637 }, { "dpo_loss": 0.314453125, "epoch": 0.74, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 1.583525055392479e-07, "loss": 0.1707, "projector_lr": 4.7505751661774373e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 2.71875, "rewards_train/rejected": -3.15625, "sft_loss": 0.68359375, "step": 4638 }, { "dpo_loss": 0.1357421875, "epoch": 0.74, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 1.581671762219346e-07, "loss": 0.0779, "projector_lr": 4.745015286658038e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.9375, "sft_loss": 0.84765625, "step": 4639 }, { "dpo_loss": 0.392578125, "epoch": 0.74, "final_loss": 0.392578125, "grad_norm": 0.0, "learning_rate": 1.579819350406069e-07, "loss": 0.2078, "projector_lr": 4.7394580512182076e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6328125, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.4375, "sft_loss": 0.73828125, "step": 4640 }, { "dpo_loss": 0.1044921875, "epoch": 0.74, "final_loss": 0.1044921875, "grad_norm": 0.0, "learning_rate": 1.577967820430268e-07, "loss": 0.1994, "projector_lr": 4.733903461290805e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.058349609375, "rewards_train/margins": 7.6875, "rewards_train/rejected": -7.75, "sft_loss": 0.51171875, "step": 4641 }, { "dpo_loss": 0.255859375, "epoch": 0.74, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 1.5761171727693234e-07, "loss": 0.2297, "projector_lr": 4.7283515183079705e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.640625, "rewards_train/margins": 2.6875, "rewards_train/rejected": -4.3125, "sft_loss": 0.7421875, "step": 4642 }, { "dpo_loss": 0.275390625, "epoch": 0.74, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 1.5742674079003976e-07, "loss": 0.4615, "projector_lr": 4.722802223701193e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 2.9375, "rewards_train/rejected": -4.0625, "sft_loss": 0.890625, "step": 4643 }, { "dpo_loss": 0.369140625, "epoch": 0.74, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 1.5724185263004202e-07, "loss": 0.3911, "projector_lr": 4.717255578901261e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.90234375, "rewards_train/margins": 3.65625, "rewards_train/rejected": -4.5625, "sft_loss": 0.63671875, "step": 4644 }, { "dpo_loss": 0.255859375, "epoch": 0.74, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 1.570570528446099e-07, "loss": 0.2386, "projector_lr": 4.711711585338297e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96875, "rewards_train/margins": 6.90625, "rewards_train/rejected": -7.875, "sft_loss": 0.86328125, "step": 4645 }, { "dpo_loss": 0.05029296875, "epoch": 0.74, "final_loss": 0.05029296875, "grad_norm": 0.0, "learning_rate": 1.5687234148139045e-07, "loss": 0.3276, "projector_lr": 4.706170244441714e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.0, "sft_loss": 0.82421875, "step": 4646 }, { "dpo_loss": 0.11669921875, "epoch": 0.74, "final_loss": 0.11669921875, "grad_norm": 0.0, "learning_rate": 1.5668771858800888e-07, "loss": 0.2146, "projector_lr": 4.700631557640267e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.3125, "sft_loss": 0.51953125, "step": 4647 }, { "dpo_loss": 0.220703125, "epoch": 0.74, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 1.565031842120668e-07, "loss": 0.1568, "projector_lr": 4.695095526362005e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.84375, "sft_loss": 0.796875, "step": 4648 }, { "dpo_loss": 0.0869140625, "epoch": 0.74, "final_loss": 0.0869140625, "grad_norm": 0.0, "learning_rate": 1.5631873840114383e-07, "loss": 0.0992, "projector_lr": 4.6895621520343155e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 3.953125, "rewards_train/rejected": -4.65625, "sft_loss": 0.79296875, "step": 4649 }, { "dpo_loss": 0.40234375, "epoch": 0.74, "final_loss": 0.40234375, "grad_norm": 0.0, "learning_rate": 1.5613438120279604e-07, "loss": 0.2699, "projector_lr": 4.6840314360838814e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.1875, "rewards_train/margins": 3.0, "rewards_train/rejected": -5.1875, "sft_loss": 0.875, "step": 4650 }, { "dpo_loss": 0.26171875, "epoch": 0.74, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 1.5595011266455693e-07, "loss": 0.1912, "projector_lr": 4.678503379936708e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.78125, "sft_loss": 0.859375, "step": 4651 }, { "dpo_loss": 0.0810546875, "epoch": 0.74, "final_loss": 0.0810546875, "grad_norm": 0.0, "learning_rate": 1.5576593283393696e-07, "loss": 0.1543, "projector_lr": 4.672977985018109e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.53125, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.65625, "sft_loss": 0.7890625, "step": 4652 }, { "dpo_loss": 0.11572265625, "epoch": 0.74, "final_loss": 0.11572265625, "grad_norm": 0.0, "learning_rate": 1.555818417584242e-07, "loss": 0.1321, "projector_lr": 4.667455252752726e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.56640625, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.8125, "sft_loss": 0.90234375, "step": 4653 }, { "dpo_loss": 0.453125, "epoch": 0.74, "final_loss": 0.453125, "grad_norm": 0.0, "learning_rate": 1.5539783948548336e-07, "loss": 0.307, "projector_lr": 4.661935184564501e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5390625, "rewards_train/margins": 2.96875, "rewards_train/rejected": -4.5, "sft_loss": 0.74609375, "step": 4654 }, { "dpo_loss": 0.0147705078125, "epoch": 0.74, "final_loss": 0.0147705078125, "grad_norm": 0.0, "learning_rate": 1.5521392606255624e-07, "loss": 0.204, "projector_lr": 4.6564177818766875e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 5.28125, "rewards_train/rejected": -5.78125, "sft_loss": 0.71875, "step": 4655 }, { "dpo_loss": 0.23046875, "epoch": 0.74, "final_loss": 0.23046875, "grad_norm": 0.0, "learning_rate": 1.550301015370622e-07, "loss": 0.1388, "projector_lr": 4.650903046111866e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -5.90625, "sft_loss": 0.8046875, "step": 4656 }, { "dpo_loss": 0.5, "epoch": 0.75, "final_loss": 0.5, "grad_norm": 0.0, "learning_rate": 1.5484636595639727e-07, "loss": 0.359, "projector_lr": 4.6453909786919187e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 1.9140625, "rewards_train/rejected": -2.578125, "sft_loss": 0.76953125, "step": 4657 }, { "dpo_loss": 0.015869140625, "epoch": 0.75, "final_loss": 0.015869140625, "grad_norm": 0.0, "learning_rate": 1.546627193679344e-07, "loss": 0.0839, "projector_lr": 4.6398815810380326e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.40625, "sft_loss": 0.953125, "step": 4658 }, { "dpo_loss": 0.43359375, "epoch": 0.75, "final_loss": 0.43359375, "grad_norm": 0.0, "learning_rate": 1.544791618190243e-07, "loss": 0.2351, "projector_lr": 4.6343748545707296e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 2.28125, "rewards_train/rejected": -3.578125, "sft_loss": 0.9140625, "step": 4659 }, { "dpo_loss": 0.2177734375, "epoch": 0.75, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 1.54295693356994e-07, "loss": 0.3136, "projector_lr": 4.6288708007098203e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84765625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.90625, "sft_loss": 0.7421875, "step": 4660 }, { "dpo_loss": 0.171875, "epoch": 0.75, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 1.5411231402914775e-07, "loss": 0.1514, "projector_lr": 4.623369420874433e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0625, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.34375, "sft_loss": 1.03125, "step": 4661 }, { "dpo_loss": 0.08837890625, "epoch": 0.75, "final_loss": 0.08837890625, "grad_norm": 0.0, "learning_rate": 1.5392902388276718e-07, "loss": 0.1233, "projector_lr": 4.6178707164830157e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.59375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.90625, "sft_loss": 0.91015625, "step": 4662 }, { "dpo_loss": 0.53515625, "epoch": 0.75, "final_loss": 0.53515625, "grad_norm": 0.0, "learning_rate": 1.5374582296511053e-07, "loss": 0.3356, "projector_lr": 4.612374688953316e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 3.03125, "rewards_train/rejected": -4.4375, "sft_loss": 0.80078125, "step": 4663 }, { "dpo_loss": 0.0272216796875, "epoch": 0.75, "final_loss": 0.0272216796875, "grad_norm": 0.0, "learning_rate": 1.5356271132341303e-07, "loss": 0.0936, "projector_lr": 4.606881339702391e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.921875, "rewards_train/margins": 6.75, "rewards_train/rejected": -8.625, "sft_loss": 1.0546875, "step": 4664 }, { "dpo_loss": 0.03515625, "epoch": 0.75, "final_loss": 0.03515625, "grad_norm": 0.0, "learning_rate": 1.53379689004887e-07, "loss": 0.0475, "projector_lr": 4.601390670146611e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.25, "sft_loss": 0.73828125, "step": 4665 }, { "dpo_loss": 0.10546875, "epoch": 0.75, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 1.53196756056722e-07, "loss": 0.2222, "projector_lr": 4.5959026817016603e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84375, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.375, "sft_loss": 0.5390625, "step": 4666 }, { "dpo_loss": 0.466796875, "epoch": 0.75, "final_loss": 0.466796875, "grad_norm": 0.0, "learning_rate": 1.5301391252608398e-07, "loss": 0.3448, "projector_lr": 4.5904173757825194e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 2.640625, "rewards_train/rejected": -3.34375, "sft_loss": 0.65234375, "step": 4667 }, { "dpo_loss": 0.0654296875, "epoch": 0.75, "final_loss": 0.0654296875, "grad_norm": 0.0, "learning_rate": 1.5283115846011663e-07, "loss": 0.0417, "projector_lr": 4.584934753803499e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.380859375, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.5, "sft_loss": 0.71484375, "step": 4668 }, { "dpo_loss": 0.134765625, "epoch": 0.75, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 1.5264849390593937e-07, "loss": 0.5342, "projector_lr": 4.5794548171781814e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.65625, "sft_loss": 0.78515625, "step": 4669 }, { "dpo_loss": 0.130859375, "epoch": 0.75, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 1.5246591891064986e-07, "loss": 0.0978, "projector_lr": 4.573977567319496e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.15625, "sft_loss": 0.87890625, "step": 4670 }, { "dpo_loss": 0.248046875, "epoch": 0.75, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 1.5228343352132167e-07, "loss": 0.247, "projector_lr": 4.5685030056396507e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.953125, "rewards_train/margins": 3.078125, "rewards_train/rejected": -5.03125, "sft_loss": 0.67578125, "step": 4671 }, { "dpo_loss": 0.40625, "epoch": 0.75, "final_loss": 0.40625, "grad_norm": 0.0, "learning_rate": 1.521010377850062e-07, "loss": 0.2884, "projector_lr": 4.5630311335501866e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.10302734375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -4.5, "sft_loss": 0.61328125, "step": 4672 }, { "dpo_loss": 0.37109375, "epoch": 0.75, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 1.519187317487305e-07, "loss": 0.2024, "projector_lr": 4.557561952461915e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.3125, "sft_loss": 0.76953125, "step": 4673 }, { "dpo_loss": 0.1826171875, "epoch": 0.75, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 1.5173651545949967e-07, "loss": 0.1967, "projector_lr": 4.5520954637849906e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.34375, "rewards_train/margins": 3.953125, "rewards_train/rejected": -4.28125, "sft_loss": 0.546875, "step": 4674 }, { "dpo_loss": 0.458984375, "epoch": 0.75, "final_loss": 0.458984375, "grad_norm": 0.0, "learning_rate": 1.5155438896429496e-07, "loss": 0.3305, "projector_lr": 4.5466316689288487e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.015625, "rewards_train/margins": 2.875, "rewards_train/rejected": -3.890625, "sft_loss": 1.015625, "step": 4675 }, { "dpo_loss": 0.1171875, "epoch": 0.75, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 1.5137235231007488e-07, "loss": 0.0902, "projector_lr": 4.541170569302247e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.6875, "sft_loss": 0.74609375, "step": 4676 }, { "dpo_loss": 0.0869140625, "epoch": 0.75, "final_loss": 0.0869140625, "grad_norm": 0.0, "learning_rate": 1.511904055437746e-07, "loss": 0.1414, "projector_lr": 4.5357121663132385e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 4.71875, "rewards_train/rejected": -6.0625, "sft_loss": 0.55078125, "step": 4677 }, { "dpo_loss": 0.05078125, "epoch": 0.75, "final_loss": 0.05078125, "grad_norm": 0.0, "learning_rate": 1.5100854871230595e-07, "loss": 0.2194, "projector_lr": 4.5302564613691786e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.28125, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.4375, "sft_loss": 0.76171875, "step": 4678 }, { "dpo_loss": 0.11572265625, "epoch": 0.75, "final_loss": 0.11572265625, "grad_norm": 0.0, "learning_rate": 1.5082678186255766e-07, "loss": 0.0703, "projector_lr": 4.5248034558767296e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.484375, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.34375, "sft_loss": 1.6015625, "step": 4679 }, { "dpo_loss": 0.1767578125, "epoch": 0.75, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 1.506451050413956e-07, "loss": 0.2677, "projector_lr": 4.5193531512418684e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84375, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.53125, "sft_loss": 0.84375, "step": 4680 }, { "dpo_loss": 0.1328125, "epoch": 0.75, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 1.5046351829566205e-07, "loss": 0.0677, "projector_lr": 4.5139055488698624e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1748046875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.125, "sft_loss": 0.609375, "step": 4681 }, { "dpo_loss": 0.44921875, "epoch": 0.75, "final_loss": 0.44921875, "grad_norm": 0.0, "learning_rate": 1.502820216721762e-07, "loss": 0.3224, "projector_lr": 4.508460650165286e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.0625, "sft_loss": 0.8125, "step": 4682 }, { "dpo_loss": 0.1474609375, "epoch": 0.75, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 1.5010061521773372e-07, "loss": 0.1058, "projector_lr": 4.5030184565320113e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.12158203125, "rewards_train/margins": 7.0625, "rewards_train/rejected": -7.1875, "sft_loss": 0.76953125, "step": 4683 }, { "dpo_loss": 0.81640625, "epoch": 0.75, "final_loss": 0.81640625, "grad_norm": 0.0, "learning_rate": 1.4991929897910765e-07, "loss": 0.4972, "projector_lr": 4.49757896937323e-07, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.953125, "rewards_train/margins": 2.328125, "rewards_train/rejected": -4.28125, "sft_loss": 1.0703125, "step": 4684 }, { "dpo_loss": 0.1865234375, "epoch": 0.75, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 1.4973807300304708e-07, "loss": 0.2871, "projector_lr": 4.492142190091413e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 2.96875, "rewards_train/rejected": -4.15625, "sft_loss": 0.96484375, "step": 4685 }, { "dpo_loss": 0.35546875, "epoch": 0.75, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 1.4955693733627867e-07, "loss": 0.2764, "projector_lr": 4.48670812008836e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.828125, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.1875, "sft_loss": 0.58203125, "step": 4686 }, { "dpo_loss": 0.10546875, "epoch": 0.75, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 1.493758920255046e-07, "loss": 0.0656, "projector_lr": 4.481276760765138e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.40625, "sft_loss": 1.4765625, "step": 4687 }, { "dpo_loss": 0.26953125, "epoch": 0.75, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 1.49194937117405e-07, "loss": 0.3291, "projector_lr": 4.47584811352215e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.890625, "rewards_train/margins": 2.125, "rewards_train/rejected": -4.0, "sft_loss": 0.72265625, "step": 4688 }, { "dpo_loss": 0.06005859375, "epoch": 0.75, "final_loss": 0.06005859375, "grad_norm": 0.0, "learning_rate": 1.490140726586357e-07, "loss": 0.053, "projector_lr": 4.470422179759071e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.15625, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.6875, "sft_loss": 0.8203125, "step": 4689 }, { "dpo_loss": 0.1689453125, "epoch": 0.75, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 1.4883329869582993e-07, "loss": 0.3069, "projector_lr": 4.4649989608748985e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.40625, "sft_loss": 1.875, "step": 4690 }, { "dpo_loss": 0.373046875, "epoch": 0.75, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 1.4865261527559724e-07, "loss": 0.2368, "projector_lr": 4.459578458267918e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.5, "sft_loss": 0.9140625, "step": 4691 }, { "dpo_loss": 0.004547119140625, "epoch": 0.75, "final_loss": 0.004547119140625, "grad_norm": 0.0, "learning_rate": 1.4847202244452383e-07, "loss": 0.0844, "projector_lr": 4.4541606733357153e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 7.9375, "rewards_train/rejected": -8.8125, "sft_loss": 0.65625, "step": 4692 }, { "dpo_loss": 0.1533203125, "epoch": 0.75, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 1.4829152024917247e-07, "loss": 0.2951, "projector_lr": 4.4487456074751744e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84765625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.90625, "sft_loss": 0.63671875, "step": 4693 }, { "dpo_loss": 0.26171875, "epoch": 0.75, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 1.4811110873608295e-07, "loss": 0.1718, "projector_lr": 4.443333262082489e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.59375, "sft_loss": 0.71484375, "step": 4694 }, { "dpo_loss": 0.2421875, "epoch": 0.75, "final_loss": 0.2421875, "grad_norm": 0.0, "learning_rate": 1.4793078795177132e-07, "loss": 0.1245, "projector_lr": 4.4379236385531405e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.83984375, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.03125, "sft_loss": 0.6640625, "step": 4695 }, { "dpo_loss": 0.1689453125, "epoch": 0.75, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 1.4775055794273017e-07, "loss": 0.5396, "projector_lr": 4.432516738281905e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8359375, "rewards_train/margins": 3.84375, "rewards_train/rejected": -5.6875, "sft_loss": 1.2265625, "step": 4696 }, { "dpo_loss": 0.12158203125, "epoch": 0.75, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 1.4757041875542914e-07, "loss": 0.0943, "projector_lr": 4.4271125626628744e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.40625, "sft_loss": 0.91796875, "step": 4697 }, { "dpo_loss": 0.07470703125, "epoch": 0.75, "final_loss": 0.07470703125, "grad_norm": 0.0, "learning_rate": 1.4739037043631397e-07, "loss": 0.0451, "projector_lr": 4.4217111130894193e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.09375, "sft_loss": 0.77734375, "step": 4698 }, { "dpo_loss": 0.146484375, "epoch": 0.75, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 1.4721041303180725e-07, "loss": 0.0944, "projector_lr": 4.416312390954218e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 6.6875, "rewards_train/rejected": -7.34375, "sft_loss": 0.8359375, "step": 4699 }, { "dpo_loss": 0.1748046875, "epoch": 0.75, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 1.4703054658830788e-07, "loss": 0.098, "projector_lr": 4.410916397649237e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.01318359375, "rewards_train/margins": 5.59375, "rewards_train/rejected": -5.59375, "sft_loss": 0.58203125, "step": 4700 }, { "dpo_loss": 0.109375, "epoch": 0.75, "final_loss": 0.109375, "grad_norm": 0.0, "learning_rate": 1.468507711521917e-07, "loss": 0.0751, "projector_lr": 4.405523134565751e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.328125, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.25, "sft_loss": 0.609375, "step": 4701 }, { "dpo_loss": 0.06298828125, "epoch": 0.75, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 1.466710867698106e-07, "loss": 0.1016, "projector_lr": 4.4001326030943183e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.193359375, "rewards_train/margins": 4.8125, "rewards_train/rejected": -4.625, "sft_loss": 0.73828125, "step": 4702 }, { "dpo_loss": 0.01904296875, "epoch": 0.75, "final_loss": 0.01904296875, "grad_norm": 0.0, "learning_rate": 1.4649149348749356e-07, "loss": 0.0387, "projector_lr": 4.3947448046248073e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.455078125, "rewards_train/margins": 7.1875, "rewards_train/rejected": -7.65625, "sft_loss": 0.96875, "step": 4703 }, { "dpo_loss": 0.017822265625, "epoch": 0.75, "final_loss": 0.017822265625, "grad_norm": 0.0, "learning_rate": 1.4631199135154566e-07, "loss": 0.0928, "projector_lr": 4.38935974054637e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.0625, "sft_loss": 0.75, "step": 4704 }, { "dpo_loss": 0.1474609375, "epoch": 0.75, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 1.461325804082485e-07, "loss": 0.0783, "projector_lr": 4.383977412247455e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.0, "sft_loss": 0.99609375, "step": 4705 }, { "dpo_loss": 0.1591796875, "epoch": 0.75, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 1.459532607038601e-07, "loss": 0.2433, "projector_lr": 4.378597821115803e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8359375, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.375, "sft_loss": 1.0859375, "step": 4706 }, { "dpo_loss": 0.006439208984375, "epoch": 0.75, "final_loss": 0.006439208984375, "grad_norm": 0.0, "learning_rate": 1.4577403228461543e-07, "loss": 0.1889, "projector_lr": 4.3732209685384636e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11962890625, "rewards_train/margins": 7.09375, "rewards_train/rejected": -7.21875, "sft_loss": 0.66015625, "step": 4707 }, { "dpo_loss": 0.146484375, "epoch": 0.75, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 1.4559489519672546e-07, "loss": 0.1317, "projector_lr": 4.367846855901764e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.8125, "sft_loss": 0.61328125, "step": 4708 }, { "dpo_loss": 0.154296875, "epoch": 0.75, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 1.4541584948637774e-07, "loss": 0.2581, "projector_lr": 4.3624754845913326e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.83984375, "rewards_train/margins": 3.546875, "rewards_train/rejected": -4.375, "sft_loss": 0.51953125, "step": 4709 }, { "dpo_loss": 0.1630859375, "epoch": 0.75, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 1.4523689519973615e-07, "loss": 0.2071, "projector_lr": 4.3571068559920844e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.765625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.53125, "sft_loss": 0.6640625, "step": 4710 }, { "dpo_loss": 0.2080078125, "epoch": 0.75, "final_loss": 0.2080078125, "grad_norm": 0.0, "learning_rate": 1.4505803238294133e-07, "loss": 0.2091, "projector_lr": 4.3517409714882404e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.140625, "rewards_train/margins": 3.40625, "rewards_train/rejected": -5.5625, "sft_loss": 1.34375, "step": 4711 }, { "dpo_loss": 0.0634765625, "epoch": 0.75, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 1.448792610821099e-07, "loss": 0.1927, "projector_lr": 4.3463778324632977e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.625, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.03125, "sft_loss": 0.84765625, "step": 4712 }, { "dpo_loss": 0.98046875, "epoch": 0.75, "final_loss": 0.98046875, "grad_norm": 0.0, "learning_rate": 1.4470058134333558e-07, "loss": 0.5297, "projector_lr": 4.3410174403000675e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.203125, "rewards_train/margins": 2.25, "rewards_train/rejected": -3.453125, "sft_loss": 0.71875, "step": 4713 }, { "dpo_loss": 0.01611328125, "epoch": 0.75, "final_loss": 0.01611328125, "grad_norm": 0.0, "learning_rate": 1.4452199321268728e-07, "loss": 0.054, "projector_lr": 4.3356597963806183e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.03125, "sft_loss": 0.71875, "step": 4714 }, { "dpo_loss": 0.46484375, "epoch": 0.75, "final_loss": 0.46484375, "grad_norm": 0.0, "learning_rate": 1.4434349673621161e-07, "loss": 0.2866, "projector_lr": 4.3303049020863484e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 3.40625, "rewards_train/rejected": -4.09375, "sft_loss": 0.6640625, "step": 4715 }, { "dpo_loss": 0.1357421875, "epoch": 0.75, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 1.4416509195993053e-07, "loss": 0.0902, "projector_lr": 4.3249527587979163e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6875, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.78125, "sft_loss": 0.7265625, "step": 4716 }, { "dpo_loss": 0.1962890625, "epoch": 0.75, "final_loss": 0.1962890625, "grad_norm": 0.0, "learning_rate": 1.439867789298434e-07, "loss": 0.1513, "projector_lr": 4.319603367895302e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 3.1875, "rewards_train/rejected": -4.53125, "sft_loss": 0.765625, "step": 4717 }, { "dpo_loss": 0.412109375, "epoch": 0.75, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 1.4380855769192456e-07, "loss": 0.2427, "projector_lr": 4.3142567307577373e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.875, "rewards_train/margins": 3.03125, "rewards_train/rejected": -3.890625, "sft_loss": 0.7109375, "step": 4718 }, { "dpo_loss": 0.369140625, "epoch": 0.76, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 1.4363042829212595e-07, "loss": 0.3764, "projector_lr": 4.308912848763779e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.09375, "rewards_train/margins": 4.5, "rewards_train/rejected": -6.59375, "sft_loss": 1.0859375, "step": 4719 }, { "dpo_loss": 0.1572265625, "epoch": 0.76, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 1.4345239077637499e-07, "loss": 0.1158, "projector_lr": 4.3035717232912504e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.2314453125, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.625, "sft_loss": 0.6796875, "step": 4720 }, { "dpo_loss": 0.279296875, "epoch": 0.76, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 1.4327444519057608e-07, "loss": 0.3808, "projector_lr": 4.298233355717283e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.84375, "sft_loss": 0.95703125, "step": 4721 }, { "dpo_loss": 0.0027313232421875, "epoch": 0.76, "final_loss": 0.0027313232421875, "grad_norm": 0.0, "learning_rate": 1.4309659158060932e-07, "loss": 0.1467, "projector_lr": 4.29289774741828e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 8.0625, "rewards_train/rejected": -9.125, "sft_loss": 0.8515625, "step": 4722 }, { "dpo_loss": 0.333984375, "epoch": 0.76, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 1.4291882999233147e-07, "loss": 0.2881, "projector_lr": 4.287564899769944e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.060546875, "rewards_train/margins": 3.5, "rewards_train/rejected": -3.5625, "sft_loss": 0.828125, "step": 4723 }, { "dpo_loss": 0.412109375, "epoch": 0.76, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 1.4274116047157518e-07, "loss": 0.4456, "projector_lr": 4.2822348141472563e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.9375, "sft_loss": 0.6484375, "step": 4724 }, { "dpo_loss": 0.11767578125, "epoch": 0.76, "final_loss": 0.11767578125, "grad_norm": 0.0, "learning_rate": 1.4256358306415002e-07, "loss": 0.0636, "projector_lr": 4.2769074919245007e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.1875, "sft_loss": 0.6796875, "step": 4725 }, { "dpo_loss": 0.14453125, "epoch": 0.76, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 1.4238609781584122e-07, "loss": 0.1435, "projector_lr": 4.271582934475237e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.419921875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.5625, "sft_loss": 0.7421875, "step": 4726 }, { "dpo_loss": 0.056884765625, "epoch": 0.76, "final_loss": 0.056884765625, "grad_norm": 0.0, "learning_rate": 1.4220870477241036e-07, "loss": 0.0917, "projector_lr": 4.2662611431723114e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.609375, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.09375, "sft_loss": 0.72265625, "step": 4727 }, { "dpo_loss": 0.1591796875, "epoch": 0.76, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 1.420314039795953e-07, "loss": 0.0859, "projector_lr": 4.260942119387859e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.02197265625, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.25, "sft_loss": 0.48828125, "step": 4728 }, { "dpo_loss": 0.0257568359375, "epoch": 0.76, "final_loss": 0.0257568359375, "grad_norm": 0.0, "learning_rate": 1.418541954831104e-07, "loss": 0.109, "projector_lr": 4.255625864493312e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.75, "sft_loss": 0.75390625, "step": 4729 }, { "dpo_loss": 0.0751953125, "epoch": 0.76, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 1.4167707932864558e-07, "loss": 0.0742, "projector_lr": 4.250312379859368e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.03125, "sft_loss": 0.8125, "step": 4730 }, { "dpo_loss": 0.015869140625, "epoch": 0.76, "final_loss": 0.015869140625, "grad_norm": 0.0, "learning_rate": 1.4150005556186777e-07, "loss": 0.0339, "projector_lr": 4.245001666856034e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4375, "rewards_train/margins": 6.1875, "rewards_train/rejected": -7.625, "sft_loss": 0.79296875, "step": 4731 }, { "dpo_loss": 0.1103515625, "epoch": 0.76, "final_loss": 0.1103515625, "grad_norm": 0.0, "learning_rate": 1.4132312422841946e-07, "loss": 0.0623, "projector_lr": 4.2396937268525844e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.359375, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.53125, "sft_loss": 0.58203125, "step": 4732 }, { "dpo_loss": 0.2734375, "epoch": 0.76, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 1.4114628537391943e-07, "loss": 0.466, "projector_lr": 4.234388561217583e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.4375, "sft_loss": 0.65625, "step": 4733 }, { "dpo_loss": 0.63671875, "epoch": 0.76, "final_loss": 0.63671875, "grad_norm": 0.0, "learning_rate": 1.4096953904396254e-07, "loss": 0.382, "projector_lr": 4.229086171318876e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 1.734375, "rewards_train/rejected": -2.921875, "sft_loss": 0.953125, "step": 4734 }, { "dpo_loss": 0.060546875, "epoch": 0.76, "final_loss": 0.060546875, "grad_norm": 0.0, "learning_rate": 1.4079288528412025e-07, "loss": 0.0541, "projector_lr": 4.223786558523608e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.625, "sft_loss": 0.8671875, "step": 4735 }, { "dpo_loss": 0.166015625, "epoch": 0.76, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 1.4061632413993967e-07, "loss": 0.1035, "projector_lr": 4.218489724198191e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 3.734375, "rewards_train/rejected": -5.09375, "sft_loss": 0.9296875, "step": 4736 }, { "dpo_loss": 0.263671875, "epoch": 0.76, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 1.404398556569441e-07, "loss": 0.3218, "projector_lr": 4.2131956697083235e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -6.03125, "sft_loss": 0.83984375, "step": 4737 }, { "dpo_loss": 0.263671875, "epoch": 0.76, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 1.402634798806333e-07, "loss": 0.2957, "projector_lr": 4.2079043964189994e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 3.53125, "rewards_train/rejected": -5.03125, "sft_loss": 1.203125, "step": 4738 }, { "dpo_loss": 0.0260009765625, "epoch": 0.76, "final_loss": 0.0260009765625, "grad_norm": 0.0, "learning_rate": 1.4008719685648274e-07, "loss": 0.071, "projector_lr": 4.2026159056944826e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.2890625, "rewards_train/margins": 6.9375, "rewards_train/rejected": -6.625, "sft_loss": 0.7421875, "step": 4739 }, { "dpo_loss": 0.2734375, "epoch": 0.76, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 1.399110066299441e-07, "loss": 0.1959, "projector_lr": 4.197330198898323e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.392578125, "rewards_train/margins": 3.0625, "rewards_train/rejected": -3.453125, "sft_loss": 0.59765625, "step": 4740 }, { "dpo_loss": 0.0024261474609375, "epoch": 0.76, "final_loss": 0.0024261474609375, "grad_norm": 0.0, "learning_rate": 1.3973490924644494e-07, "loss": 0.0145, "projector_lr": 4.192047277393348e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.828125, "rewards_train/margins": 7.5625, "rewards_train/rejected": -8.375, "sft_loss": 0.62109375, "step": 4741 }, { "dpo_loss": 0.00836181640625, "epoch": 0.76, "final_loss": 0.00836181640625, "grad_norm": 0.0, "learning_rate": 1.3955890475138944e-07, "loss": 0.1523, "projector_lr": 4.186767142541683e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.416015625, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.65625, "sft_loss": 0.8203125, "step": 4742 }, { "dpo_loss": 0.1298828125, "epoch": 0.76, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 1.3938299319015712e-07, "loss": 0.3775, "projector_lr": 4.1814897957047137e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.9375, "sft_loss": 0.9140625, "step": 4743 }, { "dpo_loss": 0.212890625, "epoch": 0.76, "final_loss": 0.212890625, "grad_norm": 0.0, "learning_rate": 1.392071746081044e-07, "loss": 0.3202, "projector_lr": 4.1762152382431316e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.6875, "sft_loss": 1.0546875, "step": 4744 }, { "dpo_loss": 0.212890625, "epoch": 0.76, "final_loss": 0.212890625, "grad_norm": 0.0, "learning_rate": 1.3903144905056258e-07, "loss": 0.1965, "projector_lr": 4.1709434715168775e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8125, "rewards_train/margins": 2.703125, "rewards_train/rejected": -3.515625, "sft_loss": 0.77734375, "step": 4745 }, { "dpo_loss": 0.0908203125, "epoch": 0.76, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 1.3885581656284006e-07, "loss": 0.0742, "projector_lr": 4.1656744968852025e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.953125, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.1875, "sft_loss": 0.703125, "step": 4746 }, { "dpo_loss": 0.031494140625, "epoch": 0.76, "final_loss": 0.031494140625, "grad_norm": 0.0, "learning_rate": 1.386802771902205e-07, "loss": 0.1489, "projector_lr": 4.1604083157066156e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2060546875, "rewards_train/margins": 6.6875, "rewards_train/rejected": -6.90625, "sft_loss": 0.83984375, "step": 4747 }, { "dpo_loss": 0.12255859375, "epoch": 0.76, "final_loss": 0.12255859375, "grad_norm": 0.0, "learning_rate": 1.3850483097796423e-07, "loss": 0.1133, "projector_lr": 4.155144929338927e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -4.6875, "sft_loss": 0.58984375, "step": 4748 }, { "dpo_loss": 0.046142578125, "epoch": 0.76, "final_loss": 0.046142578125, "grad_norm": 0.0, "learning_rate": 1.383294779713069e-07, "loss": 0.0467, "projector_lr": 4.149884339139207e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1845703125, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.125, "sft_loss": 0.80859375, "step": 4749 }, { "dpo_loss": 0.0712890625, "epoch": 0.76, "final_loss": 0.0712890625, "grad_norm": 0.0, "learning_rate": 1.3815421821546046e-07, "loss": 0.1087, "projector_lr": 4.1446265464638144e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 6.5, "rewards_train/rejected": -7.125, "sft_loss": 0.6953125, "step": 4750 }, { "dpo_loss": 0.34375, "epoch": 0.76, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 1.3797905175561265e-07, "loss": 0.1739, "projector_lr": 4.1393715526683795e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 2.484375, "rewards_train/rejected": -3.46875, "sft_loss": 1.09375, "step": 4751 }, { "dpo_loss": 0.177734375, "epoch": 0.76, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 1.378039786369275e-07, "loss": 0.3093, "projector_lr": 4.134119359107825e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.40625, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.65625, "sft_loss": 0.515625, "step": 4752 }, { "dpo_loss": 0.1650390625, "epoch": 0.76, "final_loss": 0.1650390625, "grad_norm": 0.0, "learning_rate": 1.376289989045446e-07, "loss": 0.3182, "projector_lr": 4.1288699671363383e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.609375, "rewards_train/margins": 3.34375, "rewards_train/rejected": -3.9375, "sft_loss": 0.625, "step": 4753 }, { "dpo_loss": 0.053466796875, "epoch": 0.76, "final_loss": 0.053466796875, "grad_norm": 0.0, "learning_rate": 1.3745411260357965e-07, "loss": 0.1715, "projector_lr": 4.1236233781073894e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.03125, "rewards_train/margins": 6.1875, "rewards_train/rejected": -7.21875, "sft_loss": 0.77734375, "step": 4754 }, { "dpo_loss": 0.033447265625, "epoch": 0.76, "final_loss": 0.033447265625, "grad_norm": 0.0, "learning_rate": 1.3727931977912405e-07, "loss": 0.2289, "projector_lr": 4.118379593373722e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.984375, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.96875, "sft_loss": 0.55859375, "step": 4755 }, { "dpo_loss": 0.055419921875, "epoch": 0.76, "final_loss": 0.055419921875, "grad_norm": 0.0, "learning_rate": 1.3710462047624555e-07, "loss": 0.038, "projector_lr": 4.1131386142873666e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.8125, "sft_loss": 0.7890625, "step": 4756 }, { "dpo_loss": 0.130859375, "epoch": 0.76, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 1.3693001473998717e-07, "loss": 0.2301, "projector_lr": 4.1079004421996153e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.125, "sft_loss": 0.66015625, "step": 4757 }, { "dpo_loss": 0.04345703125, "epoch": 0.76, "final_loss": 0.04345703125, "grad_norm": 0.0, "learning_rate": 1.3675550261536862e-07, "loss": 0.0437, "projector_lr": 4.102665078461059e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.28125, "rewards_train/margins": 4.75, "rewards_train/rejected": -7.03125, "sft_loss": 0.6484375, "step": 4758 }, { "dpo_loss": 0.1142578125, "epoch": 0.76, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 1.365810841473844e-07, "loss": 0.1492, "projector_lr": 4.097432524421532e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.345703125, "rewards_train/margins": 6.0625, "rewards_train/rejected": -5.71875, "sft_loss": 0.578125, "step": 4759 }, { "dpo_loss": 0.0126953125, "epoch": 0.76, "final_loss": 0.0126953125, "grad_norm": 0.0, "learning_rate": 1.3640675938100589e-07, "loss": 0.0235, "projector_lr": 4.0922027814301766e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.4375, "sft_loss": 0.87109375, "step": 4760 }, { "dpo_loss": 0.189453125, "epoch": 0.76, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 1.3623252836117954e-07, "loss": 0.1697, "projector_lr": 4.0869758508353865e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.15625, "sft_loss": 1.1328125, "step": 4761 }, { "dpo_loss": 0.158203125, "epoch": 0.76, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 1.3605839113282845e-07, "loss": 0.1973, "projector_lr": 4.0817517339848544e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.875, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.65625, "sft_loss": 0.78515625, "step": 4762 }, { "dpo_loss": 0.5546875, "epoch": 0.76, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 1.3588434774085045e-07, "loss": 0.3386, "projector_lr": 4.0765304322255134e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 3.828125, "rewards_train/rejected": -4.59375, "sft_loss": 0.69140625, "step": 4763 }, { "dpo_loss": 0.0017547607421875, "epoch": 0.76, "final_loss": 0.0017547607421875, "grad_norm": 0.0, "learning_rate": 1.357103982301202e-07, "loss": 0.0835, "projector_lr": 4.0713119469036066e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4296875, "rewards_train/margins": 7.125, "rewards_train/rejected": -7.5625, "sft_loss": 0.76171875, "step": 4764 }, { "dpo_loss": 0.1806640625, "epoch": 0.76, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 1.3553654264548742e-07, "loss": 0.4329, "projector_lr": 4.066096279364623e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.5625, "sft_loss": 1.0546875, "step": 4765 }, { "dpo_loss": 0.1572265625, "epoch": 0.76, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 1.3536278103177829e-07, "loss": 0.203, "projector_lr": 4.0608834309533486e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.181640625, "rewards_train/margins": 3.546875, "rewards_train/rejected": -3.71875, "sft_loss": 0.7734375, "step": 4766 }, { "dpo_loss": 0.2734375, "epoch": 0.76, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 1.3518911343379413e-07, "loss": 0.3229, "projector_lr": 4.055673403013824e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.28125, "sft_loss": 0.87109375, "step": 4767 }, { "dpo_loss": 0.6328125, "epoch": 0.76, "final_loss": 0.6328125, "grad_norm": 0.0, "learning_rate": 1.350155398963123e-07, "loss": 0.5076, "projector_lr": 4.0504661968893696e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.9375, "rewards_train/margins": 3.109375, "rewards_train/rejected": -6.03125, "sft_loss": 0.859375, "step": 4768 }, { "dpo_loss": 0.0634765625, "epoch": 0.76, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 1.3484206046408585e-07, "loss": 0.1279, "projector_lr": 4.045261813922576e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 6.6875, "rewards_train/rejected": -8.125, "sft_loss": 0.83203125, "step": 4769 }, { "dpo_loss": 0.33203125, "epoch": 0.76, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 1.3466867518184382e-07, "loss": 0.3666, "projector_lr": 4.040060255455315e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.78125, "sft_loss": 0.99609375, "step": 4770 }, { "dpo_loss": 0.287109375, "epoch": 0.76, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 1.3449538409429063e-07, "loss": 0.2626, "projector_lr": 4.034861522828719e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 3.875, "rewards_train/rejected": -5.1875, "sft_loss": 0.75390625, "step": 4771 }, { "dpo_loss": 0.0908203125, "epoch": 0.76, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 1.3432218724610634e-07, "loss": 0.2642, "projector_lr": 4.0296656173831905e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.765625, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.125, "sft_loss": 0.89453125, "step": 4772 }, { "dpo_loss": 0.16015625, "epoch": 0.76, "final_loss": 0.16015625, "grad_norm": 0.0, "learning_rate": 1.341490846819473e-07, "loss": 0.1745, "projector_lr": 4.0244725404584195e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.125, "sft_loss": 0.7421875, "step": 4773 }, { "dpo_loss": 0.609375, "epoch": 0.76, "final_loss": 0.609375, "grad_norm": 0.0, "learning_rate": 1.3397607644644499e-07, "loss": 0.4421, "projector_lr": 4.01928229339335e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.515625, "rewards_train/margins": 2.765625, "rewards_train/rejected": -4.28125, "sft_loss": 0.73828125, "step": 4774 }, { "dpo_loss": 0.1513671875, "epoch": 0.76, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 1.3380316258420654e-07, "loss": 0.101, "projector_lr": 4.014094877526197e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.875, "sft_loss": 0.7890625, "step": 4775 }, { "dpo_loss": 1.0078125, "epoch": 0.76, "final_loss": 1.0078125, "grad_norm": 0.0, "learning_rate": 1.336303431398153e-07, "loss": 0.6545, "projector_lr": 4.0089102941944596e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.328125, "rewards_train/margins": 3.59375, "rewards_train/rejected": -4.90625, "sft_loss": 0.66015625, "step": 4776 }, { "dpo_loss": 0.47265625, "epoch": 0.76, "final_loss": 0.47265625, "grad_norm": 0.0, "learning_rate": 1.334576181578298e-07, "loss": 0.3206, "projector_lr": 4.003728544734894e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.859375, "rewards_train/margins": 5.625, "rewards_train/rejected": -7.5, "sft_loss": 0.69140625, "step": 4777 }, { "dpo_loss": 0.197265625, "epoch": 0.76, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 1.3328498768278418e-07, "loss": 0.2613, "projector_lr": 3.998549630483526e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.072265625, "rewards_train/margins": 3.8125, "rewards_train/rejected": -3.734375, "sft_loss": 0.498046875, "step": 4778 }, { "dpo_loss": 0.0257568359375, "epoch": 0.76, "final_loss": 0.0257568359375, "grad_norm": 0.0, "learning_rate": 1.3311245175918867e-07, "loss": 0.0605, "projector_lr": 3.9933735527756604e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 6.90625, "rewards_train/rejected": -8.0, "sft_loss": 0.95703125, "step": 4779 }, { "dpo_loss": 0.146484375, "epoch": 0.76, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 1.3294001043152865e-07, "loss": 0.1172, "projector_lr": 3.9882003129458596e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4140625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -4.875, "sft_loss": 0.75, "step": 4780 }, { "dpo_loss": 0.1298828125, "epoch": 0.76, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 1.3276766374426533e-07, "loss": 0.2395, "projector_lr": 3.9830299123279606e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 7.71875, "rewards_train/rejected": -8.625, "sft_loss": 0.84765625, "step": 4781 }, { "dpo_loss": 0.296875, "epoch": 0.77, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 1.325954117418353e-07, "loss": 0.1658, "projector_lr": 3.9778623522550597e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 3.546875, "rewards_train/rejected": -4.34375, "sft_loss": 0.765625, "step": 4782 }, { "dpo_loss": 0.06640625, "epoch": 0.77, "final_loss": 0.06640625, "grad_norm": 0.0, "learning_rate": 1.3242325446865122e-07, "loss": 0.3924, "projector_lr": 3.9726976340595367e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10205078125, "rewards_train/margins": 5.78125, "rewards_train/rejected": -5.875, "sft_loss": 0.828125, "step": 4783 }, { "dpo_loss": 0.1953125, "epoch": 0.77, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 1.3225119196910068e-07, "loss": 0.1929, "projector_lr": 3.9675357590730204e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.84375, "rewards_train/margins": 4.5, "rewards_train/rejected": -6.3125, "sft_loss": 0.6171875, "step": 4784 }, { "dpo_loss": 0.3046875, "epoch": 0.77, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 1.320792242875477e-07, "loss": 0.2253, "projector_lr": 3.962376728626431e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.125, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.375, "sft_loss": 0.9140625, "step": 4785 }, { "dpo_loss": 0.02783203125, "epoch": 0.77, "final_loss": 0.02783203125, "grad_norm": 0.0, "learning_rate": 1.319073514683307e-07, "loss": 0.0435, "projector_lr": 3.9572205440499217e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 6.9375, "rewards_train/rejected": -7.53125, "sft_loss": 0.59375, "step": 4786 }, { "dpo_loss": 0.04638671875, "epoch": 0.77, "final_loss": 0.04638671875, "grad_norm": 0.0, "learning_rate": 1.3173557355576465e-07, "loss": 0.0827, "projector_lr": 3.9520672066729395e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3984375, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.03125, "sft_loss": 0.77734375, "step": 4787 }, { "dpo_loss": 0.078125, "epoch": 0.77, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 1.3156389059413948e-07, "loss": 0.2511, "projector_lr": 3.9469167178241844e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 5.71875, "rewards_train/rejected": -7.03125, "sft_loss": 0.9765625, "step": 4788 }, { "dpo_loss": 0.017333984375, "epoch": 0.77, "final_loss": 0.017333984375, "grad_norm": 0.0, "learning_rate": 1.3139230262772116e-07, "loss": 0.0291, "projector_lr": 3.941769078831635e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 6.59375, "rewards_train/rejected": -7.21875, "sft_loss": 0.6640625, "step": 4789 }, { "dpo_loss": 0.087890625, "epoch": 0.77, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 1.3122080970075033e-07, "loss": 0.2322, "projector_lr": 3.9366242910225103e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.9375, "sft_loss": 1.0546875, "step": 4790 }, { "dpo_loss": 0.30078125, "epoch": 0.77, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 1.31049411857444e-07, "loss": 0.1703, "projector_lr": 3.9314823557233206e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.125, "sft_loss": 1.0234375, "step": 4791 }, { "dpo_loss": 0.0703125, "epoch": 0.77, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 1.3087810914199405e-07, "loss": 0.0755, "projector_lr": 3.9263432742598215e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.09375, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.6875, "sft_loss": 0.8984375, "step": 4792 }, { "dpo_loss": 0.255859375, "epoch": 0.77, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 1.3070690159856834e-07, "loss": 0.2627, "projector_lr": 3.9212070479570503e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.0625, "sft_loss": 0.8046875, "step": 4793 }, { "dpo_loss": 0.181640625, "epoch": 0.77, "final_loss": 0.181640625, "grad_norm": 0.0, "learning_rate": 1.3053578927130982e-07, "loss": 0.1139, "projector_lr": 3.916073678139295e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.484375, "rewards_train/margins": 5.71875, "rewards_train/rejected": -7.1875, "sft_loss": 1.4140625, "step": 4794 }, { "dpo_loss": 0.03564453125, "epoch": 0.77, "final_loss": 0.03564453125, "grad_norm": 0.0, "learning_rate": 1.3036477220433694e-07, "loss": 0.108, "projector_lr": 3.9109431661301085e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.875, "sft_loss": 0.984375, "step": 4795 }, { "dpo_loss": 0.1689453125, "epoch": 0.77, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 1.3019385044174353e-07, "loss": 0.2172, "projector_lr": 3.905815513252306e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.78515625, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.8125, "sft_loss": 0.78125, "step": 4796 }, { "dpo_loss": 0.150390625, "epoch": 0.77, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 1.3002302402759923e-07, "loss": 0.3019, "projector_lr": 3.9006907208279776e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.33203125, "rewards_train/margins": 8.125, "rewards_train/rejected": -8.5, "sft_loss": 0.80078125, "step": 4797 }, { "dpo_loss": 0.0888671875, "epoch": 0.77, "final_loss": 0.0888671875, "grad_norm": 0.0, "learning_rate": 1.2985229300594873e-07, "loss": 0.1414, "projector_lr": 3.8955687901784625e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 3.5625, "rewards_train/rejected": -4.90625, "sft_loss": 0.546875, "step": 4798 }, { "dpo_loss": 0.06298828125, "epoch": 0.77, "final_loss": 0.06298828125, "grad_norm": 0.0, "learning_rate": 1.2968165742081217e-07, "loss": 0.1393, "projector_lr": 3.890449722624365e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.765625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.34375, "sft_loss": 0.828125, "step": 4799 }, { "dpo_loss": 0.32421875, "epoch": 0.77, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 1.29511117316185e-07, "loss": 0.1967, "projector_lr": 3.885333519485551e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0625, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.3125, "sft_loss": 0.6328125, "step": 4800 }, { "dpo_loss": 0.134765625, "epoch": 0.77, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 1.2934067273603854e-07, "loss": 0.1473, "projector_lr": 3.880220182081157e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80859375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.21875, "sft_loss": 0.7734375, "step": 4801 }, { "dpo_loss": 0.10595703125, "epoch": 0.77, "final_loss": 0.10595703125, "grad_norm": 0.0, "learning_rate": 1.2917032372431875e-07, "loss": 0.1471, "projector_lr": 3.875109711729563e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.35546875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.5, "sft_loss": 0.95703125, "step": 4802 }, { "dpo_loss": 0.310546875, "epoch": 0.77, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 1.2900007032494785e-07, "loss": 0.1862, "projector_lr": 3.8700021097484363e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.546875, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.5, "sft_loss": 0.94921875, "step": 4803 }, { "dpo_loss": 0.1494140625, "epoch": 0.77, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 1.2882991258182224e-07, "loss": 0.0826, "projector_lr": 3.864897377454668e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.15625, "sft_loss": 0.5703125, "step": 4804 }, { "dpo_loss": 0.146484375, "epoch": 0.77, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 1.2865985053881473e-07, "loss": 0.1141, "projector_lr": 3.859795516164442e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.6875, "sft_loss": 0.6484375, "step": 4805 }, { "dpo_loss": 0.322265625, "epoch": 0.77, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 1.284898842397728e-07, "loss": 0.2598, "projector_lr": 3.8546965271931846e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5, "rewards_train/margins": 2.6875, "rewards_train/rejected": -3.1875, "sft_loss": 0.85546875, "step": 4806 }, { "dpo_loss": 0.028076171875, "epoch": 0.77, "final_loss": 0.028076171875, "grad_norm": 0.0, "learning_rate": 1.2832001372851974e-07, "loss": 0.1137, "projector_lr": 3.8496004118555926e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.28125, "rewards_train/margins": 6.375, "rewards_train/rejected": -7.625, "sft_loss": 0.59765625, "step": 4807 }, { "dpo_loss": 0.41015625, "epoch": 0.77, "final_loss": 0.41015625, "grad_norm": 0.0, "learning_rate": 1.281502390488537e-07, "loss": 0.3194, "projector_lr": 3.844507171465611e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.828125, "rewards_train/margins": 4.4375, "rewards_train/rejected": -6.28125, "sft_loss": 0.69140625, "step": 4808 }, { "dpo_loss": 0.138671875, "epoch": 0.77, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 1.2798056024454834e-07, "loss": 0.3331, "projector_lr": 3.8394168073364503e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48046875, "rewards_train/margins": 3.890625, "rewards_train/rejected": -4.375, "sft_loss": 0.703125, "step": 4809 }, { "dpo_loss": 0.06640625, "epoch": 0.77, "final_loss": 0.06640625, "grad_norm": 0.0, "learning_rate": 1.2781097735935238e-07, "loss": 0.2872, "projector_lr": 3.8343293207805714e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.31640625, "rewards_train/margins": 6.65625, "rewards_train/rejected": -6.96875, "sft_loss": 0.5703125, "step": 4810 }, { "dpo_loss": 0.07958984375, "epoch": 0.77, "final_loss": 0.07958984375, "grad_norm": 0.0, "learning_rate": 1.2764149043699023e-07, "loss": 0.0493, "projector_lr": 3.8292447131097074e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.0625, "sft_loss": 0.921875, "step": 4811 }, { "dpo_loss": 0.2421875, "epoch": 0.77, "final_loss": 0.2421875, "grad_norm": 0.0, "learning_rate": 1.2747209952116127e-07, "loss": 0.1674, "projector_lr": 3.824162985634839e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.134765625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -4.9375, "sft_loss": 0.80859375, "step": 4812 }, { "dpo_loss": 0.046875, "epoch": 0.77, "final_loss": 0.046875, "grad_norm": 0.0, "learning_rate": 1.2730280465553994e-07, "loss": 0.0721, "projector_lr": 3.8190841396661984e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.412109375, "rewards_train/margins": 5.5625, "rewards_train/rejected": -5.96875, "sft_loss": 0.6953125, "step": 4813 }, { "dpo_loss": 0.1513671875, "epoch": 0.77, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 1.2713360588377647e-07, "loss": 0.3883, "projector_lr": 3.8140081765132944e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.78125, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.03125, "sft_loss": 0.66015625, "step": 4814 }, { "dpo_loss": 0.0810546875, "epoch": 0.77, "final_loss": 0.0810546875, "grad_norm": 0.0, "learning_rate": 1.2696450324949588e-07, "loss": 0.1199, "projector_lr": 3.8089350974848764e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.169921875, "rewards_train/margins": 4.3125, "rewards_train/rejected": -4.15625, "sft_loss": 0.79296875, "step": 4815 }, { "dpo_loss": 0.0908203125, "epoch": 0.77, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 1.2679549679629842e-07, "loss": 0.1241, "projector_lr": 3.803864903888953e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 3.953125, "rewards_train/rejected": -4.40625, "sft_loss": 0.82421875, "step": 4816 }, { "dpo_loss": 0.373046875, "epoch": 0.77, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 1.2662658656775953e-07, "loss": 0.2402, "projector_lr": 3.7987975970327865e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.25, "sft_loss": 0.8515625, "step": 4817 }, { "dpo_loss": 0.049560546875, "epoch": 0.77, "final_loss": 0.049560546875, "grad_norm": 0.0, "learning_rate": 1.264577726074303e-07, "loss": 0.2597, "projector_lr": 3.793733178222909e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.71875, "sft_loss": 0.625, "step": 4818 }, { "dpo_loss": 0.36328125, "epoch": 0.77, "final_loss": 0.36328125, "grad_norm": 0.0, "learning_rate": 1.2628905495883625e-07, "loss": 0.2131, "projector_lr": 3.7886716487650873e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": 0.0198974609375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -4.53125, "sft_loss": 0.76953125, "step": 4819 }, { "dpo_loss": 0.82421875, "epoch": 0.77, "final_loss": 0.82421875, "grad_norm": 0.0, "learning_rate": 1.2612043366547874e-07, "loss": 0.5473, "projector_lr": 3.783613009964363e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.71875, "rewards_train/margins": 2.265625, "rewards_train/rejected": -3.984375, "sft_loss": 0.9140625, "step": 4820 }, { "dpo_loss": 0.6015625, "epoch": 0.77, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 1.25951908770834e-07, "loss": 0.6375, "projector_lr": 3.7785572631250204e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.625, "sft_loss": 0.796875, "step": 4821 }, { "dpo_loss": 0.0101318359375, "epoch": 0.77, "final_loss": 0.0101318359375, "grad_norm": 0.0, "learning_rate": 1.2578348031835323e-07, "loss": 0.0761, "projector_lr": 3.773504409550597e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3125, "rewards_train/margins": 8.25, "rewards_train/rejected": -8.5625, "sft_loss": 0.416015625, "step": 4822 }, { "dpo_loss": 0.21484375, "epoch": 0.77, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 1.256151483514629e-07, "loss": 0.1752, "projector_lr": 3.7684544505438876e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 7.5, "rewards_train/rejected": -8.4375, "sft_loss": 0.6328125, "step": 4823 }, { "dpo_loss": 0.072265625, "epoch": 0.77, "final_loss": 0.072265625, "grad_norm": 0.0, "learning_rate": 1.2544691291356497e-07, "loss": 0.2092, "projector_lr": 3.763407387406949e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 6.59375, "rewards_train/rejected": -7.59375, "sft_loss": 0.515625, "step": 4824 }, { "dpo_loss": 0.25390625, "epoch": 0.77, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 1.2527877404803595e-07, "loss": 0.1636, "projector_lr": 3.7583632214410784e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 3.125, "rewards_train/rejected": -4.21875, "sft_loss": 1.1171875, "step": 4825 }, { "dpo_loss": 0.0172119140625, "epoch": 0.77, "final_loss": 0.0172119140625, "grad_norm": 0.0, "learning_rate": 1.2511073179822772e-07, "loss": 0.0425, "projector_lr": 3.7533219539468324e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 6.09375, "rewards_train/rejected": -7.25, "sft_loss": 0.59765625, "step": 4826 }, { "dpo_loss": 0.0888671875, "epoch": 0.77, "final_loss": 0.0888671875, "grad_norm": 0.0, "learning_rate": 1.2494278620746712e-07, "loss": 0.1067, "projector_lr": 3.748283586224014e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 3.90625, "rewards_train/rejected": -4.53125, "sft_loss": 0.6796875, "step": 4827 }, { "dpo_loss": 0.423828125, "epoch": 0.77, "final_loss": 0.423828125, "grad_norm": 0.0, "learning_rate": 1.2477493731905636e-07, "loss": 0.2472, "projector_lr": 3.743248119571691e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 3.3125, "rewards_train/rejected": -4.375, "sft_loss": 0.98046875, "step": 4828 }, { "dpo_loss": 0.07177734375, "epoch": 0.77, "final_loss": 0.07177734375, "grad_norm": 0.0, "learning_rate": 1.2460718517627233e-07, "loss": 0.049, "projector_lr": 3.7382155552881704e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.361328125, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.125, "sft_loss": 0.56640625, "step": 4829 }, { "dpo_loss": 0.0849609375, "epoch": 0.77, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 1.2443952982236754e-07, "loss": 0.0866, "projector_lr": 3.733185894671027e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0086669921875, "rewards_train/margins": 4.625, "rewards_train/rejected": -4.59375, "sft_loss": 0.73828125, "step": 4830 }, { "dpo_loss": 0.103515625, "epoch": 0.77, "final_loss": 0.103515625, "grad_norm": 0.0, "learning_rate": 1.242719713005686e-07, "loss": 0.0766, "projector_lr": 3.728159139017058e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.28125, "sft_loss": 0.765625, "step": 4831 }, { "dpo_loss": 0.09619140625, "epoch": 0.77, "final_loss": 0.09619140625, "grad_norm": 0.0, "learning_rate": 1.241045096540782e-07, "loss": 0.1053, "projector_lr": 3.7231352896223463e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96875, "rewards_train/margins": 6.1875, "rewards_train/rejected": -7.15625, "sft_loss": 0.90625, "step": 4832 }, { "dpo_loss": 0.31640625, "epoch": 0.77, "final_loss": 0.31640625, "grad_norm": 0.0, "learning_rate": 1.2393714492607327e-07, "loss": 0.2036, "projector_lr": 3.7181143477821984e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.71875, "rewards_train/margins": 3.1875, "rewards_train/rejected": -4.90625, "sft_loss": 0.91796875, "step": 4833 }, { "dpo_loss": 0.13671875, "epoch": 0.77, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 1.2376987715970648e-07, "loss": 0.2013, "projector_lr": 3.713096314791195e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.78125, "sft_loss": 0.65234375, "step": 4834 }, { "dpo_loss": 0.30859375, "epoch": 0.77, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 1.2360270639810455e-07, "loss": 0.2216, "projector_lr": 3.708081191943137e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.90234375, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.625, "sft_loss": 0.47265625, "step": 4835 }, { "dpo_loss": 0.1806640625, "epoch": 0.77, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 1.2343563268437007e-07, "loss": 0.1726, "projector_lr": 3.7030689805311025e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1630859375, "rewards_train/margins": 5.53125, "rewards_train/rejected": -5.6875, "sft_loss": 0.59375, "step": 4836 }, { "dpo_loss": 0.3515625, "epoch": 0.77, "final_loss": 0.3515625, "grad_norm": 0.0, "learning_rate": 1.2326865606158e-07, "loss": 0.1915, "projector_lr": 3.6980596818474e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.78125, "sft_loss": 0.6015625, "step": 4837 }, { "dpo_loss": 0.09521484375, "epoch": 0.77, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 1.231017765727868e-07, "loss": 0.0639, "projector_lr": 3.6930532971836045e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2470703125, "rewards_train/margins": 7.0625, "rewards_train/rejected": -7.3125, "sft_loss": 0.734375, "step": 4838 }, { "dpo_loss": 0.3046875, "epoch": 0.77, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 1.2293499426101756e-07, "loss": 0.2606, "projector_lr": 3.6880498278305267e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 3.234375, "rewards_train/rejected": -4.6875, "sft_loss": 1.1953125, "step": 4839 }, { "dpo_loss": 0.1005859375, "epoch": 0.77, "final_loss": 0.1005859375, "grad_norm": 0.0, "learning_rate": 1.2276830916927423e-07, "loss": 0.057, "projector_lr": 3.683049275078227e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.09375, "sft_loss": 0.59375, "step": 4840 }, { "dpo_loss": 0.66796875, "epoch": 0.77, "final_loss": 0.66796875, "grad_norm": 0.0, "learning_rate": 1.2260172134053376e-07, "loss": 0.4692, "projector_lr": 3.678051640216013e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 3.765625, "rewards_train/rejected": -5.5625, "sft_loss": 0.828125, "step": 4841 }, { "dpo_loss": 0.1162109375, "epoch": 0.77, "final_loss": 0.1162109375, "grad_norm": 0.0, "learning_rate": 1.224352308177483e-07, "loss": 0.0803, "projector_lr": 3.6730569245324496e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.625, "rewards_train/margins": 3.1875, "rewards_train/rejected": -3.8125, "sft_loss": 0.8828125, "step": 4842 }, { "dpo_loss": 0.07666015625, "epoch": 0.77, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 1.2226883764384466e-07, "loss": 0.05, "projector_lr": 3.66806512931534e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 6.46875, "rewards_train/rejected": -7.0, "sft_loss": 0.494140625, "step": 4843 }, { "dpo_loss": 0.1533203125, "epoch": 0.78, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 1.221025418617246e-07, "loss": 0.1594, "projector_lr": 3.663076255851738e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.3125, "sft_loss": 0.875, "step": 4844 }, { "dpo_loss": 0.228515625, "epoch": 0.78, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 1.2193634351426447e-07, "loss": 0.1378, "projector_lr": 3.658090305427934e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 3.265625, "rewards_train/rejected": -3.921875, "sft_loss": 0.921875, "step": 4845 }, { "dpo_loss": 0.2138671875, "epoch": 0.78, "final_loss": 0.2138671875, "grad_norm": 0.0, "learning_rate": 1.2177024264431617e-07, "loss": 0.2409, "projector_lr": 3.653107279329486e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.59375, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.84375, "sft_loss": 1.015625, "step": 4846 }, { "dpo_loss": 0.142578125, "epoch": 0.78, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 1.2160423929470582e-07, "loss": 0.1344, "projector_lr": 3.648127178841175e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.8125, "sft_loss": 0.6953125, "step": 4847 }, { "dpo_loss": 0.047119140625, "epoch": 0.78, "final_loss": 0.047119140625, "grad_norm": 0.0, "learning_rate": 1.214383335082349e-07, "loss": 0.0725, "projector_lr": 3.643150005247047e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.5625, "sft_loss": 0.58984375, "step": 4848 }, { "dpo_loss": 0.05078125, "epoch": 0.78, "final_loss": 0.05078125, "grad_norm": 0.0, "learning_rate": 1.212725253276794e-07, "loss": 0.0789, "projector_lr": 3.638175759830382e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7109375, "rewards_train/margins": 6.40625, "rewards_train/rejected": -8.125, "sft_loss": 1.0, "step": 4849 }, { "dpo_loss": 0.17578125, "epoch": 0.78, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 1.2110681479579022e-07, "loss": 0.1599, "projector_lr": 3.633204443873707e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.03369140625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -4.75, "sft_loss": 0.71875, "step": 4850 }, { "dpo_loss": 0.373046875, "epoch": 0.78, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 1.209412019552929e-07, "loss": 0.2504, "projector_lr": 3.628236058658787e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.03125, "sft_loss": 0.8515625, "step": 4851 }, { "dpo_loss": 0.1767578125, "epoch": 0.78, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 1.2077568684888834e-07, "loss": 0.1339, "projector_lr": 3.62327060546665e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.5625, "sft_loss": 0.81640625, "step": 4852 }, { "dpo_loss": 0.1513671875, "epoch": 0.78, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 1.2061026951925173e-07, "loss": 0.0934, "projector_lr": 3.6183080855775523e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 3.65625, "rewards_train/rejected": -4.125, "sft_loss": 0.66796875, "step": 4853 }, { "dpo_loss": 0.06787109375, "epoch": 0.78, "final_loss": 0.06787109375, "grad_norm": 0.0, "learning_rate": 1.204449500090331e-07, "loss": 0.1799, "projector_lr": 3.613348500270993e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.5625, "sft_loss": 0.6484375, "step": 4854 }, { "dpo_loss": 0.494140625, "epoch": 0.78, "final_loss": 0.494140625, "grad_norm": 0.0, "learning_rate": 1.2027972836085764e-07, "loss": 0.271, "projector_lr": 3.6083918508257296e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 2.90625, "rewards_train/rejected": -4.21875, "sft_loss": 0.86328125, "step": 4855 }, { "dpo_loss": 0.08837890625, "epoch": 0.78, "final_loss": 0.08837890625, "grad_norm": 0.0, "learning_rate": 1.2011460461732492e-07, "loss": 0.0835, "projector_lr": 3.6034381385197477e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.498046875, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.40625, "sft_loss": 0.76953125, "step": 4856 }, { "dpo_loss": 0.484375, "epoch": 0.78, "final_loss": 0.484375, "grad_norm": 0.0, "learning_rate": 1.199495788210093e-07, "loss": 0.4121, "projector_lr": 3.5984873646302797e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 3.640625, "rewards_train/rejected": -4.78125, "sft_loss": 0.90625, "step": 4857 }, { "dpo_loss": 0.09912109375, "epoch": 0.78, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 1.1978465101446e-07, "loss": 0.3153, "projector_lr": 3.5935395304338004e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.78125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.78125, "sft_loss": 0.6484375, "step": 4858 }, { "dpo_loss": 0.064453125, "epoch": 0.78, "final_loss": 0.064453125, "grad_norm": 0.0, "learning_rate": 1.1961982124020104e-07, "loss": 0.0518, "projector_lr": 3.5885946372060315e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.40625, "sft_loss": 0.6484375, "step": 4859 }, { "dpo_loss": 0.10400390625, "epoch": 0.78, "final_loss": 0.10400390625, "grad_norm": 0.0, "learning_rate": 1.1945508954073097e-07, "loss": 0.1048, "projector_lr": 3.583652686221929e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.26171875, "rewards_train/margins": 6.34375, "rewards_train/rejected": -6.59375, "sft_loss": 0.6640625, "step": 4860 }, { "dpo_loss": 0.2021484375, "epoch": 0.78, "final_loss": 0.2021484375, "grad_norm": 0.0, "learning_rate": 1.1929045595852343e-07, "loss": 0.1749, "projector_lr": 3.5787136787557033e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.53515625, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.65625, "sft_loss": 0.87109375, "step": 4861 }, { "dpo_loss": 0.0159912109375, "epoch": 0.78, "final_loss": 0.0159912109375, "grad_norm": 0.0, "learning_rate": 1.1912592053602599e-07, "loss": 0.0803, "projector_lr": 3.57377761608078e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.90625, "sft_loss": 1.0625, "step": 4862 }, { "dpo_loss": 0.0791015625, "epoch": 0.78, "final_loss": 0.0791015625, "grad_norm": 0.0, "learning_rate": 1.1896148331566186e-07, "loss": 0.1379, "projector_lr": 3.568844499469856e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.65625, "sft_loss": 0.7421875, "step": 4863 }, { "dpo_loss": 0.111328125, "epoch": 0.78, "final_loss": 0.111328125, "grad_norm": 0.0, "learning_rate": 1.1879714433982813e-07, "loss": 0.2383, "projector_lr": 3.563914330194844e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 3.171875, "rewards_train/rejected": -4.65625, "sft_loss": 1.1796875, "step": 4864 }, { "dpo_loss": 0.06396484375, "epoch": 0.78, "final_loss": 0.06396484375, "grad_norm": 0.0, "learning_rate": 1.1863290365089729e-07, "loss": 0.1552, "projector_lr": 3.5589871095269187e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.625, "sft_loss": 0.9453125, "step": 4865 }, { "dpo_loss": 0.109375, "epoch": 0.78, "final_loss": 0.109375, "grad_norm": 0.0, "learning_rate": 1.1846876129121586e-07, "loss": 0.0815, "projector_lr": 3.554062838736476e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.1875, "sft_loss": 0.87109375, "step": 4866 }, { "dpo_loss": 0.12353515625, "epoch": 0.78, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 1.1830471730310532e-07, "loss": 0.1187, "projector_lr": 3.54914151909316e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.609375, "rewards_train/margins": 7.21875, "rewards_train/rejected": -8.8125, "sft_loss": 0.81640625, "step": 4867 }, { "dpo_loss": 0.0177001953125, "epoch": 0.78, "final_loss": 0.0177001953125, "grad_norm": 0.0, "learning_rate": 1.181407717288615e-07, "loss": 0.1793, "projector_lr": 3.544223151865845e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.90625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.46875, "sft_loss": 0.7421875, "step": 4868 }, { "dpo_loss": 0.07958984375, "epoch": 0.78, "final_loss": 0.07958984375, "grad_norm": 0.0, "learning_rate": 1.1797692461075548e-07, "loss": 0.0687, "projector_lr": 3.5393077383226645e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 6.625, "rewards_train/rejected": -7.6875, "sft_loss": 0.68359375, "step": 4869 }, { "dpo_loss": 0.74609375, "epoch": 0.78, "final_loss": 0.74609375, "grad_norm": 0.0, "learning_rate": 1.1781317599103236e-07, "loss": 0.6315, "projector_lr": 3.5343952797309707e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 3.015625, "rewards_train/rejected": -3.546875, "sft_loss": 0.5859375, "step": 4870 }, { "dpo_loss": 0.10888671875, "epoch": 0.78, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 1.1764952591191201e-07, "loss": 0.0877, "projector_lr": 3.5294857773573607e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84375, "rewards_train/margins": 7.03125, "rewards_train/rejected": -7.90625, "sft_loss": 0.6953125, "step": 4871 }, { "dpo_loss": 0.154296875, "epoch": 0.78, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 1.1748597441558877e-07, "loss": 0.1185, "projector_lr": 3.524579232467663e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.5, "sft_loss": 0.8203125, "step": 4872 }, { "dpo_loss": 0.1630859375, "epoch": 0.78, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 1.17322521544232e-07, "loss": 0.4583, "projector_lr": 3.51967564632696e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.375, "sft_loss": 0.7265625, "step": 4873 }, { "dpo_loss": 0.1123046875, "epoch": 0.78, "final_loss": 0.1123046875, "grad_norm": 0.0, "learning_rate": 1.171591673399851e-07, "loss": 0.0814, "projector_lr": 3.514775020199553e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.25, "sft_loss": 0.4921875, "step": 4874 }, { "dpo_loss": 0.228515625, "epoch": 0.78, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 1.1699591184496671e-07, "loss": 0.1348, "projector_lr": 3.509877355349002e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.65625, "sft_loss": 0.86328125, "step": 4875 }, { "dpo_loss": 0.053466796875, "epoch": 0.78, "final_loss": 0.053466796875, "grad_norm": 0.0, "learning_rate": 1.1683275510126889e-07, "loss": 0.0982, "projector_lr": 3.504982653038067e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 7.125, "rewards_train/rejected": -7.8125, "sft_loss": 0.63671875, "step": 4876 }, { "dpo_loss": 0.12158203125, "epoch": 0.78, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 1.1666969715095948e-07, "loss": 0.0866, "projector_lr": 3.500090914528785e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.091796875, "rewards_train/margins": 5.875, "rewards_train/rejected": -5.78125, "sft_loss": 0.71484375, "step": 4877 }, { "dpo_loss": 0.01611328125, "epoch": 0.78, "final_loss": 0.01611328125, "grad_norm": 0.0, "learning_rate": 1.1650673803607997e-07, "loss": 0.0115, "projector_lr": 3.495202141082399e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1884765625, "rewards_train/margins": 6.875, "rewards_train/rejected": -7.0625, "sft_loss": 0.70703125, "step": 4878 }, { "dpo_loss": 0.1142578125, "epoch": 0.78, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 1.1634387779864713e-07, "loss": 0.1529, "projector_lr": 3.490316333959414e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 7.21875, "rewards_train/rejected": -8.1875, "sft_loss": 0.6171875, "step": 4879 }, { "dpo_loss": 0.04931640625, "epoch": 0.78, "final_loss": 0.04931640625, "grad_norm": 0.0, "learning_rate": 1.1618111648065127e-07, "loss": 0.0581, "projector_lr": 3.4854334944195383e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.5625, "sft_loss": 0.77734375, "step": 4880 }, { "dpo_loss": 0.058837890625, "epoch": 0.78, "final_loss": 0.058837890625, "grad_norm": 0.0, "learning_rate": 1.1601845412405803e-07, "loss": 0.0715, "projector_lr": 3.480553623721741e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9375, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.0, "sft_loss": 0.91796875, "step": 4881 }, { "dpo_loss": 0.208984375, "epoch": 0.78, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 1.1585589077080704e-07, "loss": 0.3099, "projector_lr": 3.475676723124211e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.71875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.0625, "sft_loss": 0.87890625, "step": 4882 }, { "dpo_loss": 0.1171875, "epoch": 0.78, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 1.1569342646281289e-07, "loss": 0.0868, "projector_lr": 3.470802793884387e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.765625, "rewards_train/margins": 6.1875, "rewards_train/rejected": -6.96875, "sft_loss": 0.71484375, "step": 4883 }, { "dpo_loss": 0.130859375, "epoch": 0.78, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 1.1553106124196416e-07, "loss": 0.2344, "projector_lr": 3.465931837258925e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -4.71875, "sft_loss": 0.57421875, "step": 4884 }, { "dpo_loss": 0.1552734375, "epoch": 0.78, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 1.1536879515012405e-07, "loss": 0.0852, "projector_lr": 3.4610638545037217e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.8125, "sft_loss": 0.609375, "step": 4885 }, { "dpo_loss": 0.039794921875, "epoch": 0.78, "final_loss": 0.039794921875, "grad_norm": 0.0, "learning_rate": 1.1520662822913007e-07, "loss": 0.0518, "projector_lr": 3.4561988468739026e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 6.59375, "rewards_train/rejected": -7.875, "sft_loss": 0.73828125, "step": 4886 }, { "dpo_loss": 0.035888671875, "epoch": 0.78, "final_loss": 0.035888671875, "grad_norm": 0.0, "learning_rate": 1.1504456052079465e-07, "loss": 0.0187, "projector_lr": 3.4513368156238396e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.5625, "sft_loss": 0.6171875, "step": 4887 }, { "dpo_loss": 0.08447265625, "epoch": 0.78, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 1.1488259206690403e-07, "loss": 0.1352, "projector_lr": 3.446477762007121e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 2.71875, "rewards_train/rejected": -3.71875, "sft_loss": 0.7578125, "step": 4888 }, { "dpo_loss": 0.12353515625, "epoch": 0.78, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 1.1472072290921908e-07, "loss": 0.1686, "projector_lr": 3.4416216872765727e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.146484375, "rewards_train/margins": 4.75, "rewards_train/rejected": -4.875, "sft_loss": 0.51171875, "step": 4889 }, { "dpo_loss": 0.0159912109375, "epoch": 0.78, "final_loss": 0.0159912109375, "grad_norm": 0.0, "learning_rate": 1.1455895308947533e-07, "loss": 0.0812, "projector_lr": 3.43676859268426e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.8125, "sft_loss": 0.79296875, "step": 4890 }, { "dpo_loss": 0.6015625, "epoch": 0.78, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 1.1439728264938237e-07, "loss": 0.3417, "projector_lr": 3.431918479481471e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.53125, "sft_loss": 0.84765625, "step": 4891 }, { "dpo_loss": 0.08154296875, "epoch": 0.78, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 1.1423571163062412e-07, "loss": 0.0828, "projector_lr": 3.427071348918724e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 6.5, "rewards_train/rejected": -7.59375, "sft_loss": 2.3125, "step": 4892 }, { "dpo_loss": 0.0023651123046875, "epoch": 0.78, "final_loss": 0.0023651123046875, "grad_norm": 0.0, "learning_rate": 1.1407424007485928e-07, "loss": 0.2496, "projector_lr": 3.4222272022457784e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 7.34375, "rewards_train/rejected": -8.125, "sft_loss": 0.94140625, "step": 4893 }, { "dpo_loss": 0.02587890625, "epoch": 0.78, "final_loss": 0.02587890625, "grad_norm": 0.0, "learning_rate": 1.1391286802372052e-07, "loss": 0.1903, "projector_lr": 3.417386040711616e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.490234375, "rewards_train/margins": 6.15625, "rewards_train/rejected": -6.65625, "sft_loss": 0.578125, "step": 4894 }, { "dpo_loss": 0.0196533203125, "epoch": 0.78, "final_loss": 0.0196533203125, "grad_norm": 0.0, "learning_rate": 1.137515955188148e-07, "loss": 0.0536, "projector_lr": 3.4125478655644444e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3125, "rewards_train/margins": 5.9375, "rewards_train/rejected": -7.25, "sft_loss": 0.7109375, "step": 4895 }, { "dpo_loss": 0.72265625, "epoch": 0.78, "final_loss": 0.72265625, "grad_norm": 0.0, "learning_rate": 1.1359042260172392e-07, "loss": 0.5794, "projector_lr": 3.407712678051718e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.296875, "rewards_train/margins": 3.265625, "rewards_train/rejected": -5.5625, "sft_loss": 1.0703125, "step": 4896 }, { "dpo_loss": 0.02734375, "epoch": 0.78, "final_loss": 0.02734375, "grad_norm": 0.0, "learning_rate": 1.134293493140035e-07, "loss": 0.0237, "projector_lr": 3.4028804794201053e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.375, "rewards_train/margins": 6.4375, "rewards_train/rejected": -6.8125, "sft_loss": 0.63671875, "step": 4897 }, { "dpo_loss": 0.18359375, "epoch": 0.78, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 1.1326837569718368e-07, "loss": 0.1041, "projector_lr": 3.3980512709155106e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.5625, "sft_loss": 0.78125, "step": 4898 }, { "dpo_loss": 0.216796875, "epoch": 0.78, "final_loss": 0.216796875, "grad_norm": 0.0, "learning_rate": 1.1310750179276873e-07, "loss": 0.2005, "projector_lr": 3.3932250537830623e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.375, "rewards_train/rejected": -4.875, "sft_loss": 1.078125, "step": 4899 }, { "dpo_loss": 0.6875, "epoch": 0.78, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 1.1294672764223756e-07, "loss": 0.3791, "projector_lr": 3.388401829267127e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.734375, "rewards_train/margins": 3.78125, "rewards_train/rejected": -5.5, "sft_loss": 0.7421875, "step": 4900 }, { "dpo_loss": 0.0654296875, "epoch": 0.78, "final_loss": 0.0654296875, "grad_norm": 0.0, "learning_rate": 1.1278605328704288e-07, "loss": 0.1408, "projector_lr": 3.3835815986112865e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6875, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.4375, "sft_loss": 0.59375, "step": 4901 }, { "dpo_loss": 0.072265625, "epoch": 0.78, "final_loss": 0.072265625, "grad_norm": 0.0, "learning_rate": 1.1262547876861244e-07, "loss": 0.1017, "projector_lr": 3.378764363058374e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 6.28125, "rewards_train/rejected": -6.96875, "sft_loss": 0.609375, "step": 4902 }, { "dpo_loss": 0.11474609375, "epoch": 0.78, "final_loss": 0.11474609375, "grad_norm": 0.0, "learning_rate": 1.1246500412834709e-07, "loss": 0.1538, "projector_lr": 3.373950123850413e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.875, "sft_loss": 0.7421875, "step": 4903 }, { "dpo_loss": 0.2431640625, "epoch": 0.78, "final_loss": 0.2431640625, "grad_norm": 0.0, "learning_rate": 1.1230462940762303e-07, "loss": 0.13, "projector_lr": 3.369138882228691e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 2.703125, "rewards_train/rejected": -3.375, "sft_loss": 0.75, "step": 4904 }, { "dpo_loss": 0.158203125, "epoch": 0.78, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 1.1214435464779003e-07, "loss": 0.1481, "projector_lr": 3.3643306394337013e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.9375, "sft_loss": 0.79296875, "step": 4905 }, { "dpo_loss": 0.10791015625, "epoch": 0.78, "final_loss": 0.10791015625, "grad_norm": 0.0, "learning_rate": 1.1198417989017267e-07, "loss": 0.1939, "projector_lr": 3.3595253967051804e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 7.9375, "rewards_train/rejected": -8.5625, "sft_loss": 0.62109375, "step": 4906 }, { "dpo_loss": 0.12255859375, "epoch": 0.79, "final_loss": 0.12255859375, "grad_norm": 0.0, "learning_rate": 1.1182410517606883e-07, "loss": 0.1807, "projector_lr": 3.354723155282065e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.423828125, "rewards_train/margins": 6.21875, "rewards_train/rejected": -6.65625, "sft_loss": 0.64453125, "step": 4907 }, { "dpo_loss": 0.06884765625, "epoch": 0.79, "final_loss": 0.06884765625, "grad_norm": 0.0, "learning_rate": 1.1166413054675156e-07, "loss": 0.1137, "projector_lr": 3.349923916402547e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.53125, "rewards_train/margins": 6.5625, "rewards_train/rejected": -8.125, "sft_loss": 0.76953125, "step": 4908 }, { "dpo_loss": 0.3359375, "epoch": 0.79, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 1.1150425604346742e-07, "loss": 0.1806, "projector_lr": 3.345127681304023e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.40625, "sft_loss": 0.71875, "step": 4909 }, { "dpo_loss": 0.03564453125, "epoch": 0.79, "final_loss": 0.03564453125, "grad_norm": 0.0, "learning_rate": 1.1134448170743771e-07, "loss": 0.1378, "projector_lr": 3.3403344512231313e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.07421875, "rewards_train/margins": 5.875, "rewards_train/rejected": -5.9375, "sft_loss": 0.94140625, "step": 4910 }, { "dpo_loss": 0.2421875, "epoch": 0.79, "final_loss": 0.2421875, "grad_norm": 0.0, "learning_rate": 1.1118480757985754e-07, "loss": 0.2659, "projector_lr": 3.3355442273957265e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.203125, "rewards_train/margins": 2.671875, "rewards_train/rejected": -3.859375, "sft_loss": 0.79296875, "step": 4911 }, { "dpo_loss": 0.1083984375, "epoch": 0.79, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 1.1102523370189614e-07, "loss": 0.1177, "projector_lr": 3.3307570110568847e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.474609375, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.59375, "sft_loss": 0.67578125, "step": 4912 }, { "dpo_loss": 0.0634765625, "epoch": 0.79, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 1.1086576011469701e-07, "loss": 0.0989, "projector_lr": 3.32597280344091e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.046142578125, "rewards_train/margins": 7.09375, "rewards_train/rejected": -7.15625, "sft_loss": 0.63671875, "step": 4913 }, { "dpo_loss": 0.0625, "epoch": 0.79, "final_loss": 0.0625, "grad_norm": 0.0, "learning_rate": 1.10706386859378e-07, "loss": 0.106, "projector_lr": 3.3211916057813404e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.6875, "sft_loss": 0.76953125, "step": 4914 }, { "dpo_loss": 0.0108642578125, "epoch": 0.79, "final_loss": 0.0108642578125, "grad_norm": 0.0, "learning_rate": 1.1054711397703076e-07, "loss": 0.266, "projector_lr": 3.316413419310923e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.357421875, "rewards_train/margins": 6.3125, "rewards_train/rejected": -5.96875, "sft_loss": 0.578125, "step": 4915 }, { "dpo_loss": 0.01806640625, "epoch": 0.79, "final_loss": 0.01806640625, "grad_norm": 0.0, "learning_rate": 1.1038794150872117e-07, "loss": 0.086, "projector_lr": 3.311638245261635e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 6.15625, "rewards_train/rejected": -7.15625, "sft_loss": 0.58203125, "step": 4916 }, { "dpo_loss": 0.04248046875, "epoch": 0.79, "final_loss": 0.04248046875, "grad_norm": 0.0, "learning_rate": 1.1022886949548916e-07, "loss": 0.177, "projector_lr": 3.3068660848646747e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.734375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.40625, "sft_loss": 0.80859375, "step": 4917 }, { "dpo_loss": 0.216796875, "epoch": 0.79, "final_loss": 0.216796875, "grad_norm": 0.0, "learning_rate": 1.1006989797834904e-07, "loss": 0.1767, "projector_lr": 3.302096939350471e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.65625, "sft_loss": 0.7890625, "step": 4918 }, { "dpo_loss": 0.59765625, "epoch": 0.79, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 1.099110269982888e-07, "loss": 0.3131, "projector_lr": 3.2973308099486643e-07, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -1.265625, "rewards_train/margins": 1.7265625, "rewards_train/rejected": -2.984375, "sft_loss": 1.0703125, "step": 4919 }, { "dpo_loss": 0.09033203125, "epoch": 0.79, "final_loss": 0.09033203125, "grad_norm": 0.0, "learning_rate": 1.0975225659627114e-07, "loss": 0.0694, "projector_lr": 3.292567697888134e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.59375, "sft_loss": 0.58203125, "step": 4920 }, { "dpo_loss": 0.072265625, "epoch": 0.79, "final_loss": 0.072265625, "grad_norm": 0.0, "learning_rate": 1.0959358681323177e-07, "loss": 0.3109, "projector_lr": 3.2878076043969535e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 7.28125, "rewards_train/rejected": -8.0, "sft_loss": 0.6953125, "step": 4921 }, { "dpo_loss": 0.1806640625, "epoch": 0.79, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 1.0943501769008151e-07, "loss": 0.0932, "projector_lr": 3.2830505307024456e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 6.59375, "rewards_train/rejected": -7.25, "sft_loss": 0.54296875, "step": 4922 }, { "dpo_loss": 0.18359375, "epoch": 0.79, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 1.0927654926770463e-07, "loss": 0.1271, "projector_lr": 3.278296478031139e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.578125, "rewards_train/margins": 3.921875, "rewards_train/rejected": -4.5, "sft_loss": 0.7578125, "step": 4923 }, { "dpo_loss": 0.248046875, "epoch": 0.79, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 1.0911818158695996e-07, "loss": 0.128, "projector_lr": 3.273545447608799e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.125, "sft_loss": 1.4609375, "step": 4924 }, { "dpo_loss": 0.25390625, "epoch": 0.79, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 1.0895991468867949e-07, "loss": 0.1712, "projector_lr": 3.268797440660385e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.9375, "sft_loss": 1.0625, "step": 4925 }, { "dpo_loss": 0.0751953125, "epoch": 0.79, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 1.0880174861367009e-07, "loss": 0.0683, "projector_lr": 3.2640524584101026e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.828125, "rewards_train/margins": 5.59375, "rewards_train/rejected": -7.40625, "sft_loss": 0.76953125, "step": 4926 }, { "dpo_loss": 0.1328125, "epoch": 0.79, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 1.0864368340271207e-07, "loss": 0.0749, "projector_lr": 3.259310502081362e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.84375, "rewards_train/margins": 3.421875, "rewards_train/rejected": -5.28125, "sft_loss": 0.96484375, "step": 4927 }, { "dpo_loss": 0.02099609375, "epoch": 0.79, "final_loss": 0.02099609375, "grad_norm": 0.0, "learning_rate": 1.0848571909656018e-07, "loss": 0.148, "projector_lr": 3.2545715728968057e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.380859375, "rewards_train/margins": 7.03125, "rewards_train/rejected": -7.40625, "sft_loss": 0.86328125, "step": 4928 }, { "dpo_loss": 0.1318359375, "epoch": 0.79, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 1.0832785573594283e-07, "loss": 0.0976, "projector_lr": 3.249835672078285e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.28125, "sft_loss": 0.80078125, "step": 4929 }, { "dpo_loss": 0.1083984375, "epoch": 0.79, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 1.0817009336156251e-07, "loss": 0.1321, "projector_lr": 3.2451028008468757e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 4.375, "rewards_train/rejected": -6.1875, "sft_loss": 1.046875, "step": 4930 }, { "dpo_loss": 0.275390625, "epoch": 0.79, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 1.080124320140955e-07, "loss": 0.3688, "projector_lr": 3.240372960422865e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.75, "sft_loss": 0.94140625, "step": 4931 }, { "dpo_loss": 0.130859375, "epoch": 0.79, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 1.0785487173419244e-07, "loss": 0.2066, "projector_lr": 3.2356461520257733e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.03125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.59375, "sft_loss": 0.84375, "step": 4932 }, { "dpo_loss": 0.057373046875, "epoch": 0.79, "final_loss": 0.057373046875, "grad_norm": 0.0, "learning_rate": 1.0769741256247755e-07, "loss": 0.0423, "projector_lr": 3.230922376874327e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69140625, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.0625, "sft_loss": 0.5859375, "step": 4933 }, { "dpo_loss": 0.053955078125, "epoch": 0.79, "final_loss": 0.053955078125, "grad_norm": 0.0, "learning_rate": 1.0754005453954901e-07, "loss": 0.1698, "projector_lr": 3.2262016361864705e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.341796875, "rewards_train/margins": 7.5625, "rewards_train/rejected": -7.90625, "sft_loss": 0.97265625, "step": 4934 }, { "dpo_loss": 0.04638671875, "epoch": 0.79, "final_loss": 0.04638671875, "grad_norm": 0.0, "learning_rate": 1.0738279770597919e-07, "loss": 0.0895, "projector_lr": 3.221483931179376e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.373046875, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.625, "sft_loss": 0.8203125, "step": 4935 }, { "dpo_loss": 0.09228515625, "epoch": 0.79, "final_loss": 0.09228515625, "grad_norm": 0.0, "learning_rate": 1.0722564210231406e-07, "loss": 0.0926, "projector_lr": 3.2167692630694216e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.8125, "sft_loss": 0.58984375, "step": 4936 }, { "dpo_loss": 0.37109375, "epoch": 0.79, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 1.0706858776907352e-07, "loss": 0.1986, "projector_lr": 3.212057633072206e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0, "rewards_train/margins": 3.0625, "rewards_train/rejected": -4.0625, "sft_loss": 1.0, "step": 4937 }, { "dpo_loss": 0.19140625, "epoch": 0.79, "final_loss": 0.19140625, "grad_norm": 0.0, "learning_rate": 1.0691163474675169e-07, "loss": 0.2032, "projector_lr": 3.207349042402551e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.28125, "rewards_train/margins": 3.765625, "rewards_train/rejected": -5.03125, "sft_loss": 0.6953125, "step": 4938 }, { "dpo_loss": 0.1748046875, "epoch": 0.79, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 1.0675478307581625e-07, "loss": 0.136, "projector_lr": 3.202643492274488e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 5.625, "rewards_train/rejected": -7.03125, "sft_loss": 0.7578125, "step": 4939 }, { "dpo_loss": 0.06787109375, "epoch": 0.79, "final_loss": 0.06787109375, "grad_norm": 0.0, "learning_rate": 1.0659803279670865e-07, "loss": 0.1768, "projector_lr": 3.19794098390126e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.65625, "sft_loss": 0.6953125, "step": 4940 }, { "dpo_loss": 0.041748046875, "epoch": 0.79, "final_loss": 0.041748046875, "grad_norm": 0.0, "learning_rate": 1.0644138394984465e-07, "loss": 0.1508, "projector_lr": 3.1932415184953397e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.23828125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.25, "sft_loss": 0.75, "step": 4941 }, { "dpo_loss": 0.1201171875, "epoch": 0.79, "final_loss": 0.1201171875, "grad_norm": 0.0, "learning_rate": 1.0628483657561348e-07, "loss": 0.1676, "projector_lr": 3.188545097268405e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.337890625, "rewards_train/margins": 3.625, "rewards_train/rejected": -3.96875, "sft_loss": 0.75, "step": 4942 }, { "dpo_loss": 0.0673828125, "epoch": 0.79, "final_loss": 0.0673828125, "grad_norm": 0.0, "learning_rate": 1.0612839071437824e-07, "loss": 0.0435, "projector_lr": 3.1838517214313474e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.09375, "sft_loss": 0.625, "step": 4943 }, { "dpo_loss": 0.375, "epoch": 0.79, "final_loss": 0.375, "grad_norm": 0.0, "learning_rate": 1.0597204640647589e-07, "loss": 0.315, "projector_lr": 3.179161392194277e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.75, "sft_loss": 0.76953125, "step": 4944 }, { "dpo_loss": 0.19140625, "epoch": 0.79, "final_loss": 0.19140625, "grad_norm": 0.0, "learning_rate": 1.0581580369221743e-07, "loss": 0.2556, "projector_lr": 3.174474110766523e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.373046875, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.34375, "sft_loss": 0.6796875, "step": 4945 }, { "dpo_loss": 0.04150390625, "epoch": 0.79, "final_loss": 0.04150390625, "grad_norm": 0.0, "learning_rate": 1.056596626118873e-07, "loss": 0.1157, "projector_lr": 3.169789878356619e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.78125, "sft_loss": 0.99609375, "step": 4946 }, { "dpo_loss": 0.026123046875, "epoch": 0.79, "final_loss": 0.026123046875, "grad_norm": 0.0, "learning_rate": 1.0550362320574424e-07, "loss": 0.0483, "projector_lr": 3.1651086961723275e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.78125, "sft_loss": 0.48828125, "step": 4947 }, { "dpo_loss": 0.458984375, "epoch": 0.79, "final_loss": 0.458984375, "grad_norm": 0.0, "learning_rate": 1.0534768551401995e-07, "loss": 0.2425, "projector_lr": 3.160430565420599e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.421875, "rewards_train/margins": 2.390625, "rewards_train/rejected": -3.8125, "sft_loss": 1.1328125, "step": 4948 }, { "dpo_loss": 0.2216796875, "epoch": 0.79, "final_loss": 0.2216796875, "grad_norm": 0.0, "learning_rate": 1.0519184957692079e-07, "loss": 0.3054, "projector_lr": 3.155755487307624e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 3.28125, "rewards_train/rejected": -3.84375, "sft_loss": 0.796875, "step": 4949 }, { "dpo_loss": 0.006072998046875, "epoch": 0.79, "final_loss": 0.006072998046875, "grad_norm": 0.0, "learning_rate": 1.0503611543462626e-07, "loss": 0.3232, "projector_lr": 3.151083463038788e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.8125, "sft_loss": 0.6328125, "step": 4950 }, { "dpo_loss": 0.033935546875, "epoch": 0.79, "final_loss": 0.033935546875, "grad_norm": 0.0, "learning_rate": 1.048804831272902e-07, "loss": 0.0628, "projector_lr": 3.146414493818706e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.3125, "sft_loss": 0.89453125, "step": 4951 }, { "dpo_loss": 0.0137939453125, "epoch": 0.79, "final_loss": 0.0137939453125, "grad_norm": 0.0, "learning_rate": 1.0472495269503939e-07, "loss": 0.0617, "projector_lr": 3.141748580851182e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.0118408203125, "rewards_train/margins": 6.71875, "rewards_train/rejected": -6.6875, "sft_loss": 0.5546875, "step": 4952 }, { "dpo_loss": 0.1416015625, "epoch": 0.79, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 1.0456952417797516e-07, "loss": 0.371, "projector_lr": 3.137085725339255e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.8125, "sft_loss": 0.875, "step": 4953 }, { "dpo_loss": 0.234375, "epoch": 0.79, "final_loss": 0.234375, "grad_norm": 0.0, "learning_rate": 1.0441419761617193e-07, "loss": 0.1673, "projector_lr": 3.132425928485158e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.90625, "sft_loss": 0.427734375, "step": 4954 }, { "dpo_loss": 0.244140625, "epoch": 0.79, "final_loss": 0.244140625, "grad_norm": 0.0, "learning_rate": 1.042589730496784e-07, "loss": 0.1858, "projector_lr": 3.127769191490352e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.71875, "sft_loss": 0.4921875, "step": 4955 }, { "dpo_loss": 0.0155029296875, "epoch": 0.79, "final_loss": 0.0155029296875, "grad_norm": 0.0, "learning_rate": 1.041038505185165e-07, "loss": 0.1379, "projector_lr": 3.123115515555495e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 6.9375, "rewards_train/rejected": -7.5, "sft_loss": 0.72265625, "step": 4956 }, { "dpo_loss": 0.0400390625, "epoch": 0.79, "final_loss": 0.0400390625, "grad_norm": 0.0, "learning_rate": 1.0394883006268207e-07, "loss": 0.0519, "projector_lr": 3.1184649018804624e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 6.78125, "rewards_train/rejected": -7.96875, "sft_loss": 0.5703125, "step": 4957 }, { "dpo_loss": 0.15234375, "epoch": 0.79, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 1.0379391172214442e-07, "loss": 0.1218, "projector_lr": 3.1138173516643326e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.875, "sft_loss": 0.765625, "step": 4958 }, { "dpo_loss": 0.71484375, "epoch": 0.79, "final_loss": 0.71484375, "grad_norm": 0.0, "learning_rate": 1.0363909553684703e-07, "loss": 0.5178, "projector_lr": 3.1091728661054107e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.625, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.75, "sft_loss": 0.6796875, "step": 4959 }, { "dpo_loss": 0.06494140625, "epoch": 0.79, "final_loss": 0.06494140625, "grad_norm": 0.0, "learning_rate": 1.0348438154670652e-07, "loss": 0.0426, "projector_lr": 3.104531446401196e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15234375, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.15625, "sft_loss": 0.75, "step": 4960 }, { "dpo_loss": 0.365234375, "epoch": 0.79, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 1.033297697916134e-07, "loss": 0.3192, "projector_lr": 3.099893093748402e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 3.859375, "rewards_train/rejected": -5.21875, "sft_loss": 0.77734375, "step": 4961 }, { "dpo_loss": 0.0771484375, "epoch": 0.79, "final_loss": 0.0771484375, "grad_norm": 0.0, "learning_rate": 1.031752603114316e-07, "loss": 0.2307, "projector_lr": 3.095257809342948e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 7.15625, "rewards_train/rejected": -7.6875, "sft_loss": 0.7734375, "step": 4962 }, { "dpo_loss": 0.2734375, "epoch": 0.79, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 1.0302085314599911e-07, "loss": 0.2785, "projector_lr": 3.0906255943799736e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.59375, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.5, "sft_loss": 0.7265625, "step": 4963 }, { "dpo_loss": 0.033447265625, "epoch": 0.79, "final_loss": 0.033447265625, "grad_norm": 0.0, "learning_rate": 1.028665483351271e-07, "loss": 0.087, "projector_lr": 3.085996450053813e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.59375, "sft_loss": 0.73046875, "step": 4964 }, { "dpo_loss": 0.07568359375, "epoch": 0.79, "final_loss": 0.07568359375, "grad_norm": 0.0, "learning_rate": 1.0271234591860085e-07, "loss": 0.087, "projector_lr": 3.081370377558025e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 3.546875, "rewards_train/rejected": -4.59375, "sft_loss": 0.61328125, "step": 4965 }, { "dpo_loss": 0.0771484375, "epoch": 0.79, "final_loss": 0.0771484375, "grad_norm": 0.0, "learning_rate": 1.0255824593617845e-07, "loss": 0.067, "projector_lr": 3.076747378085354e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.125, "sft_loss": 0.82421875, "step": 4966 }, { "dpo_loss": 0.1328125, "epoch": 0.79, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 1.0240424842759238e-07, "loss": 0.0985, "projector_lr": 3.0721274528277715e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.234375, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.125, "sft_loss": 0.984375, "step": 4967 }, { "dpo_loss": 0.0026397705078125, "epoch": 0.79, "final_loss": 0.0026397705078125, "grad_norm": 0.0, "learning_rate": 1.0225035343254818e-07, "loss": 0.0557, "projector_lr": 3.0675106029764457e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 7.59375, "rewards_train/rejected": -8.75, "sft_loss": 0.93359375, "step": 4968 }, { "dpo_loss": 0.2001953125, "epoch": 0.8, "final_loss": 0.2001953125, "grad_norm": 0.0, "learning_rate": 1.0209656099072539e-07, "loss": 0.2296, "projector_lr": 3.0628968297217617e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.5, "sft_loss": 0.6484375, "step": 4969 }, { "dpo_loss": 0.07080078125, "epoch": 0.8, "final_loss": 0.07080078125, "grad_norm": 0.0, "learning_rate": 1.0194287114177674e-07, "loss": 0.3507, "projector_lr": 3.0582861342533027e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 7.28125, "rewards_train/rejected": -8.25, "sft_loss": 0.71484375, "step": 4970 }, { "dpo_loss": 0.4140625, "epoch": 0.8, "final_loss": 0.4140625, "grad_norm": 0.0, "learning_rate": 1.0178928392532859e-07, "loss": 0.2451, "projector_lr": 3.053678517759858e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.21875, "sft_loss": 0.91796875, "step": 4971 }, { "dpo_loss": 0.1533203125, "epoch": 0.8, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 1.0163579938098082e-07, "loss": 0.0897, "projector_lr": 3.049073981429425e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.25, "sft_loss": 0.95703125, "step": 4972 }, { "dpo_loss": 0.34765625, "epoch": 0.8, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 1.0148241754830711e-07, "loss": 0.19, "projector_lr": 3.0444725264492134e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.765625, "rewards_train/margins": 2.171875, "rewards_train/rejected": -3.9375, "sft_loss": 1.4765625, "step": 4973 }, { "dpo_loss": 0.04541015625, "epoch": 0.8, "final_loss": 0.04541015625, "grad_norm": 0.0, "learning_rate": 1.0132913846685436e-07, "loss": 0.1192, "projector_lr": 3.0398741540056316e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.09375, "sft_loss": 0.890625, "step": 4974 }, { "dpo_loss": 0.30078125, "epoch": 0.8, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 1.0117596217614294e-07, "loss": 0.3581, "projector_lr": 3.0352788652842884e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.34375, "sft_loss": 0.71875, "step": 4975 }, { "dpo_loss": 0.2373046875, "epoch": 0.8, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 1.0102288871566705e-07, "loss": 0.1692, "projector_lr": 3.030686661470012e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.625, "sft_loss": 0.70703125, "step": 4976 }, { "dpo_loss": 0.298828125, "epoch": 0.8, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 1.0086991812489409e-07, "loss": 0.2044, "projector_lr": 3.026097543746823e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.21875, "sft_loss": 0.88671875, "step": 4977 }, { "dpo_loss": 0.150390625, "epoch": 0.8, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 1.00717050443265e-07, "loss": 0.2118, "projector_lr": 3.02151151329795e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.90625, "sft_loss": 0.84375, "step": 4978 }, { "dpo_loss": 0.03564453125, "epoch": 0.8, "final_loss": 0.03564453125, "grad_norm": 0.0, "learning_rate": 1.0056428571019403e-07, "loss": 0.0376, "projector_lr": 3.0169285713058213e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.53125, "sft_loss": 1.3203125, "step": 4979 }, { "dpo_loss": 0.09423828125, "epoch": 0.8, "final_loss": 0.09423828125, "grad_norm": 0.0, "learning_rate": 1.0041162396506942e-07, "loss": 0.1938, "projector_lr": 3.0123487189520826e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 5.59375, "rewards_train/rejected": -7.0625, "sft_loss": 0.7578125, "step": 4980 }, { "dpo_loss": 0.1630859375, "epoch": 0.8, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 1.002590652472522e-07, "loss": 0.1488, "projector_lr": 3.007771957417566e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.703125, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.96875, "sft_loss": 0.94140625, "step": 4981 }, { "dpo_loss": 0.037841796875, "epoch": 0.8, "final_loss": 0.037841796875, "grad_norm": 0.0, "learning_rate": 1.001066095960773e-07, "loss": 0.2884, "projector_lr": 3.0031982878823196e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 6.1875, "rewards_train/rejected": -6.75, "sft_loss": 0.8984375, "step": 4982 }, { "dpo_loss": 0.14453125, "epoch": 0.8, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 9.995425705085292e-08, "loss": 0.1603, "projector_lr": 2.9986277115255877e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 3.140625, "rewards_train/rejected": -4.8125, "sft_loss": 0.953125, "step": 4983 }, { "dpo_loss": 0.006317138671875, "epoch": 0.8, "final_loss": 0.006317138671875, "grad_norm": 0.0, "learning_rate": 9.980200765086055e-08, "loss": 0.0695, "projector_lr": 2.994060229525817e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1474609375, "rewards_train/margins": 5.75, "rewards_train/rejected": -5.90625, "sft_loss": 0.6640625, "step": 4984 }, { "dpo_loss": 0.384765625, "epoch": 0.8, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 9.964986143535514e-08, "loss": 0.3226, "projector_lr": 2.9894958430606544e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5390625, "rewards_train/margins": 2.03125, "rewards_train/rejected": -3.5625, "sft_loss": 0.72265625, "step": 4985 }, { "dpo_loss": 0.08544921875, "epoch": 0.8, "final_loss": 0.08544921875, "grad_norm": 0.0, "learning_rate": 9.949781844356536e-08, "loss": 0.1997, "projector_lr": 2.984934553306961e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.375, "sft_loss": 0.70703125, "step": 4986 }, { "dpo_loss": 0.458984375, "epoch": 0.8, "final_loss": 0.458984375, "grad_norm": 0.0, "learning_rate": 9.93458787146928e-08, "loss": 0.3377, "projector_lr": 2.9803763614407844e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.203125, "rewards_train/margins": 4.5, "rewards_train/rejected": -4.6875, "sft_loss": 0.75390625, "step": 4987 }, { "dpo_loss": 0.28125, "epoch": 0.8, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 9.919404228791267e-08, "loss": 0.1779, "projector_lr": 2.9758212686373806e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6328125, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.5625, "sft_loss": 0.90625, "step": 4988 }, { "dpo_loss": 0.06591796875, "epoch": 0.8, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 9.904230920237338e-08, "loss": 0.061, "projector_lr": 2.9712692760712017e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.5625, "sft_loss": 0.80859375, "step": 4989 }, { "dpo_loss": 0.1279296875, "epoch": 0.8, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 9.88906794971971e-08, "loss": 0.0837, "projector_lr": 2.966720384915913e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.5390625, "rewards_train/margins": 5.75, "rewards_train/rejected": -5.21875, "sft_loss": 0.83203125, "step": 4990 }, { "dpo_loss": 0.1513671875, "epoch": 0.8, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 9.873915321147874e-08, "loss": 0.1025, "projector_lr": 2.9621745963443624e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.03125, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.78125, "sft_loss": 0.8125, "step": 4991 }, { "dpo_loss": 0.546875, "epoch": 0.8, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 9.85877303842873e-08, "loss": 0.3831, "projector_lr": 2.9576319115286195e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.703125, "rewards_train/margins": 1.609375, "rewards_train/rejected": -4.3125, "sft_loss": 0.9140625, "step": 4992 }, { "dpo_loss": 0.4140625, "epoch": 0.8, "final_loss": 0.4140625, "grad_norm": 0.0, "learning_rate": 9.843641105466416e-08, "loss": 0.2854, "projector_lr": 2.953092331639925e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7421875, "rewards_train/margins": 2.9375, "rewards_train/rejected": -4.6875, "sft_loss": 0.90625, "step": 4993 }, { "dpo_loss": 0.27734375, "epoch": 0.8, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 9.828519526162488e-08, "loss": 0.1534, "projector_lr": 2.9485558578487466e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.0, "sft_loss": 1.03125, "step": 4994 }, { "dpo_loss": 0.2890625, "epoch": 0.8, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 9.813408304415771e-08, "loss": 0.286, "projector_lr": 2.9440224913247314e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.96875, "sft_loss": 0.7109375, "step": 4995 }, { "dpo_loss": 0.030029296875, "epoch": 0.8, "final_loss": 0.030029296875, "grad_norm": 0.0, "learning_rate": 9.798307444122489e-08, "loss": 0.0501, "projector_lr": 2.939492233236747e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1748046875, "rewards_train/margins": 7.3125, "rewards_train/rejected": -7.5, "sft_loss": 0.625, "step": 4996 }, { "dpo_loss": 0.01226806640625, "epoch": 0.8, "final_loss": 0.01226806640625, "grad_norm": 0.0, "learning_rate": 9.783216949176093e-08, "loss": 0.3255, "projector_lr": 2.934965084752828e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 6.78125, "rewards_train/rejected": -8.0, "sft_loss": 0.88671875, "step": 4997 }, { "dpo_loss": 0.03662109375, "epoch": 0.8, "final_loss": 0.03662109375, "grad_norm": 0.0, "learning_rate": 9.768136823467466e-08, "loss": 0.0751, "projector_lr": 2.93044104704024e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.65625, "sft_loss": 0.91015625, "step": 4998 }, { "dpo_loss": 0.197265625, "epoch": 0.8, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 9.753067070884735e-08, "loss": 0.123, "projector_lr": 2.9259201212654206e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.25, "rewards_train/margins": 2.984375, "rewards_train/rejected": -4.21875, "sft_loss": 0.91015625, "step": 4999 }, { "dpo_loss": 0.0206298828125, "epoch": 0.8, "final_loss": 0.0206298828125, "grad_norm": 0.0, "learning_rate": 9.738007695313427e-08, "loss": 0.0213, "projector_lr": 2.921402308594028e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.828125, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.46875, "sft_loss": 1.125, "step": 5000 }, { "dpo_loss": 0.1845703125, "epoch": 0.8, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 9.722958700636335e-08, "loss": 0.1097, "projector_lr": 2.916887610190901e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.84375, "sft_loss": 0.75, "step": 5001 }, { "dpo_loss": 0.0947265625, "epoch": 0.8, "final_loss": 0.0947265625, "grad_norm": 0.0, "learning_rate": 9.707920090733595e-08, "loss": 0.123, "projector_lr": 2.912376027220079e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1787109375, "rewards_train/margins": 5.03125, "rewards_train/rejected": -5.21875, "sft_loss": 1.1015625, "step": 5002 }, { "dpo_loss": 0.07861328125, "epoch": 0.8, "final_loss": 0.07861328125, "grad_norm": 0.0, "learning_rate": 9.69289186948265e-08, "loss": 0.1401, "projector_lr": 2.9078675608447953e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 4.59375, "rewards_train/rejected": -6.34375, "sft_loss": 0.8828125, "step": 5003 }, { "dpo_loss": 0.419921875, "epoch": 0.8, "final_loss": 0.419921875, "grad_norm": 0.0, "learning_rate": 9.677874040758315e-08, "loss": 0.3051, "projector_lr": 2.903362212227495e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 3.421875, "rewards_train/rejected": -4.375, "sft_loss": 0.91015625, "step": 5004 }, { "dpo_loss": 0.1064453125, "epoch": 0.8, "final_loss": 0.1064453125, "grad_norm": 0.0, "learning_rate": 9.662866608432674e-08, "loss": 0.0794, "projector_lr": 2.8988599825298026e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.75, "rewards_train/margins": 5.46875, "rewards_train/rejected": -7.21875, "sft_loss": 1.140625, "step": 5005 }, { "dpo_loss": 0.1103515625, "epoch": 0.8, "final_loss": 0.1103515625, "grad_norm": 0.0, "learning_rate": 9.647869576375146e-08, "loss": 0.0899, "projector_lr": 2.8943608729125437e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 3.21875, "rewards_train/rejected": -3.640625, "sft_loss": 0.7421875, "step": 5006 }, { "dpo_loss": 0.1806640625, "epoch": 0.8, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 9.632882948452453e-08, "loss": 0.1057, "projector_lr": 2.889864884535736e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 3.796875, "rewards_train/rejected": -4.78125, "sft_loss": 0.6484375, "step": 5007 }, { "dpo_loss": 0.1318359375, "epoch": 0.8, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 9.617906728528679e-08, "loss": 0.138, "projector_lr": 2.8853720185586043e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.71875, "sft_loss": 0.62890625, "step": 5008 }, { "dpo_loss": 0.59765625, "epoch": 0.8, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 9.60294092046518e-08, "loss": 0.3908, "projector_lr": 2.8808822761395543e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 1.5546875, "rewards_train/rejected": -3.046875, "sft_loss": 0.74609375, "step": 5009 }, { "dpo_loss": 0.0213623046875, "epoch": 0.8, "final_loss": 0.0213623046875, "grad_norm": 0.0, "learning_rate": 9.587985528120657e-08, "loss": 0.1496, "projector_lr": 2.8763956584361973e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.21875, "sft_loss": 0.79296875, "step": 5010 }, { "dpo_loss": 0.279296875, "epoch": 0.8, "final_loss": 0.279296875, "grad_norm": 0.0, "learning_rate": 9.573040555351108e-08, "loss": 0.1683, "projector_lr": 2.8719121666053327e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.15625, "sft_loss": 0.84375, "step": 5011 }, { "dpo_loss": 0.040283203125, "epoch": 0.8, "final_loss": 0.040283203125, "grad_norm": 0.0, "learning_rate": 9.558106006009848e-08, "loss": 0.0255, "projector_lr": 2.8674318018029544e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.5, "sft_loss": 0.57421875, "step": 5012 }, { "dpo_loss": 0.16796875, "epoch": 0.8, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 9.543181883947488e-08, "loss": 0.302, "projector_lr": 2.8629545651842466e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.109375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.78125, "sft_loss": 1.109375, "step": 5013 }, { "dpo_loss": 0.1943359375, "epoch": 0.8, "final_loss": 0.1943359375, "grad_norm": 0.0, "learning_rate": 9.528268193011996e-08, "loss": 0.1268, "projector_lr": 2.858480457903599e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -4.9375, "sft_loss": 0.6953125, "step": 5014 }, { "dpo_loss": 0.0224609375, "epoch": 0.8, "final_loss": 0.0224609375, "grad_norm": 0.0, "learning_rate": 9.513364937048613e-08, "loss": 0.1607, "projector_lr": 2.854009481114584e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.921875, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.59375, "sft_loss": 0.75, "step": 5015 }, { "dpo_loss": 0.0238037109375, "epoch": 0.8, "final_loss": 0.0238037109375, "grad_norm": 0.0, "learning_rate": 9.49847211989988e-08, "loss": 0.1461, "projector_lr": 2.849541635969964e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 6.6875, "rewards_train/rejected": -8.125, "sft_loss": 0.9140625, "step": 5016 }, { "dpo_loss": 0.0849609375, "epoch": 0.8, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 9.483589745405696e-08, "loss": 0.2092, "projector_lr": 2.845076923621709e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.84375, "sft_loss": 0.48046875, "step": 5017 }, { "dpo_loss": 0.1796875, "epoch": 0.8, "final_loss": 0.1796875, "grad_norm": 0.0, "learning_rate": 9.468717817403227e-08, "loss": 0.14, "projector_lr": 2.8406153452209684e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.1875, "sft_loss": 1.1015625, "step": 5018 }, { "dpo_loss": 0.014892578125, "epoch": 0.8, "final_loss": 0.014892578125, "grad_norm": 0.0, "learning_rate": 9.453856339726952e-08, "loss": 0.0692, "projector_lr": 2.836156901918086e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84375, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.78125, "sft_loss": 0.88671875, "step": 5019 }, { "dpo_loss": 0.35546875, "epoch": 0.8, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 9.439005316208642e-08, "loss": 0.3519, "projector_lr": 2.831701594862593e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.263671875, "rewards_train/margins": 4.6875, "rewards_train/rejected": -4.9375, "sft_loss": 0.64453125, "step": 5020 }, { "dpo_loss": 0.07470703125, "epoch": 0.8, "final_loss": 0.07470703125, "grad_norm": 0.0, "learning_rate": 9.424164750677421e-08, "loss": 0.1411, "projector_lr": 2.8272494252032266e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.734375, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.90625, "sft_loss": 0.78515625, "step": 5021 }, { "dpo_loss": 0.4296875, "epoch": 0.8, "final_loss": 0.4296875, "grad_norm": 0.0, "learning_rate": 9.40933464695966e-08, "loss": 0.2273, "projector_lr": 2.8228003940878985e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.125, "rewards_train/margins": 3.8125, "rewards_train/rejected": -4.9375, "sft_loss": 0.73046875, "step": 5022 }, { "dpo_loss": 0.031494140625, "epoch": 0.8, "final_loss": 0.031494140625, "grad_norm": 0.0, "learning_rate": 9.394515008879094e-08, "loss": 0.1126, "projector_lr": 2.8183545026637286e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.287109375, "rewards_train/margins": 5.96875, "rewards_train/rejected": -5.6875, "sft_loss": 0.75, "step": 5023 }, { "dpo_loss": 0.1474609375, "epoch": 0.8, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 9.379705840256674e-08, "loss": 0.1591, "projector_lr": 2.8139117520770024e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6328125, "rewards_train/margins": 6.21875, "rewards_train/rejected": -7.84375, "sft_loss": 0.84375, "step": 5024 }, { "dpo_loss": 0.1865234375, "epoch": 0.8, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 9.364907144910734e-08, "loss": 0.1485, "projector_lr": 2.8094721434732204e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.4375, "sft_loss": 0.7734375, "step": 5025 }, { "dpo_loss": 0.400390625, "epoch": 0.8, "final_loss": 0.400390625, "grad_norm": 0.0, "learning_rate": 9.350118926656858e-08, "loss": 0.2504, "projector_lr": 2.8050356779970575e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 3.265625, "rewards_train/rejected": -4.0, "sft_loss": 0.81640625, "step": 5026 }, { "dpo_loss": 0.10107421875, "epoch": 0.8, "final_loss": 0.10107421875, "grad_norm": 0.0, "learning_rate": 9.335341189307965e-08, "loss": 0.201, "projector_lr": 2.80060235679239e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.423828125, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.375, "sft_loss": 0.75, "step": 5027 }, { "dpo_loss": 0.09130859375, "epoch": 0.8, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 9.320573936674248e-08, "loss": 0.0787, "projector_lr": 2.7961721810022743e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 6.28125, "rewards_train/rejected": -7.28125, "sft_loss": 0.67578125, "step": 5028 }, { "dpo_loss": 0.1552734375, "epoch": 0.8, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 9.305817172563191e-08, "loss": 0.1056, "projector_lr": 2.7917451517689573e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 2.96875, "rewards_train/rejected": -4.09375, "sft_loss": 0.828125, "step": 5029 }, { "dpo_loss": 0.119140625, "epoch": 0.8, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 9.291070900779569e-08, "loss": 0.1344, "projector_lr": 2.7873212702338705e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.1875, "sft_loss": 0.96484375, "step": 5030 }, { "dpo_loss": 0.30078125, "epoch": 0.8, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 9.276335125125501e-08, "loss": 0.2104, "projector_lr": 2.7829005375376506e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.84375, "rewards_train/rejected": -5.34375, "sft_loss": 1.140625, "step": 5031 }, { "dpo_loss": 0.0625, "epoch": 0.81, "final_loss": 0.0625, "grad_norm": 0.0, "learning_rate": 9.261609849400348e-08, "loss": 0.0798, "projector_lr": 2.778482954820105e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.486328125, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.40625, "sft_loss": 0.7734375, "step": 5032 }, { "dpo_loss": 0.37109375, "epoch": 0.81, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 9.24689507740078e-08, "loss": 0.6195, "projector_lr": 2.774068523220234e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 3.21875, "rewards_train/rejected": -4.6875, "sft_loss": 0.8515625, "step": 5033 }, { "dpo_loss": 0.10791015625, "epoch": 0.81, "final_loss": 0.10791015625, "grad_norm": 0.0, "learning_rate": 9.232190812920749e-08, "loss": 0.1542, "projector_lr": 2.7696572438762247e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.09375, "sft_loss": 0.94921875, "step": 5034 }, { "dpo_loss": 0.09521484375, "epoch": 0.81, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 9.217497059751528e-08, "loss": 0.0857, "projector_lr": 2.765249117925459e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.022705078125, "rewards_train/margins": 6.84375, "rewards_train/rejected": -6.84375, "sft_loss": 0.58203125, "step": 5035 }, { "dpo_loss": 0.0208740234375, "epoch": 0.81, "final_loss": 0.0208740234375, "grad_norm": 0.0, "learning_rate": 9.202813821681643e-08, "loss": 0.0797, "projector_lr": 2.760844146504493e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.875, "sft_loss": 0.84765625, "step": 5036 }, { "dpo_loss": 0.248046875, "epoch": 0.81, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 9.18814110249696e-08, "loss": 0.1811, "projector_lr": 2.7564423307490884e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.15625, "sft_loss": 0.8828125, "step": 5037 }, { "dpo_loss": 0.671875, "epoch": 0.81, "final_loss": 0.671875, "grad_norm": 0.0, "learning_rate": 9.173478905980553e-08, "loss": 0.3995, "projector_lr": 2.752043671794166e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.171875, "rewards_train/margins": 3.90625, "rewards_train/rejected": -6.09375, "sft_loss": 0.8359375, "step": 5038 }, { "dpo_loss": 0.314453125, "epoch": 0.81, "final_loss": 0.314453125, "grad_norm": 0.0, "learning_rate": 9.158827235912853e-08, "loss": 0.2169, "projector_lr": 2.747648170773856e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3046875, "rewards_train/margins": 4.5625, "rewards_train/rejected": -4.875, "sft_loss": 0.6640625, "step": 5039 }, { "dpo_loss": 0.0595703125, "epoch": 0.81, "final_loss": 0.0595703125, "grad_norm": 0.0, "learning_rate": 9.144186096071544e-08, "loss": 0.1037, "projector_lr": 2.7432558288214634e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 6.03125, "rewards_train/rejected": -6.96875, "sft_loss": 0.75390625, "step": 5040 }, { "dpo_loss": 0.039794921875, "epoch": 0.81, "final_loss": 0.039794921875, "grad_norm": 0.0, "learning_rate": 9.129555490231634e-08, "loss": 0.1225, "projector_lr": 2.73886664706949e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.71875, "sft_loss": 0.76171875, "step": 5041 }, { "dpo_loss": 0.02294921875, "epoch": 0.81, "final_loss": 0.02294921875, "grad_norm": 0.0, "learning_rate": 9.114935422165332e-08, "loss": 0.0341, "projector_lr": 2.7344806266496e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.28125, "sft_loss": 0.87109375, "step": 5042 }, { "dpo_loss": 0.047119140625, "epoch": 0.81, "final_loss": 0.047119140625, "grad_norm": 0.0, "learning_rate": 9.100325895642224e-08, "loss": 0.2404, "projector_lr": 2.730097768692668e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.5625, "sft_loss": 0.7265625, "step": 5043 }, { "dpo_loss": 0.1357421875, "epoch": 0.81, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 9.085726914429115e-08, "loss": 0.098, "projector_lr": 2.725718074328735e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.052490234375, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.0625, "sft_loss": 0.6640625, "step": 5044 }, { "dpo_loss": 0.451171875, "epoch": 0.81, "final_loss": 0.451171875, "grad_norm": 0.0, "learning_rate": 9.071138482290131e-08, "loss": 0.2327, "projector_lr": 2.7213415446870396e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.84375, "sft_loss": 0.94921875, "step": 5045 }, { "dpo_loss": 0.1416015625, "epoch": 0.81, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 9.056560602986645e-08, "loss": 0.1526, "projector_lr": 2.716968180895994e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.322265625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.65625, "sft_loss": 0.671875, "step": 5046 }, { "dpo_loss": 0.10107421875, "epoch": 0.81, "final_loss": 0.10107421875, "grad_norm": 0.0, "learning_rate": 9.041993280277332e-08, "loss": 0.1029, "projector_lr": 2.7125979840832e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 7.75, "rewards_train/rejected": -8.5, "sft_loss": 0.76953125, "step": 5047 }, { "dpo_loss": 0.185546875, "epoch": 0.81, "final_loss": 0.185546875, "grad_norm": 0.0, "learning_rate": 9.027436517918125e-08, "loss": 0.1198, "projector_lr": 2.708230955375438e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.71875, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.84375, "sft_loss": 0.79296875, "step": 5048 }, { "dpo_loss": 0.076171875, "epoch": 0.81, "final_loss": 0.076171875, "grad_norm": 0.0, "learning_rate": 9.01289031966227e-08, "loss": 0.2464, "projector_lr": 2.7038670958986813e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.375, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.25, "sft_loss": 0.7890625, "step": 5049 }, { "dpo_loss": 0.08203125, "epoch": 0.81, "final_loss": 0.08203125, "grad_norm": 0.0, "learning_rate": 8.998354689260246e-08, "loss": 0.3243, "projector_lr": 2.699506406778074e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48828125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -4.78125, "sft_loss": 0.70703125, "step": 5050 }, { "dpo_loss": 0.515625, "epoch": 0.81, "final_loss": 0.515625, "grad_norm": 0.0, "learning_rate": 8.983829630459821e-08, "loss": 0.3066, "projector_lr": 2.695148889137947e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.109375, "rewards_train/margins": 2.28125, "rewards_train/rejected": -3.390625, "sft_loss": 0.70703125, "step": 5051 }, { "dpo_loss": 0.1298828125, "epoch": 0.81, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 8.969315147006068e-08, "loss": 0.1976, "projector_lr": 2.6907945441018206e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.59375, "sft_loss": 0.59765625, "step": 5052 }, { "dpo_loss": 0.6015625, "epoch": 0.81, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 8.954811242641297e-08, "loss": 0.3113, "projector_lr": 2.6864433727923893e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 3.53125, "rewards_train/rejected": -4.5625, "sft_loss": 0.62890625, "step": 5053 }, { "dpo_loss": 0.0020294189453125, "epoch": 0.81, "final_loss": 0.0020294189453125, "grad_norm": 0.0, "learning_rate": 8.940317921105084e-08, "loss": 0.0062, "projector_lr": 2.6820953763315253e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 8.25, "rewards_train/rejected": -8.875, "sft_loss": 0.67578125, "step": 5054 }, { "dpo_loss": 0.169921875, "epoch": 0.81, "final_loss": 0.169921875, "grad_norm": 0.0, "learning_rate": 8.925835186134323e-08, "loss": 0.2011, "projector_lr": 2.677750555840297e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.296875, "rewards_train/rejected": -4.8125, "sft_loss": 1.015625, "step": 5055 }, { "dpo_loss": 0.3671875, "epoch": 0.81, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 8.911363041463133e-08, "loss": 0.2141, "projector_lr": 2.67340891243894e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.796875, "rewards_train/margins": 2.546875, "rewards_train/rejected": -4.34375, "sft_loss": 0.96484375, "step": 5056 }, { "dpo_loss": 0.0498046875, "epoch": 0.81, "final_loss": 0.0498046875, "grad_norm": 0.0, "learning_rate": 8.896901490822911e-08, "loss": 0.4265, "projector_lr": 2.6690704472468737e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.125, "sft_loss": 0.6953125, "step": 5057 }, { "dpo_loss": 0.03076171875, "epoch": 0.81, "final_loss": 0.03076171875, "grad_norm": 0.0, "learning_rate": 8.88245053794236e-08, "loss": 0.0514, "projector_lr": 2.664735161382708e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.125, "sft_loss": 0.59375, "step": 5058 }, { "dpo_loss": 0.4140625, "epoch": 0.81, "final_loss": 0.4140625, "grad_norm": 0.0, "learning_rate": 8.868010186547397e-08, "loss": 0.3638, "projector_lr": 2.6604030559642196e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.75, "rewards_train/margins": 3.078125, "rewards_train/rejected": -3.828125, "sft_loss": 0.81640625, "step": 5059 }, { "dpo_loss": 0.1484375, "epoch": 0.81, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 8.853580440361241e-08, "loss": 0.0973, "projector_lr": 2.6560741321083724e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.390625, "rewards_train/margins": 6.53125, "rewards_train/rejected": -6.9375, "sft_loss": 0.8125, "step": 5060 }, { "dpo_loss": 0.53125, "epoch": 0.81, "final_loss": 0.53125, "grad_norm": 0.0, "learning_rate": 8.839161303104343e-08, "loss": 0.3003, "projector_lr": 2.651748390931303e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.71875, "rewards_train/margins": 2.78125, "rewards_train/rejected": -3.5, "sft_loss": 0.734375, "step": 5061 }, { "dpo_loss": 0.11279296875, "epoch": 0.81, "final_loss": 0.11279296875, "grad_norm": 0.0, "learning_rate": 8.824752778494476e-08, "loss": 0.0693, "projector_lr": 2.647425833548343e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.77734375, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.21875, "sft_loss": 0.7578125, "step": 5062 }, { "dpo_loss": 0.23828125, "epoch": 0.81, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 8.810354870246606e-08, "loss": 0.1308, "projector_lr": 2.643106461073982e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 4.59375, "rewards_train/rejected": -6.09375, "sft_loss": 0.578125, "step": 5063 }, { "dpo_loss": 0.0908203125, "epoch": 0.81, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 8.795967582073044e-08, "loss": 0.1312, "projector_lr": 2.6387902746219133e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.59375, "sft_loss": 0.67578125, "step": 5064 }, { "dpo_loss": 0.482421875, "epoch": 0.81, "final_loss": 0.482421875, "grad_norm": 0.0, "learning_rate": 8.781590917683263e-08, "loss": 0.2528, "projector_lr": 2.634477275304979e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.0625, "sft_loss": 0.9453125, "step": 5065 }, { "dpo_loss": 0.412109375, "epoch": 0.81, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 8.767224880784086e-08, "loss": 0.2945, "projector_lr": 2.630167464235226e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.46875, "sft_loss": 0.55078125, "step": 5066 }, { "dpo_loss": 0.0308837890625, "epoch": 0.81, "final_loss": 0.0308837890625, "grad_norm": 0.0, "learning_rate": 8.752869475079533e-08, "loss": 0.0608, "projector_lr": 2.62586084252386e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.486328125, "rewards_train/margins": 6.03125, "rewards_train/rejected": -6.5, "sft_loss": 0.890625, "step": 5067 }, { "dpo_loss": 0.034423828125, "epoch": 0.81, "final_loss": 0.034423828125, "grad_norm": 0.0, "learning_rate": 8.738524704270955e-08, "loss": 0.0328, "projector_lr": 2.621557411281287e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.412109375, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.375, "sft_loss": 0.734375, "step": 5068 }, { "dpo_loss": 0.1689453125, "epoch": 0.81, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 8.724190572056855e-08, "loss": 0.1327, "projector_lr": 2.6172571716170566e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51953125, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.1875, "sft_loss": 0.84375, "step": 5069 }, { "dpo_loss": 0.09423828125, "epoch": 0.81, "final_loss": 0.09423828125, "grad_norm": 0.0, "learning_rate": 8.709867082133104e-08, "loss": 0.0706, "projector_lr": 2.6129601246399314e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.796875, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.96875, "sft_loss": 0.8203125, "step": 5070 }, { "dpo_loss": 0.01361083984375, "epoch": 0.81, "final_loss": 0.01361083984375, "grad_norm": 0.0, "learning_rate": 8.695554238192748e-08, "loss": 0.129, "projector_lr": 2.6086662714578244e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.099609375, "rewards_train/margins": 7.6875, "rewards_train/rejected": -7.78125, "sft_loss": 0.6796875, "step": 5071 }, { "dpo_loss": 0.0274658203125, "epoch": 0.81, "final_loss": 0.0274658203125, "grad_norm": 0.0, "learning_rate": 8.68125204392614e-08, "loss": 0.1983, "projector_lr": 2.604375613177842e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0074462890625, "rewards_train/margins": 6.28125, "rewards_train/rejected": -6.3125, "sft_loss": 0.859375, "step": 5072 }, { "dpo_loss": 0.154296875, "epoch": 0.81, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 8.666960503020859e-08, "loss": 0.2529, "projector_lr": 2.600088150906258e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.46875, "sft_loss": 0.921875, "step": 5073 }, { "dpo_loss": 0.2158203125, "epoch": 0.81, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 8.65267961916174e-08, "loss": 0.1187, "projector_lr": 2.5958038857485223e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 3.28125, "rewards_train/rejected": -4.0625, "sft_loss": 0.7890625, "step": 5074 }, { "dpo_loss": 0.1669921875, "epoch": 0.81, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 8.638409396030866e-08, "loss": 0.0932, "projector_lr": 2.5915228188092597e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.10546875, "rewards_train/margins": 6.28125, "rewards_train/rejected": -6.1875, "sft_loss": 0.52734375, "step": 5075 }, { "dpo_loss": 0.050537109375, "epoch": 0.81, "final_loss": 0.050537109375, "grad_norm": 0.0, "learning_rate": 8.624149837307604e-08, "loss": 0.0707, "projector_lr": 2.587244951192281e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.625, "sft_loss": 0.703125, "step": 5076 }, { "dpo_loss": 0.0478515625, "epoch": 0.81, "final_loss": 0.0478515625, "grad_norm": 0.0, "learning_rate": 8.609900946668535e-08, "loss": 0.1497, "projector_lr": 2.582970284000561e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.59375, "sft_loss": 2.09375, "step": 5077 }, { "dpo_loss": 0.298828125, "epoch": 0.81, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 8.5956627277875e-08, "loss": 0.3127, "projector_lr": 2.57869881833625e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.09375, "rewards_train/margins": 2.859375, "rewards_train/rejected": -3.953125, "sft_loss": 1.0625, "step": 5078 }, { "dpo_loss": 0.119140625, "epoch": 0.81, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 8.581435184335578e-08, "loss": 0.0733, "projector_lr": 2.5744305553006737e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.08203125, "rewards_train/margins": 5.75, "rewards_train/rejected": -5.84375, "sft_loss": 0.72265625, "step": 5079 }, { "dpo_loss": 0.057373046875, "epoch": 0.81, "final_loss": 0.057373046875, "grad_norm": 0.0, "learning_rate": 8.567218319981134e-08, "loss": 0.0835, "projector_lr": 2.5701654959943403e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.1875, "sft_loss": 0.90234375, "step": 5080 }, { "dpo_loss": 0.275390625, "epoch": 0.81, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 8.553012138389732e-08, "loss": 0.2946, "projector_lr": 2.5659036415169195e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.8125, "sft_loss": 0.73046875, "step": 5081 }, { "dpo_loss": 0.310546875, "epoch": 0.81, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 8.538816643224233e-08, "loss": 0.2448, "projector_lr": 2.56164499296727e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.48046875, "rewards_train/margins": 4.5, "rewards_train/rejected": -4.96875, "sft_loss": 0.49609375, "step": 5082 }, { "dpo_loss": 0.061767578125, "epoch": 0.81, "final_loss": 0.061767578125, "grad_norm": 0.0, "learning_rate": 8.524631838144663e-08, "loss": 0.054, "projector_lr": 2.5573895514433993e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.59375, "sft_loss": 0.92578125, "step": 5083 }, { "dpo_loss": 0.62109375, "epoch": 0.81, "final_loss": 0.62109375, "grad_norm": 0.0, "learning_rate": 8.510457726808384e-08, "loss": 0.3242, "projector_lr": 2.5531373180425155e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.21875, "rewards_train/margins": 2.859375, "rewards_train/rejected": -5.09375, "sft_loss": 0.91015625, "step": 5084 }, { "dpo_loss": 0.08837890625, "epoch": 0.81, "final_loss": 0.08837890625, "grad_norm": 0.0, "learning_rate": 8.496294312869928e-08, "loss": 0.2249, "projector_lr": 2.5488882938609786e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.15625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.8125, "sft_loss": 0.9765625, "step": 5085 }, { "dpo_loss": 0.01226806640625, "epoch": 0.81, "final_loss": 0.01226806640625, "grad_norm": 0.0, "learning_rate": 8.48214159998113e-08, "loss": 0.0186, "projector_lr": 2.544642479994339e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 6.03125, "rewards_train/rejected": -7.09375, "sft_loss": 0.7734375, "step": 5086 }, { "dpo_loss": 0.359375, "epoch": 0.81, "final_loss": 0.359375, "grad_norm": 0.0, "learning_rate": 8.46799959179102e-08, "loss": 0.2721, "projector_lr": 2.540399877537306e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.171875, "rewards_train/margins": 2.296875, "rewards_train/rejected": -4.46875, "sft_loss": 0.8984375, "step": 5087 }, { "dpo_loss": 0.29296875, "epoch": 0.81, "final_loss": 0.29296875, "grad_norm": 0.0, "learning_rate": 8.453868291945876e-08, "loss": 0.3643, "projector_lr": 2.536160487583763e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.09375, "rewards_train/margins": 2.9375, "rewards_train/rejected": -5.03125, "sft_loss": 0.68359375, "step": 5088 }, { "dpo_loss": 0.02783203125, "epoch": 0.81, "final_loss": 0.02783203125, "grad_norm": 0.0, "learning_rate": 8.439747704089218e-08, "loss": 0.0895, "projector_lr": 2.5319243112267656e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.03125, "sft_loss": 0.75390625, "step": 5089 }, { "dpo_loss": 0.0546875, "epoch": 0.81, "final_loss": 0.0546875, "grad_norm": 0.0, "learning_rate": 8.425637831861826e-08, "loss": 0.0918, "projector_lr": 2.527691349558548e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.37890625, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.625, "sft_loss": 0.8203125, "step": 5090 }, { "dpo_loss": 0.10302734375, "epoch": 0.81, "final_loss": 0.10302734375, "grad_norm": 0.0, "learning_rate": 8.411538678901697e-08, "loss": 0.2115, "projector_lr": 2.5234616036705093e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.65625, "sft_loss": 0.7578125, "step": 5091 }, { "dpo_loss": 0.1357421875, "epoch": 0.81, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 8.397450248844045e-08, "loss": 0.1293, "projector_lr": 2.5192350746532137e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 3.734375, "rewards_train/rejected": -4.375, "sft_loss": 0.80859375, "step": 5092 }, { "dpo_loss": 0.263671875, "epoch": 0.81, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 8.383372545321365e-08, "loss": 0.1813, "projector_lr": 2.51501176359641e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.71875, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.90625, "sft_loss": 0.6953125, "step": 5093 }, { "dpo_loss": 0.09619140625, "epoch": 0.82, "final_loss": 0.09619140625, "grad_norm": 0.0, "learning_rate": 8.369305571963354e-08, "loss": 0.0951, "projector_lr": 2.510791671589006e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.494140625, "rewards_train/margins": 6.3125, "rewards_train/rejected": -6.8125, "sft_loss": 0.796875, "step": 5094 }, { "dpo_loss": 0.0245361328125, "epoch": 0.82, "final_loss": 0.0245361328125, "grad_norm": 0.0, "learning_rate": 8.355249332396936e-08, "loss": 0.0555, "projector_lr": 2.5065747997190806e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.48046875, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.8125, "sft_loss": 0.60546875, "step": 5095 }, { "dpo_loss": 0.19921875, "epoch": 0.82, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 8.341203830246308e-08, "loss": 0.446, "projector_lr": 2.502361149073892e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.90625, "sft_loss": 0.59765625, "step": 5096 }, { "dpo_loss": 0.0869140625, "epoch": 0.82, "final_loss": 0.0869140625, "grad_norm": 0.0, "learning_rate": 8.327169069132855e-08, "loss": 0.1502, "projector_lr": 2.498150720739857e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2451171875, "rewards_train/margins": 5.625, "rewards_train/rejected": -5.875, "sft_loss": 0.71875, "step": 5097 }, { "dpo_loss": 0.3359375, "epoch": 0.82, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 8.313145052675202e-08, "loss": 0.1761, "projector_lr": 2.493943515802561e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.28125, "sft_loss": 0.79296875, "step": 5098 }, { "dpo_loss": 0.259765625, "epoch": 0.82, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 8.299131784489238e-08, "loss": 0.1384, "projector_lr": 2.489739535346772e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.421875, "rewards_train/margins": 3.15625, "rewards_train/rejected": -3.59375, "sft_loss": 0.85546875, "step": 5099 }, { "dpo_loss": 0.173828125, "epoch": 0.82, "final_loss": 0.173828125, "grad_norm": 0.0, "learning_rate": 8.28512926818804e-08, "loss": 0.1714, "projector_lr": 2.4855387804564123e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.71875, "sft_loss": 0.828125, "step": 5100 }, { "dpo_loss": 0.173828125, "epoch": 0.82, "final_loss": 0.173828125, "grad_norm": 0.0, "learning_rate": 8.271137507381925e-08, "loss": 0.2497, "projector_lr": 2.481341252214578e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.25, "rewards_train/margins": 3.828125, "rewards_train/rejected": -5.0625, "sft_loss": 1.0546875, "step": 5101 }, { "dpo_loss": 0.053466796875, "epoch": 0.82, "final_loss": 0.053466796875, "grad_norm": 0.0, "learning_rate": 8.257156505678426e-08, "loss": 0.0483, "projector_lr": 2.477146951703528e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.1875, "sft_loss": 0.82421875, "step": 5102 }, { "dpo_loss": 0.12109375, "epoch": 0.82, "final_loss": 0.12109375, "grad_norm": 0.0, "learning_rate": 8.243186266682345e-08, "loss": 0.1047, "projector_lr": 2.4729558800047035e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.75, "sft_loss": 0.87890625, "step": 5103 }, { "dpo_loss": 0.5546875, "epoch": 0.82, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 8.229226793995647e-08, "loss": 0.346, "projector_lr": 2.4687680381986944e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.6875, "sft_loss": 1.265625, "step": 5104 }, { "dpo_loss": 0.0771484375, "epoch": 0.82, "final_loss": 0.0771484375, "grad_norm": 0.0, "learning_rate": 8.215278091217598e-08, "loss": 0.3172, "projector_lr": 2.4645834273652794e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.53125, "sft_loss": 0.79296875, "step": 5105 }, { "dpo_loss": 0.045654296875, "epoch": 0.82, "final_loss": 0.045654296875, "grad_norm": 0.0, "learning_rate": 8.201340161944587e-08, "loss": 0.0824, "projector_lr": 2.460402048583376e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.62109375, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.125, "sft_loss": 0.83203125, "step": 5106 }, { "dpo_loss": 0.06787109375, "epoch": 0.82, "final_loss": 0.06787109375, "grad_norm": 0.0, "learning_rate": 8.187413009770322e-08, "loss": 0.1715, "projector_lr": 2.456223902931097e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.078125, "rewards_train/margins": 4.75, "rewards_train/rejected": -6.8125, "sft_loss": 0.77734375, "step": 5107 }, { "dpo_loss": 0.0255126953125, "epoch": 0.82, "final_loss": 0.0255126953125, "grad_norm": 0.0, "learning_rate": 8.17349663828566e-08, "loss": 0.2897, "projector_lr": 2.452048991485698e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.875, "sft_loss": 0.79296875, "step": 5108 }, { "dpo_loss": 0.1484375, "epoch": 0.82, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 8.159591051078751e-08, "loss": 0.1226, "projector_lr": 2.447877315323626e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.84375, "sft_loss": 1.046875, "step": 5109 }, { "dpo_loss": 0.007415771484375, "epoch": 0.82, "final_loss": 0.007415771484375, "grad_norm": 0.0, "learning_rate": 8.145696251734874e-08, "loss": 0.0759, "projector_lr": 2.443708875520462e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.921875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.5, "sft_loss": 0.65625, "step": 5110 }, { "dpo_loss": 0.28125, "epoch": 0.82, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 8.131812243836605e-08, "loss": 0.227, "projector_lr": 2.439543673150982e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 2.671875, "rewards_train/rejected": -4.40625, "sft_loss": 0.8359375, "step": 5111 }, { "dpo_loss": 0.0830078125, "epoch": 0.82, "final_loss": 0.0830078125, "grad_norm": 0.0, "learning_rate": 8.117939030963689e-08, "loss": 0.0576, "projector_lr": 2.435381709289107e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.0, "sft_loss": 0.7890625, "step": 5112 }, { "dpo_loss": 0.1298828125, "epoch": 0.82, "final_loss": 0.1298828125, "grad_norm": 0.0, "learning_rate": 8.104076616693134e-08, "loss": 0.1621, "projector_lr": 2.4312229850079404e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.859375, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.9375, "sft_loss": 0.640625, "step": 5113 }, { "dpo_loss": 0.2119140625, "epoch": 0.82, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 8.090225004599116e-08, "loss": 0.1159, "projector_lr": 2.427067501379735e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 4.375, "rewards_train/rejected": -6.0625, "sft_loss": 1.4375, "step": 5114 }, { "dpo_loss": 0.083984375, "epoch": 0.82, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 8.07638419825305e-08, "loss": 0.043, "projector_lr": 2.4229152594759154e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.234375, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.9375, "sft_loss": 0.71484375, "step": 5115 }, { "dpo_loss": 0.283203125, "epoch": 0.82, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 8.062554201223549e-08, "loss": 0.179, "projector_lr": 2.4187662603670647e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.5, "rewards_train/margins": 3.96875, "rewards_train/rejected": -6.46875, "sft_loss": 1.171875, "step": 5116 }, { "dpo_loss": 0.0419921875, "epoch": 0.82, "final_loss": 0.0419921875, "grad_norm": 0.0, "learning_rate": 8.048735017076468e-08, "loss": 0.049, "projector_lr": 2.4146205051229407e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 7.875, "rewards_train/rejected": -8.9375, "sft_loss": 0.765625, "step": 5117 }, { "dpo_loss": 0.048828125, "epoch": 0.82, "final_loss": 0.048828125, "grad_norm": 0.0, "learning_rate": 8.034926649374857e-08, "loss": 0.1581, "projector_lr": 2.410477994812457e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.02490234375, "rewards_train/margins": 5.84375, "rewards_train/rejected": -5.875, "sft_loss": 0.5390625, "step": 5118 }, { "dpo_loss": 0.361328125, "epoch": 0.82, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 8.02112910167897e-08, "loss": 0.2224, "projector_lr": 2.406338730503691e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 2.21875, "rewards_train/rejected": -2.859375, "sft_loss": 0.7109375, "step": 5119 }, { "dpo_loss": 0.09228515625, "epoch": 0.82, "final_loss": 0.09228515625, "grad_norm": 0.0, "learning_rate": 8.007342377546266e-08, "loss": 0.0729, "projector_lr": 2.40220271326388e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1552734375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -4.8125, "sft_loss": 0.59375, "step": 5120 }, { "dpo_loss": 0.1552734375, "epoch": 0.82, "final_loss": 0.1552734375, "grad_norm": 0.0, "learning_rate": 7.993566480531456e-08, "loss": 0.1065, "projector_lr": 2.3980699441594366e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.25, "sft_loss": 0.94140625, "step": 5121 }, { "dpo_loss": 0.0693359375, "epoch": 0.82, "final_loss": 0.0693359375, "grad_norm": 0.0, "learning_rate": 7.979801414186399e-08, "loss": 0.0556, "projector_lr": 2.39394042425592e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.90625, "sft_loss": 0.8125, "step": 5122 }, { "dpo_loss": 0.1826171875, "epoch": 0.82, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 7.966047182060226e-08, "loss": 0.1091, "projector_lr": 2.389814154618068e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.421875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.6875, "sft_loss": 0.8515625, "step": 5123 }, { "dpo_loss": 0.087890625, "epoch": 0.82, "final_loss": 0.087890625, "grad_norm": 0.0, "learning_rate": 7.952303787699194e-08, "loss": 0.2703, "projector_lr": 2.385691136309758e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.28125, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.03125, "sft_loss": 0.5078125, "step": 5124 }, { "dpo_loss": 0.2412109375, "epoch": 0.82, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 7.938571234646851e-08, "loss": 0.1761, "projector_lr": 2.3815713703940556e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9453125, "rewards_train/margins": 3.296875, "rewards_train/rejected": -5.25, "sft_loss": 0.890625, "step": 5125 }, { "dpo_loss": 0.14453125, "epoch": 0.82, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 7.924849526443883e-08, "loss": 0.2005, "projector_lr": 2.377454857933165e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.96875, "sft_loss": 0.765625, "step": 5126 }, { "dpo_loss": 0.1806640625, "epoch": 0.82, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 7.911138666628236e-08, "loss": 0.1358, "projector_lr": 2.373341599988471e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 2.984375, "rewards_train/rejected": -4.46875, "sft_loss": 0.7109375, "step": 5127 }, { "dpo_loss": 0.0703125, "epoch": 0.82, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 7.897438658735012e-08, "loss": 0.0927, "projector_lr": 2.3692315976205037e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.78125, "sft_loss": 0.74609375, "step": 5128 }, { "dpo_loss": 0.361328125, "epoch": 0.82, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 7.883749506296533e-08, "loss": 0.3764, "projector_lr": 2.36512485188896e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 6.21875, "rewards_train/rejected": -7.3125, "sft_loss": 0.68359375, "step": 5129 }, { "dpo_loss": 0.01202392578125, "epoch": 0.82, "final_loss": 0.01202392578125, "grad_norm": 0.0, "learning_rate": 7.870071212842316e-08, "loss": 0.0251, "projector_lr": 2.3610213638526952e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4765625, "rewards_train/margins": 7.15625, "rewards_train/rejected": -7.625, "sft_loss": 0.78515625, "step": 5130 }, { "dpo_loss": 0.58984375, "epoch": 0.82, "final_loss": 0.58984375, "grad_norm": 0.0, "learning_rate": 7.856403781899106e-08, "loss": 0.345, "projector_lr": 2.3569211345697322e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 3.578125, "rewards_train/rejected": -4.90625, "sft_loss": 0.91015625, "step": 5131 }, { "dpo_loss": 0.0849609375, "epoch": 0.82, "final_loss": 0.0849609375, "grad_norm": 0.0, "learning_rate": 7.842747216990819e-08, "loss": 0.0509, "projector_lr": 2.352824165097246e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.71875, "sft_loss": 0.73046875, "step": 5132 }, { "dpo_loss": 0.109375, "epoch": 0.82, "final_loss": 0.109375, "grad_norm": 0.0, "learning_rate": 7.829101521638554e-08, "loss": 0.0889, "projector_lr": 2.3487304564915662e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.625, "sft_loss": 0.71484375, "step": 5133 }, { "dpo_loss": 0.310546875, "epoch": 0.82, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 7.815466699360657e-08, "loss": 0.2394, "projector_lr": 2.344640009808197e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.34375, "sft_loss": 0.87109375, "step": 5134 }, { "dpo_loss": 0.306640625, "epoch": 0.82, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 7.801842753672639e-08, "loss": 0.2028, "projector_lr": 2.3405528261017917e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 2.71875, "rewards_train/rejected": -4.125, "sft_loss": 1.0078125, "step": 5135 }, { "dpo_loss": 0.03662109375, "epoch": 0.82, "final_loss": 0.03662109375, "grad_norm": 0.0, "learning_rate": 7.7882296880872e-08, "loss": 0.0946, "projector_lr": 2.3364689064261602e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8828125, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.8125, "sft_loss": 0.67578125, "step": 5136 }, { "dpo_loss": 0.0888671875, "epoch": 0.82, "final_loss": 0.0888671875, "grad_norm": 0.0, "learning_rate": 7.774627506114239e-08, "loss": 0.3669, "projector_lr": 2.332388251834272e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.404296875, "rewards_train/margins": 4.375, "rewards_train/rejected": -4.78125, "sft_loss": 0.64453125, "step": 5137 }, { "dpo_loss": 0.375, "epoch": 0.82, "final_loss": 0.375, "grad_norm": 0.0, "learning_rate": 7.761036211260879e-08, "loss": 0.2613, "projector_lr": 2.328310863378264e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.7890625, "rewards_train/margins": 1.9296875, "rewards_train/rejected": -3.71875, "sft_loss": 1.1484375, "step": 5138 }, { "dpo_loss": 0.111328125, "epoch": 0.82, "final_loss": 0.111328125, "grad_norm": 0.0, "learning_rate": 7.74745580703139e-08, "loss": 0.111, "projector_lr": 2.324236742109417e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8984375, "rewards_train/margins": 4.46875, "rewards_train/rejected": -6.34375, "sft_loss": 1.015625, "step": 5139 }, { "dpo_loss": 0.10986328125, "epoch": 0.82, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 7.733886296927278e-08, "loss": 0.2017, "projector_lr": 2.3201658890781835e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 6.21875, "rewards_train/rejected": -6.9375, "sft_loss": 0.62109375, "step": 5140 }, { "dpo_loss": 0.380859375, "epoch": 0.82, "final_loss": 0.380859375, "grad_norm": 0.0, "learning_rate": 7.720327684447209e-08, "loss": 0.4475, "projector_lr": 2.3160983053341628e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.34375, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.8125, "sft_loss": 0.8828125, "step": 5141 }, { "dpo_loss": 0.0186767578125, "epoch": 0.82, "final_loss": 0.0186767578125, "grad_norm": 0.0, "learning_rate": 7.706779973087046e-08, "loss": 0.0905, "projector_lr": 2.312033991926114e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.10595703125, "rewards_train/margins": 6.03125, "rewards_train/rejected": -5.90625, "sft_loss": 0.7109375, "step": 5142 }, { "dpo_loss": 0.07373046875, "epoch": 0.82, "final_loss": 0.07373046875, "grad_norm": 0.0, "learning_rate": 7.693243166339835e-08, "loss": 0.0979, "projector_lr": 2.3079729499019507e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.453125, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.65625, "sft_loss": 0.9765625, "step": 5143 }, { "dpo_loss": 0.2216796875, "epoch": 0.82, "final_loss": 0.2216796875, "grad_norm": 0.0, "learning_rate": 7.67971726769584e-08, "loss": 0.3998, "projector_lr": 2.303915180308752e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 3.890625, "rewards_train/rejected": -4.65625, "sft_loss": 0.76953125, "step": 5144 }, { "dpo_loss": 0.185546875, "epoch": 0.82, "final_loss": 0.185546875, "grad_norm": 0.0, "learning_rate": 7.666202280642481e-08, "loss": 0.1538, "projector_lr": 2.2998606841927444e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.9375, "sft_loss": 0.8671875, "step": 5145 }, { "dpo_loss": 0.376953125, "epoch": 0.82, "final_loss": 0.376953125, "grad_norm": 0.0, "learning_rate": 7.652698208664376e-08, "loss": 0.2184, "projector_lr": 2.2958094625993132e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91015625, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.15625, "sft_loss": 0.66796875, "step": 5146 }, { "dpo_loss": 0.2275390625, "epoch": 0.82, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 7.639205055243319e-08, "loss": 0.128, "projector_lr": 2.291761516572996e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4375, "rewards_train/margins": 3.75, "rewards_train/rejected": -5.1875, "sft_loss": 0.68359375, "step": 5147 }, { "dpo_loss": 0.0703125, "epoch": 0.82, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 7.625722823858321e-08, "loss": 0.3342, "projector_lr": 2.2877168471574967e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.390625, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.53125, "sft_loss": 0.73828125, "step": 5148 }, { "dpo_loss": 0.2001953125, "epoch": 0.82, "final_loss": 0.2001953125, "grad_norm": 0.0, "learning_rate": 7.612251517985535e-08, "loss": 0.1364, "projector_lr": 2.2836754553956606e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.75, "sft_loss": 0.60546875, "step": 5149 }, { "dpo_loss": 0.275390625, "epoch": 0.82, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 7.598791141098338e-08, "loss": 0.2449, "projector_lr": 2.2796373423295013e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.421875, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.25, "sft_loss": 0.67578125, "step": 5150 }, { "dpo_loss": 0.0439453125, "epoch": 0.82, "final_loss": 0.0439453125, "grad_norm": 0.0, "learning_rate": 7.58534169666723e-08, "loss": 0.2758, "projector_lr": 2.275602509000169e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.90625, "sft_loss": 0.6796875, "step": 5151 }, { "dpo_loss": 0.189453125, "epoch": 0.82, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 7.571903188159967e-08, "loss": 0.2844, "projector_lr": 2.2715709564479904e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.609375, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.0625, "sft_loss": 0.609375, "step": 5152 }, { "dpo_loss": 0.0252685546875, "epoch": 0.82, "final_loss": 0.0252685546875, "grad_norm": 0.0, "learning_rate": 7.558475619041421e-08, "loss": 0.0978, "projector_lr": 2.2675426857124266e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.31640625, "rewards_train/margins": 7.21875, "rewards_train/rejected": -7.53125, "sft_loss": 0.71484375, "step": 5153 }, { "dpo_loss": 1.0390625, "epoch": 0.82, "final_loss": 1.0390625, "grad_norm": 0.0, "learning_rate": 7.545058992773701e-08, "loss": 0.6624, "projector_lr": 2.2635176978321105e-07, "rewards_train/accuracies": 0.5, "rewards_train/chosen": -2.09375, "rewards_train/margins": 1.0234375, "rewards_train/rejected": -3.125, "sft_loss": 0.8125, "step": 5154 }, { "dpo_loss": 0.06982421875, "epoch": 0.82, "final_loss": 0.06982421875, "grad_norm": 0.0, "learning_rate": 7.531653312816028e-08, "loss": 0.1005, "projector_lr": 2.2594959938448084e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.515625, "rewards_train/margins": 5.53125, "rewards_train/rejected": -7.03125, "sft_loss": 1.0078125, "step": 5155 }, { "dpo_loss": 0.271484375, "epoch": 0.82, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 7.51825858262486e-08, "loss": 0.1866, "projector_lr": 2.255477574787458e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 2.90625, "rewards_train/rejected": -3.609375, "sft_loss": 0.625, "step": 5156 }, { "dpo_loss": 0.10888671875, "epoch": 0.83, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 7.504874805653794e-08, "loss": 0.2159, "projector_lr": 2.2514624416961383e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.28125, "sft_loss": 1.0390625, "step": 5157 }, { "dpo_loss": 0.056640625, "epoch": 0.83, "final_loss": 0.056640625, "grad_norm": 0.0, "learning_rate": 7.491501985353632e-08, "loss": 0.2085, "projector_lr": 2.24745059560609e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.8125, "sft_loss": 1.015625, "step": 5158 }, { "dpo_loss": 0.248046875, "epoch": 0.83, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 7.478140125172338e-08, "loss": 0.1766, "projector_lr": 2.2434420375517017e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.375, "sft_loss": 0.671875, "step": 5159 }, { "dpo_loss": 0.08935546875, "epoch": 0.83, "final_loss": 0.08935546875, "grad_norm": 0.0, "learning_rate": 7.464789228555036e-08, "loss": 0.1382, "projector_lr": 2.2394367685665106e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59765625, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.9375, "sft_loss": 0.5859375, "step": 5160 }, { "dpo_loss": 0.11376953125, "epoch": 0.83, "final_loss": 0.11376953125, "grad_norm": 0.0, "learning_rate": 7.45144929894403e-08, "loss": 0.2195, "projector_lr": 2.2354347896832088e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 3.1875, "rewards_train/rejected": -4.25, "sft_loss": 0.640625, "step": 5161 }, { "dpo_loss": 0.0537109375, "epoch": 0.83, "final_loss": 0.0537109375, "grad_norm": 0.0, "learning_rate": 7.43812033977882e-08, "loss": 0.1589, "projector_lr": 2.231436101933646e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2734375, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.4375, "sft_loss": 0.6953125, "step": 5162 }, { "dpo_loss": 0.1689453125, "epoch": 0.83, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 7.424802354496052e-08, "loss": 0.0904, "projector_lr": 2.2274407063488157e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.439453125, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.0625, "sft_loss": 0.7578125, "step": 5163 }, { "dpo_loss": 0.12451171875, "epoch": 0.83, "final_loss": 0.12451171875, "grad_norm": 0.0, "learning_rate": 7.41149534652955e-08, "loss": 0.1244, "projector_lr": 2.2234486039588654e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.69140625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.875, "sft_loss": 0.75390625, "step": 5164 }, { "dpo_loss": 0.002838134765625, "epoch": 0.83, "final_loss": 0.002838134765625, "grad_norm": 0.0, "learning_rate": 7.398199319310299e-08, "loss": 0.0629, "projector_lr": 2.2194597957930902e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 7.03125, "rewards_train/rejected": -7.8125, "sft_loss": 0.77734375, "step": 5165 }, { "dpo_loss": 0.76171875, "epoch": 0.83, "final_loss": 0.76171875, "grad_norm": 0.0, "learning_rate": 7.38491427626648e-08, "loss": 0.4032, "projector_lr": 2.2154742828799441e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 1.8046875, "rewards_train/rejected": -3.296875, "sft_loss": 0.84765625, "step": 5166 }, { "dpo_loss": 0.197265625, "epoch": 0.83, "final_loss": 0.197265625, "grad_norm": 0.0, "learning_rate": 7.3716402208234e-08, "loss": 0.1269, "projector_lr": 2.2114920662470202e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.8125, "sft_loss": 0.55859375, "step": 5167 }, { "dpo_loss": 0.1767578125, "epoch": 0.83, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 7.358377156403583e-08, "loss": 0.117, "projector_lr": 2.2075131469210751e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.703125, "rewards_train/margins": 3.828125, "rewards_train/rejected": -5.53125, "sft_loss": 0.8125, "step": 5168 }, { "dpo_loss": 0.034912109375, "epoch": 0.83, "final_loss": 0.034912109375, "grad_norm": 0.0, "learning_rate": 7.345125086426674e-08, "loss": 0.0523, "projector_lr": 2.203537525928002e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0810546875, "rewards_train/margins": 4.6875, "rewards_train/rejected": -4.75, "sft_loss": 0.82421875, "step": 5169 }, { "dpo_loss": 0.25390625, "epoch": 0.83, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 7.331884014309514e-08, "loss": 0.1562, "projector_lr": 2.1995652042928542e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.453125, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.71875, "sft_loss": 0.796875, "step": 5170 }, { "dpo_loss": 0.024658203125, "epoch": 0.83, "final_loss": 0.024658203125, "grad_norm": 0.0, "learning_rate": 7.318653943466074e-08, "loss": 0.2449, "projector_lr": 2.1955961830398224e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.265625, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.875, "sft_loss": 0.6640625, "step": 5171 }, { "dpo_loss": 0.05322265625, "epoch": 0.83, "final_loss": 0.05322265625, "grad_norm": 0.0, "learning_rate": 7.305434877307537e-08, "loss": 0.0984, "projector_lr": 2.1916304631922612e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.8125, "sft_loss": 0.76953125, "step": 5172 }, { "dpo_loss": 0.134765625, "epoch": 0.83, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 7.292226819242214e-08, "loss": 0.1602, "projector_lr": 2.1876680457726645e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.287109375, "rewards_train/margins": 5.65625, "rewards_train/rejected": -5.375, "sft_loss": 0.65234375, "step": 5173 }, { "dpo_loss": 0.07666015625, "epoch": 0.83, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 7.279029772675571e-08, "loss": 0.2863, "projector_lr": 2.1837089318026714e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.5625, "sft_loss": 0.96484375, "step": 5174 }, { "dpo_loss": 0.287109375, "epoch": 0.83, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 7.265843741010269e-08, "loss": 0.2037, "projector_lr": 2.1797531223030808e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 2.40625, "rewards_train/rejected": -3.359375, "sft_loss": 0.734375, "step": 5175 }, { "dpo_loss": 0.3046875, "epoch": 0.83, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 7.252668727646111e-08, "loss": 0.18, "projector_lr": 2.1758006182938332e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.15625, "sft_loss": 1.046875, "step": 5176 }, { "dpo_loss": 0.5234375, "epoch": 0.83, "final_loss": 0.5234375, "grad_norm": 0.0, "learning_rate": 7.239504735980045e-08, "loss": 0.4141, "projector_lr": 2.1718514207940138e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.03125, "sft_loss": 0.609375, "step": 5177 }, { "dpo_loss": 0.26171875, "epoch": 0.83, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 7.226351769406185e-08, "loss": 0.1605, "projector_lr": 2.167905530821856e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.46875, "rewards_train/margins": 4.46875, "rewards_train/rejected": -4.9375, "sft_loss": 0.92578125, "step": 5178 }, { "dpo_loss": 0.06982421875, "epoch": 0.83, "final_loss": 0.06982421875, "grad_norm": 0.0, "learning_rate": 7.213209831315831e-08, "loss": 0.216, "projector_lr": 2.1639629493947493e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.15625, "sft_loss": 0.5859375, "step": 5179 }, { "dpo_loss": 0.439453125, "epoch": 0.83, "final_loss": 0.439453125, "grad_norm": 0.0, "learning_rate": 7.200078925097392e-08, "loss": 0.2555, "projector_lr": 2.1600236775292175e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 3.578125, "rewards_train/rejected": -4.65625, "sft_loss": 0.671875, "step": 5180 }, { "dpo_loss": 0.1455078125, "epoch": 0.83, "final_loss": 0.1455078125, "grad_norm": 0.0, "learning_rate": 7.186959054136487e-08, "loss": 0.3095, "projector_lr": 2.1560877162409464e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.171875, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.21875, "sft_loss": 0.5859375, "step": 5181 }, { "dpo_loss": 0.265625, "epoch": 0.83, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 7.173850221815824e-08, "loss": 0.196, "projector_lr": 2.1521550665447477e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 3.296875, "rewards_train/rejected": -4.40625, "sft_loss": 0.765625, "step": 5182 }, { "dpo_loss": 0.24609375, "epoch": 0.83, "final_loss": 0.24609375, "grad_norm": 0.0, "learning_rate": 7.160752431515327e-08, "loss": 0.1599, "projector_lr": 2.1482257294545982e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.28125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.34375, "sft_loss": 0.90625, "step": 5183 }, { "dpo_loss": 0.142578125, "epoch": 0.83, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 7.147665686612026e-08, "loss": 0.0866, "projector_lr": 2.1442997059836077e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.34375, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.9375, "sft_loss": 0.7421875, "step": 5184 }, { "dpo_loss": 0.3828125, "epoch": 0.83, "final_loss": 0.3828125, "grad_norm": 0.0, "learning_rate": 7.134589990480139e-08, "loss": 0.5068, "projector_lr": 2.1403769971440418e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.140625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.28125, "sft_loss": 0.65234375, "step": 5185 }, { "dpo_loss": 0.00823974609375, "epoch": 0.83, "final_loss": 0.00823974609375, "grad_norm": 0.0, "learning_rate": 7.121525346491019e-08, "loss": 0.0427, "projector_lr": 2.136457603947306e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.5, "sft_loss": 0.7734375, "step": 5186 }, { "dpo_loss": 0.6875, "epoch": 0.83, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 7.108471758013168e-08, "loss": 0.3944, "projector_lr": 2.1325415274039505e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.015625, "rewards_train/margins": 3.921875, "rewards_train/rejected": -5.9375, "sft_loss": 0.76171875, "step": 5187 }, { "dpo_loss": 0.095703125, "epoch": 0.83, "final_loss": 0.095703125, "grad_norm": 0.0, "learning_rate": 7.09542922841222e-08, "loss": 0.1455, "projector_lr": 2.128628768523666e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.4375, "sft_loss": 0.9375, "step": 5188 }, { "dpo_loss": 0.458984375, "epoch": 0.83, "final_loss": 0.458984375, "grad_norm": 0.0, "learning_rate": 7.082397761051012e-08, "loss": 0.2332, "projector_lr": 2.1247193283153038e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5, "rewards_train/margins": 4.59375, "rewards_train/rejected": -6.09375, "sft_loss": 1.28125, "step": 5189 }, { "dpo_loss": 0.66015625, "epoch": 0.83, "final_loss": 0.66015625, "grad_norm": 0.0, "learning_rate": 7.069377359289475e-08, "loss": 0.5294, "projector_lr": 2.1208132077868425e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.25, "rewards_train/margins": 2.75, "rewards_train/rejected": -4.0, "sft_loss": 0.71875, "step": 5190 }, { "dpo_loss": 0.1748046875, "epoch": 0.83, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 7.056368026484705e-08, "loss": 0.1365, "projector_lr": 2.1169104079454115e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0625, "rewards_train/margins": 3.921875, "rewards_train/rejected": -5.96875, "sft_loss": 0.90234375, "step": 5191 }, { "dpo_loss": 0.0751953125, "epoch": 0.83, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 7.043369765990941e-08, "loss": 0.0817, "projector_lr": 2.1130109297972827e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.494140625, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.375, "sft_loss": 0.6875, "step": 5192 }, { "dpo_loss": 0.11572265625, "epoch": 0.83, "final_loss": 0.11572265625, "grad_norm": 0.0, "learning_rate": 7.030382581159589e-08, "loss": 0.1751, "projector_lr": 2.1091147743478766e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33984375, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.09375, "sft_loss": 0.72265625, "step": 5193 }, { "dpo_loss": 0.142578125, "epoch": 0.83, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 7.017406475339155e-08, "loss": 0.251, "projector_lr": 2.1052219426017467e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.78125, "rewards_train/margins": 3.390625, "rewards_train/rejected": -4.1875, "sft_loss": 0.7578125, "step": 5194 }, { "dpo_loss": 0.0166015625, "epoch": 0.83, "final_loss": 0.0166015625, "grad_norm": 0.0, "learning_rate": 7.004441451875353e-08, "loss": 0.1641, "projector_lr": 2.1013324355626062e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0625, "rewards_train/margins": 6.03125, "rewards_train/rejected": -8.0625, "sft_loss": 0.890625, "step": 5195 }, { "dpo_loss": 0.294921875, "epoch": 0.83, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 6.991487514110961e-08, "loss": 0.2937, "projector_lr": 2.0974462542332885e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.75, "sft_loss": 0.73046875, "step": 5196 }, { "dpo_loss": 0.0986328125, "epoch": 0.83, "final_loss": 0.0986328125, "grad_norm": 0.0, "learning_rate": 6.978544665385966e-08, "loss": 0.0757, "projector_lr": 2.0935633996157898e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 6.5, "rewards_train/rejected": -7.15625, "sft_loss": 0.61328125, "step": 5197 }, { "dpo_loss": 0.140625, "epoch": 0.83, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 6.965612909037449e-08, "loss": 0.1002, "projector_lr": 2.089683872711235e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.625, "sft_loss": 0.546875, "step": 5198 }, { "dpo_loss": 0.302734375, "epoch": 0.83, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 6.952692248399688e-08, "loss": 0.2779, "projector_lr": 2.0858076745199069e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9609375, "rewards_train/margins": 3.453125, "rewards_train/rejected": -5.4375, "sft_loss": 0.9296875, "step": 5199 }, { "dpo_loss": 0.2109375, "epoch": 0.83, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 6.939782686804019e-08, "loss": 0.1654, "projector_lr": 2.0819348060412058e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.65625, "sft_loss": 0.6015625, "step": 5200 }, { "dpo_loss": 0.08837890625, "epoch": 0.83, "final_loss": 0.08837890625, "grad_norm": 0.0, "learning_rate": 6.926884227578983e-08, "loss": 0.1101, "projector_lr": 2.078065268273695e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.28125, "sft_loss": 0.7578125, "step": 5201 }, { "dpo_loss": 0.177734375, "epoch": 0.83, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 6.913996874050226e-08, "loss": 0.1366, "projector_lr": 2.074199062215068e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6875, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.90625, "sft_loss": 0.8515625, "step": 5202 }, { "dpo_loss": 0.0172119140625, "epoch": 0.83, "final_loss": 0.0172119140625, "grad_norm": 0.0, "learning_rate": 6.901120629540563e-08, "loss": 0.1121, "projector_lr": 2.0703361888621692e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.6875, "sft_loss": 0.703125, "step": 5203 }, { "dpo_loss": 0.01226806640625, "epoch": 0.83, "final_loss": 0.01226806640625, "grad_norm": 0.0, "learning_rate": 6.888255497369905e-08, "loss": 0.199, "projector_lr": 2.0664766492109715e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.40625, "sft_loss": 0.75390625, "step": 5204 }, { "dpo_loss": 0.01422119140625, "epoch": 0.83, "final_loss": 0.01422119140625, "grad_norm": 0.0, "learning_rate": 6.875401480855315e-08, "loss": 0.0466, "projector_lr": 2.0626204442565944e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 6.5625, "rewards_train/rejected": -6.90625, "sft_loss": 0.76171875, "step": 5205 }, { "dpo_loss": 0.0458984375, "epoch": 0.83, "final_loss": 0.0458984375, "grad_norm": 0.0, "learning_rate": 6.862558583310979e-08, "loss": 0.0714, "projector_lr": 2.058767574993294e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.875, "sft_loss": 0.71484375, "step": 5206 }, { "dpo_loss": 0.474609375, "epoch": 0.83, "final_loss": 0.474609375, "grad_norm": 0.0, "learning_rate": 6.849726808048256e-08, "loss": 0.3405, "projector_lr": 2.054918042414477e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.96875, "rewards_train/margins": 2.46875, "rewards_train/rejected": -3.4375, "sft_loss": 0.91796875, "step": 5207 }, { "dpo_loss": 0.33984375, "epoch": 0.83, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 6.836906158375593e-08, "loss": 0.3293, "projector_lr": 2.0510718475126782e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.71875, "sft_loss": 0.8515625, "step": 5208 }, { "dpo_loss": 0.1328125, "epoch": 0.83, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 6.824096637598593e-08, "loss": 0.0691, "projector_lr": 2.0472289912795778e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.6875, "sft_loss": 0.6171875, "step": 5209 }, { "dpo_loss": 0.061279296875, "epoch": 0.83, "final_loss": 0.061279296875, "grad_norm": 0.0, "learning_rate": 6.811298249019954e-08, "loss": 0.1572, "projector_lr": 2.0433894747059862e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2578125, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.4375, "sft_loss": 0.61328125, "step": 5210 }, { "dpo_loss": 0.1748046875, "epoch": 0.83, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 6.798510995939561e-08, "loss": 0.1947, "projector_lr": 2.0395532987818682e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7109375, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.84375, "sft_loss": 0.98828125, "step": 5211 }, { "dpo_loss": 0.16015625, "epoch": 0.83, "final_loss": 0.16015625, "grad_norm": 0.0, "learning_rate": 6.785734881654376e-08, "loss": 0.2451, "projector_lr": 2.035720464496313e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.44921875, "rewards_train/margins": 6.09375, "rewards_train/rejected": -6.53125, "sft_loss": 0.796875, "step": 5212 }, { "dpo_loss": 0.11865234375, "epoch": 0.83, "final_loss": 0.11865234375, "grad_norm": 0.0, "learning_rate": 6.772969909458532e-08, "loss": 0.1591, "projector_lr": 2.0318909728375595e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.4375, "sft_loss": 0.6875, "step": 5213 }, { "dpo_loss": 0.0294189453125, "epoch": 0.83, "final_loss": 0.0294189453125, "grad_norm": 0.0, "learning_rate": 6.760216082643255e-08, "loss": 0.1445, "projector_lr": 2.0280648247929767e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.010986328125, "rewards_train/margins": 5.65625, "rewards_train/rejected": -5.65625, "sft_loss": 0.54296875, "step": 5214 }, { "dpo_loss": 0.1865234375, "epoch": 0.83, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 6.747473404496901e-08, "loss": 0.2067, "projector_lr": 2.0242420213490702e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.71875, "sft_loss": 0.73046875, "step": 5215 }, { "dpo_loss": 0.2890625, "epoch": 0.83, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 6.734741878304983e-08, "loss": 0.2502, "projector_lr": 2.0204225634914953e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.50390625, "rewards_train/margins": 3.375, "rewards_train/rejected": -3.875, "sft_loss": 0.81640625, "step": 5216 }, { "dpo_loss": 0.1396484375, "epoch": 0.83, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 6.722021507350101e-08, "loss": 0.1921, "projector_lr": 2.0166064522050304e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.5, "sft_loss": 1.234375, "step": 5217 }, { "dpo_loss": 0.09326171875, "epoch": 0.83, "final_loss": 0.09326171875, "grad_norm": 0.0, "learning_rate": 6.709312294911995e-08, "loss": 0.0953, "projector_lr": 2.0127936884735987e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.96875, "sft_loss": 0.73046875, "step": 5218 }, { "dpo_loss": 0.162109375, "epoch": 0.84, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 6.696614244267524e-08, "loss": 0.2232, "projector_lr": 2.008984273280257e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.125, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.625, "sft_loss": 0.703125, "step": 5219 }, { "dpo_loss": 0.01275634765625, "epoch": 0.84, "final_loss": 0.01275634765625, "grad_norm": 0.0, "learning_rate": 6.683927358690683e-08, "loss": 0.1661, "projector_lr": 2.005178207607205e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.90625, "sft_loss": 0.80078125, "step": 5220 }, { "dpo_loss": 0.212890625, "epoch": 0.84, "final_loss": 0.212890625, "grad_norm": 0.0, "learning_rate": 6.671251641452563e-08, "loss": 0.4027, "projector_lr": 2.0013754924357692e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.8125, "sft_loss": 0.76953125, "step": 5221 }, { "dpo_loss": 0.177734375, "epoch": 0.84, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 6.658587095821416e-08, "loss": 0.1741, "projector_lr": 1.997576128746425e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 3.640625, "rewards_train/rejected": -4.3125, "sft_loss": 0.7109375, "step": 5222 }, { "dpo_loss": 0.154296875, "epoch": 0.84, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 6.645933725062553e-08, "loss": 0.2257, "projector_lr": 1.993780117518766e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.5625, "sft_loss": 1.09375, "step": 5223 }, { "dpo_loss": 0.0380859375, "epoch": 0.84, "final_loss": 0.0380859375, "grad_norm": 0.0, "learning_rate": 6.633291532438462e-08, "loss": 0.0593, "projector_lr": 1.9899874597315387e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.90625, "sft_loss": 0.7421875, "step": 5224 }, { "dpo_loss": 0.054443359375, "epoch": 0.84, "final_loss": 0.054443359375, "grad_norm": 0.0, "learning_rate": 6.620660521208705e-08, "loss": 0.1362, "projector_lr": 1.9861981563626114e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96875, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.0, "sft_loss": 0.765625, "step": 5225 }, { "dpo_loss": 0.036865234375, "epoch": 0.84, "final_loss": 0.036865234375, "grad_norm": 0.0, "learning_rate": 6.608040694630018e-08, "loss": 0.0295, "projector_lr": 1.9824122083890057e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.796875, "rewards_train/margins": 7.5, "rewards_train/rejected": -8.3125, "sft_loss": 0.8125, "step": 5226 }, { "dpo_loss": 0.0712890625, "epoch": 0.84, "final_loss": 0.0712890625, "grad_norm": 0.0, "learning_rate": 6.595432055956168e-08, "loss": 0.0566, "projector_lr": 1.9786296167868506e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5234375, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.78125, "sft_loss": 0.92578125, "step": 5227 }, { "dpo_loss": 0.12109375, "epoch": 0.84, "final_loss": 0.12109375, "grad_norm": 0.0, "learning_rate": 6.582834608438121e-08, "loss": 0.0728, "projector_lr": 1.9748503825314363e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.50390625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.5, "sft_loss": 0.75390625, "step": 5228 }, { "dpo_loss": 0.23828125, "epoch": 0.84, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 6.570248355323898e-08, "loss": 0.1727, "projector_lr": 1.9710745065971697e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.84375, "sft_loss": 0.78515625, "step": 5229 }, { "dpo_loss": 0.10546875, "epoch": 0.84, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 6.55767329985868e-08, "loss": 0.1561, "projector_lr": 1.9673019899576038e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.75, "sft_loss": 0.73046875, "step": 5230 }, { "dpo_loss": 0.376953125, "epoch": 0.84, "final_loss": 0.376953125, "grad_norm": 0.0, "learning_rate": 6.54510944528473e-08, "loss": 0.3, "projector_lr": 1.963532833585419e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.546875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.8125, "sft_loss": 0.8984375, "step": 5231 }, { "dpo_loss": 0.263671875, "epoch": 0.84, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 6.532556794841432e-08, "loss": 0.1441, "projector_lr": 1.9597670384524297e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.21875, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.53125, "sft_loss": 0.94921875, "step": 5232 }, { "dpo_loss": 0.212890625, "epoch": 0.84, "final_loss": 0.212890625, "grad_norm": 0.0, "learning_rate": 6.520015351765267e-08, "loss": 0.1342, "projector_lr": 1.9560046055295804e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.21875, "sft_loss": 0.5546875, "step": 5233 }, { "dpo_loss": 0.302734375, "epoch": 0.84, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 6.507485119289869e-08, "loss": 0.2952, "projector_lr": 1.9522455357869606e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.38671875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.65625, "sft_loss": 0.455078125, "step": 5234 }, { "dpo_loss": 0.63671875, "epoch": 0.84, "final_loss": 0.63671875, "grad_norm": 0.0, "learning_rate": 6.494966100645932e-08, "loss": 0.377, "projector_lr": 1.94848983019378e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.140625, "rewards_train/margins": 3.328125, "rewards_train/rejected": -5.46875, "sft_loss": 1.125, "step": 5235 }, { "dpo_loss": 0.06884765625, "epoch": 0.84, "final_loss": 0.06884765625, "grad_norm": 0.0, "learning_rate": 6.482458299061283e-08, "loss": 0.1101, "projector_lr": 1.944737489718385e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.625, "sft_loss": 0.6328125, "step": 5236 }, { "dpo_loss": 0.0869140625, "epoch": 0.84, "final_loss": 0.0869140625, "grad_norm": 0.0, "learning_rate": 6.469961717760852e-08, "loss": 0.1, "projector_lr": 1.9409885153282558e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.8125, "sft_loss": 0.83984375, "step": 5237 }, { "dpo_loss": 0.150390625, "epoch": 0.84, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 6.457476359966684e-08, "loss": 0.1304, "projector_lr": 1.9372429079900056e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.5, "rewards_train/margins": 7.34375, "rewards_train/rejected": -6.84375, "sft_loss": 0.734375, "step": 5238 }, { "dpo_loss": 0.056884765625, "epoch": 0.84, "final_loss": 0.056884765625, "grad_norm": 0.0, "learning_rate": 6.44500222889791e-08, "loss": 0.0783, "projector_lr": 1.9335006686693734e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.15625, "rewards_train/margins": 7.875, "rewards_train/rejected": -9.0625, "sft_loss": 0.76953125, "step": 5239 }, { "dpo_loss": 0.038818359375, "epoch": 0.84, "final_loss": 0.038818359375, "grad_norm": 0.0, "learning_rate": 6.432539327770814e-08, "loss": 0.0742, "projector_lr": 1.9297617983312444e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.78125, "sft_loss": 0.53125, "step": 5240 }, { "dpo_loss": 0.345703125, "epoch": 0.84, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 6.420087659798707e-08, "loss": 0.1781, "projector_lr": 1.926026297939612e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 3.171875, "rewards_train/rejected": -4.84375, "sft_loss": 1.234375, "step": 5241 }, { "dpo_loss": 0.146484375, "epoch": 0.84, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 6.407647228192076e-08, "loss": 0.1927, "projector_lr": 1.9222941684576229e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 4.625, "rewards_train/rejected": -6.34375, "sft_loss": 0.890625, "step": 5242 }, { "dpo_loss": 0.42578125, "epoch": 0.84, "final_loss": 0.42578125, "grad_norm": 0.0, "learning_rate": 6.395218036158462e-08, "loss": 0.2294, "projector_lr": 1.9185654108475388e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 2.859375, "rewards_train/rejected": -3.734375, "sft_loss": 1.0078125, "step": 5243 }, { "dpo_loss": 0.07470703125, "epoch": 0.84, "final_loss": 0.07470703125, "grad_norm": 0.0, "learning_rate": 6.382800086902562e-08, "loss": 0.3318, "projector_lr": 1.9148400260707688e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59375, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.53125, "sft_loss": 0.82421875, "step": 5244 }, { "dpo_loss": 0.1494140625, "epoch": 0.84, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 6.370393383626093e-08, "loss": 0.1793, "projector_lr": 1.9111180150878282e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.203125, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.84375, "sft_loss": 0.7578125, "step": 5245 }, { "dpo_loss": 0.058349609375, "epoch": 0.84, "final_loss": 0.058349609375, "grad_norm": 0.0, "learning_rate": 6.357997929527964e-08, "loss": 0.07, "projector_lr": 1.9073993788583892e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.74609375, "rewards_train/margins": 6.59375, "rewards_train/rejected": -5.84375, "sft_loss": 0.71875, "step": 5246 }, { "dpo_loss": 0.5703125, "epoch": 0.84, "final_loss": 0.5703125, "grad_norm": 0.0, "learning_rate": 6.3456137278041e-08, "loss": 0.3235, "projector_lr": 1.90368411834123e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.75, "rewards_train/margins": 2.5625, "rewards_train/rejected": -3.3125, "sft_loss": 0.80078125, "step": 5247 }, { "dpo_loss": 0.1259765625, "epoch": 0.84, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 6.333240781647603e-08, "loss": 0.0835, "projector_lr": 1.899972234494281e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.025146484375, "rewards_train/margins": 5.90625, "rewards_train/rejected": -5.9375, "sft_loss": 0.6875, "step": 5248 }, { "dpo_loss": 0.06689453125, "epoch": 0.84, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 6.320879094248616e-08, "loss": 0.0765, "projector_lr": 1.8962637282745847e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44921875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.875, "sft_loss": 0.9765625, "step": 5249 }, { "dpo_loss": 0.0751953125, "epoch": 0.84, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 6.308528668794394e-08, "loss": 0.075, "projector_lr": 1.8925586006383183e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.90625, "sft_loss": 0.859375, "step": 5250 }, { "dpo_loss": 0.03515625, "epoch": 0.84, "final_loss": 0.03515625, "grad_norm": 0.0, "learning_rate": 6.296189508469285e-08, "loss": 0.0979, "projector_lr": 1.8888568525407855e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.9375, "sft_loss": 0.8046875, "step": 5251 }, { "dpo_loss": 0.00433349609375, "epoch": 0.84, "final_loss": 0.00433349609375, "grad_norm": 0.0, "learning_rate": 6.283861616454755e-08, "loss": 0.0276, "projector_lr": 1.8851584849364267e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.875, "rewards_train/margins": 7.21875, "rewards_train/rejected": -9.0625, "sft_loss": 0.703125, "step": 5252 }, { "dpo_loss": 0.05419921875, "epoch": 0.84, "final_loss": 0.05419921875, "grad_norm": 0.0, "learning_rate": 6.271544995929351e-08, "loss": 0.0857, "projector_lr": 1.8814634987788053e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.46875, "sft_loss": 1.1953125, "step": 5253 }, { "dpo_loss": 0.078125, "epoch": 0.84, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 6.259239650068687e-08, "loss": 0.1592, "projector_lr": 1.8777718950206062e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.53125, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.84375, "sft_loss": 0.828125, "step": 5254 }, { "dpo_loss": 0.0015716552734375, "epoch": 0.84, "final_loss": 0.0015716552734375, "grad_norm": 0.0, "learning_rate": 6.246945582045521e-08, "loss": 0.0689, "projector_lr": 1.8740836746136563e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 7.46875, "rewards_train/rejected": -7.75, "sft_loss": 0.6796875, "step": 5255 }, { "dpo_loss": 0.34375, "epoch": 0.84, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 6.234662795029671e-08, "loss": 0.2707, "projector_lr": 1.8703988385089016e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.054931640625, "rewards_train/margins": 6.90625, "rewards_train/rejected": -6.84375, "sft_loss": 0.71875, "step": 5256 }, { "dpo_loss": 0.07666015625, "epoch": 0.84, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 6.222391292188034e-08, "loss": 0.2133, "projector_lr": 1.8667173876564104e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.9375, "sft_loss": 0.55859375, "step": 5257 }, { "dpo_loss": 0.37109375, "epoch": 0.84, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 6.210131076684638e-08, "loss": 0.2519, "projector_lr": 1.8630393230053916e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.984375, "rewards_train/margins": 4.59375, "rewards_train/rejected": -6.5625, "sft_loss": 0.6640625, "step": 5258 }, { "dpo_loss": 0.08251953125, "epoch": 0.84, "final_loss": 0.08251953125, "grad_norm": 0.0, "learning_rate": 6.197882151680572e-08, "loss": 0.086, "projector_lr": 1.859364645504172e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.0, "sft_loss": 0.8671875, "step": 5259 }, { "dpo_loss": 0.058349609375, "epoch": 0.84, "final_loss": 0.058349609375, "grad_norm": 0.0, "learning_rate": 6.18564452033401e-08, "loss": 0.1861, "projector_lr": 1.8556933561002033e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.181640625, "rewards_train/margins": 6.09375, "rewards_train/rejected": -6.28125, "sft_loss": 0.640625, "step": 5260 }, { "dpo_loss": 0.1240234375, "epoch": 0.84, "final_loss": 0.1240234375, "grad_norm": 0.0, "learning_rate": 6.173418185800239e-08, "loss": 0.1227, "projector_lr": 1.852025455740072e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.59375, "sft_loss": 1.03125, "step": 5261 }, { "dpo_loss": 0.2470703125, "epoch": 0.84, "final_loss": 0.2470703125, "grad_norm": 0.0, "learning_rate": 6.161203151231614e-08, "loss": 0.261, "projector_lr": 1.8483609453694845e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.53125, "sft_loss": 0.9140625, "step": 5262 }, { "dpo_loss": 0.1767578125, "epoch": 0.84, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 6.148999419777584e-08, "loss": 0.124, "projector_lr": 1.8446998259332755e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.515625, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.8125, "sft_loss": 0.80859375, "step": 5263 }, { "dpo_loss": 0.11474609375, "epoch": 0.84, "final_loss": 0.11474609375, "grad_norm": 0.0, "learning_rate": 6.136806994584659e-08, "loss": 0.2183, "projector_lr": 1.8410420983753978e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.3125, "sft_loss": 0.6875, "step": 5264 }, { "dpo_loss": 0.042236328125, "epoch": 0.84, "final_loss": 0.042236328125, "grad_norm": 0.0, "learning_rate": 6.124625878796491e-08, "loss": 0.0773, "projector_lr": 1.8373877636389473e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.40625, "rewards_train/margins": 6.46875, "rewards_train/rejected": -8.875, "sft_loss": 1.03125, "step": 5265 }, { "dpo_loss": 0.26953125, "epoch": 0.84, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 6.112456075553746e-08, "loss": 0.2173, "projector_lr": 1.8337368226661238e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.59375, "rewards_train/margins": 4.625, "rewards_train/rejected": -6.21875, "sft_loss": 0.9765625, "step": 5266 }, { "dpo_loss": 0.1416015625, "epoch": 0.84, "final_loss": 0.1416015625, "grad_norm": 0.0, "learning_rate": 6.100297587994252e-08, "loss": 0.4187, "projector_lr": 1.8300892763982756e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.3671875, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.375, "sft_loss": 1.0234375, "step": 5267 }, { "dpo_loss": 0.546875, "epoch": 0.84, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 6.088150419252825e-08, "loss": 0.3241, "projector_lr": 1.8264451257758473e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.203125, "rewards_train/margins": 1.53125, "rewards_train/rejected": -2.734375, "sft_loss": 0.86328125, "step": 5268 }, { "dpo_loss": 0.024658203125, "epoch": 0.84, "final_loss": 0.024658203125, "grad_norm": 0.0, "learning_rate": 6.076014572461447e-08, "loss": 0.0865, "projector_lr": 1.822804371738434e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9375, "rewards_train/margins": 4.25, "rewards_train/rejected": -6.1875, "sft_loss": 0.88671875, "step": 5269 }, { "dpo_loss": 0.34765625, "epoch": 0.84, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 6.063890050749127e-08, "loss": 0.2165, "projector_lr": 1.819167015224738e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.75, "sft_loss": 0.8046875, "step": 5270 }, { "dpo_loss": 0.353515625, "epoch": 0.84, "final_loss": 0.353515625, "grad_norm": 0.0, "learning_rate": 6.051776857242008e-08, "loss": 0.2585, "projector_lr": 1.8155330571726025e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.53125, "rewards_train/rejected": -4.5625, "sft_loss": 0.70703125, "step": 5271 }, { "dpo_loss": 0.63671875, "epoch": 0.84, "final_loss": 0.63671875, "grad_norm": 0.0, "learning_rate": 6.039674995063231e-08, "loss": 0.472, "projector_lr": 1.8119024985189698e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.71875, "rewards_train/margins": 3.6875, "rewards_train/rejected": -5.40625, "sft_loss": 0.8671875, "step": 5272 }, { "dpo_loss": 0.09326171875, "epoch": 0.84, "final_loss": 0.09326171875, "grad_norm": 0.0, "learning_rate": 6.027584467333097e-08, "loss": 0.5247, "projector_lr": 1.808275340199929e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.09375, "sft_loss": 0.71484375, "step": 5273 }, { "dpo_loss": 0.0263671875, "epoch": 0.84, "final_loss": 0.0263671875, "grad_norm": 0.0, "learning_rate": 6.01550527716893e-08, "loss": 0.0374, "projector_lr": 1.8046515831506795e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19140625, "rewards_train/margins": 5.53125, "rewards_train/rejected": -5.71875, "sft_loss": 0.52734375, "step": 5274 }, { "dpo_loss": 0.11572265625, "epoch": 0.84, "final_loss": 0.11572265625, "grad_norm": 0.0, "learning_rate": 6.003437427685171e-08, "loss": 0.1896, "projector_lr": 1.8010312283055514e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.75, "rewards_train/rejected": -6.0625, "sft_loss": 0.96484375, "step": 5275 }, { "dpo_loss": 0.053466796875, "epoch": 0.84, "final_loss": 0.053466796875, "grad_norm": 0.0, "learning_rate": 5.991380921993306e-08, "loss": 0.0555, "projector_lr": 1.7974142765979918e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.578125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.03125, "sft_loss": 0.98046875, "step": 5276 }, { "dpo_loss": 0.10009765625, "epoch": 0.84, "final_loss": 0.10009765625, "grad_norm": 0.0, "learning_rate": 5.979335763201904e-08, "loss": 0.1323, "projector_lr": 1.7938007289605713e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.62109375, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.71875, "sft_loss": 0.87109375, "step": 5277 }, { "dpo_loss": 0.0830078125, "epoch": 0.84, "final_loss": 0.0830078125, "grad_norm": 0.0, "learning_rate": 5.96730195441661e-08, "loss": 0.2209, "projector_lr": 1.790190586324983e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38671875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -5.96875, "sft_loss": 0.7578125, "step": 5278 }, { "dpo_loss": 0.11279296875, "epoch": 0.84, "final_loss": 0.11279296875, "grad_norm": 0.0, "learning_rate": 5.9552794987401564e-08, "loss": 0.1726, "projector_lr": 1.786583849622047e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1103515625, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.09375, "sft_loss": 0.5859375, "step": 5279 }, { "dpo_loss": 0.07177734375, "epoch": 0.84, "final_loss": 0.07177734375, "grad_norm": 0.0, "learning_rate": 5.943268399272328e-08, "loss": 0.1771, "projector_lr": 1.7829805197816985e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.1875, "sft_loss": 0.75390625, "step": 5280 }, { "dpo_loss": 0.06396484375, "epoch": 0.84, "final_loss": 0.06396484375, "grad_norm": 0.0, "learning_rate": 5.9312686591099946e-08, "loss": 0.0399, "projector_lr": 1.7793805977329986e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.181640625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.375, "sft_loss": 0.7421875, "step": 5281 }, { "dpo_loss": 0.16015625, "epoch": 0.85, "final_loss": 0.16015625, "grad_norm": 0.0, "learning_rate": 5.919280281347078e-08, "loss": 0.1201, "projector_lr": 1.7757840844041235e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.46484375, "rewards_train/margins": 6.15625, "rewards_train/rejected": -6.625, "sft_loss": 0.46484375, "step": 5282 }, { "dpo_loss": 0.08447265625, "epoch": 0.85, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 5.907303269074604e-08, "loss": 0.1882, "projector_lr": 1.7721909807223812e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.279296875, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.25, "sft_loss": 0.9375, "step": 5283 }, { "dpo_loss": 0.046875, "epoch": 0.85, "final_loss": 0.046875, "grad_norm": 0.0, "learning_rate": 5.895337625380631e-08, "loss": 0.2049, "projector_lr": 1.7686012876141893e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.90234375, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.75, "sft_loss": 0.75390625, "step": 5284 }, { "dpo_loss": 0.19921875, "epoch": 0.85, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 5.88338335335033e-08, "loss": 0.1746, "projector_lr": 1.765015006005099e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53515625, "rewards_train/margins": 2.96875, "rewards_train/rejected": -3.5, "sft_loss": 0.69921875, "step": 5285 }, { "dpo_loss": 0.23828125, "epoch": 0.85, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 5.871440456065879e-08, "loss": 0.1484, "projector_lr": 1.761432136819764e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.34375, "sft_loss": 0.91796875, "step": 5286 }, { "dpo_loss": 0.2734375, "epoch": 0.85, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 5.859508936606588e-08, "loss": 0.2339, "projector_lr": 1.7578526809819767e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -4.96875, "sft_loss": 0.54296875, "step": 5287 }, { "dpo_loss": 0.0908203125, "epoch": 0.85, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 5.8475887980487806e-08, "loss": 0.0872, "projector_lr": 1.7542766394146343e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.1875, "sft_loss": 0.83203125, "step": 5288 }, { "dpo_loss": 0.193359375, "epoch": 0.85, "final_loss": 0.193359375, "grad_norm": 0.0, "learning_rate": 5.835680043465885e-08, "loss": 0.3163, "projector_lr": 1.7507040130397656e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.125, "sft_loss": 1.390625, "step": 5289 }, { "dpo_loss": 0.59765625, "epoch": 0.85, "final_loss": 0.59765625, "grad_norm": 0.0, "learning_rate": 5.8237826759283794e-08, "loss": 0.4841, "projector_lr": 1.747134802778514e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.0625, "sft_loss": 0.69140625, "step": 5290 }, { "dpo_loss": 0.10986328125, "epoch": 0.85, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 5.811896698503799e-08, "loss": 0.0596, "projector_lr": 1.7435690095511397e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 4.625, "rewards_train/rejected": -6.34375, "sft_loss": 0.7109375, "step": 5291 }, { "dpo_loss": 0.158203125, "epoch": 0.85, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 5.800022114256731e-08, "loss": 0.368, "projector_lr": 1.7400066342770192e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.671875, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.75, "sft_loss": 0.734375, "step": 5292 }, { "dpo_loss": 0.201171875, "epoch": 0.85, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 5.788158926248876e-08, "loss": 0.1202, "projector_lr": 1.736447677874663e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.03125, "sft_loss": 0.80078125, "step": 5293 }, { "dpo_loss": 0.01373291015625, "epoch": 0.85, "final_loss": 0.01373291015625, "grad_norm": 0.0, "learning_rate": 5.7763071375389437e-08, "loss": 0.0274, "projector_lr": 1.7328921412616833e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 6.125, "rewards_train/rejected": -7.21875, "sft_loss": 0.703125, "step": 5294 }, { "dpo_loss": 0.357421875, "epoch": 0.85, "final_loss": 0.357421875, "grad_norm": 0.0, "learning_rate": 5.764466751182712e-08, "loss": 0.1927, "projector_lr": 1.7293400253548135e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.96875, "sft_loss": 0.7421875, "step": 5295 }, { "dpo_loss": 0.06884765625, "epoch": 0.85, "final_loss": 0.06884765625, "grad_norm": 0.0, "learning_rate": 5.752637770233054e-08, "loss": 0.084, "projector_lr": 1.7257913310699165e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 5.90625, "rewards_train/rejected": -7.03125, "sft_loss": 0.6640625, "step": 5296 }, { "dpo_loss": 0.1328125, "epoch": 0.85, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 5.7408201977398704e-08, "loss": 0.3779, "projector_lr": 1.7222460593219615e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 5.4375, "rewards_train/rejected": -7.03125, "sft_loss": 0.8125, "step": 5297 }, { "dpo_loss": 0.1787109375, "epoch": 0.85, "final_loss": 0.1787109375, "grad_norm": 0.0, "learning_rate": 5.7290140367501236e-08, "loss": 0.3252, "projector_lr": 1.7187042110250373e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 6.125, "rewards_train/rejected": -6.625, "sft_loss": 0.6015625, "step": 5298 }, { "dpo_loss": 0.302734375, "epoch": 0.85, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 5.7172192903078344e-08, "loss": 0.1695, "projector_lr": 1.7151657870923505e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.4375, "sft_loss": 0.671875, "step": 5299 }, { "dpo_loss": 0.10546875, "epoch": 0.85, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 5.705435961454097e-08, "loss": 0.1202, "projector_lr": 1.7116307884362293e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.375, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.21875, "sft_loss": 1.125, "step": 5300 }, { "dpo_loss": 0.056396484375, "epoch": 0.85, "final_loss": 0.056396484375, "grad_norm": 0.0, "learning_rate": 5.6936640532270316e-08, "loss": 0.1323, "projector_lr": 1.7080992159681098e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.234375, "rewards_train/margins": 5.21875, "rewards_train/rejected": -7.4375, "sft_loss": 0.90234375, "step": 5301 }, { "dpo_loss": 0.14453125, "epoch": 0.85, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 5.6819035686618554e-08, "loss": 0.0864, "projector_lr": 1.7045710705985568e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8203125, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.59375, "sft_loss": 0.72265625, "step": 5302 }, { "dpo_loss": 0.212890625, "epoch": 0.85, "final_loss": 0.212890625, "grad_norm": 0.0, "learning_rate": 5.670154510790803e-08, "loss": 0.1889, "projector_lr": 1.701046353237241e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.25, "sft_loss": 1.0, "step": 5303 }, { "dpo_loss": 0.154296875, "epoch": 0.85, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 5.65841688264318e-08, "loss": 0.3302, "projector_lr": 1.697525064792954e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.21875, "rewards_train/margins": 3.078125, "rewards_train/rejected": -4.3125, "sft_loss": 0.60546875, "step": 5304 }, { "dpo_loss": 0.384765625, "epoch": 0.85, "final_loss": 0.384765625, "grad_norm": 0.0, "learning_rate": 5.6466906872453314e-08, "loss": 0.3884, "projector_lr": 1.6940072061735995e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.234375, "rewards_train/margins": 3.46875, "rewards_train/rejected": -3.703125, "sft_loss": 0.765625, "step": 5305 }, { "dpo_loss": 0.10986328125, "epoch": 0.85, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 5.6349759276206774e-08, "loss": 0.2055, "projector_lr": 1.6904927782862035e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34765625, "rewards_train/margins": 6.59375, "rewards_train/rejected": -6.9375, "sft_loss": 0.6953125, "step": 5306 }, { "dpo_loss": 0.57421875, "epoch": 0.85, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 5.6232726067896695e-08, "loss": 0.4684, "projector_lr": 1.686981782036901e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4375, "rewards_train/margins": 3.34375, "rewards_train/rejected": -4.78125, "sft_loss": 0.86328125, "step": 5307 }, { "dpo_loss": 0.07568359375, "epoch": 0.85, "final_loss": 0.07568359375, "grad_norm": 0.0, "learning_rate": 5.6115807277698216e-08, "loss": 0.255, "projector_lr": 1.6834742183309466e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.8125, "sft_loss": 0.7109375, "step": 5308 }, { "dpo_loss": 0.119140625, "epoch": 0.85, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 5.599900293575677e-08, "loss": 0.0724, "projector_lr": 1.679970088072703e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.4765625, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.03125, "sft_loss": 0.6484375, "step": 5309 }, { "dpo_loss": 0.076171875, "epoch": 0.85, "final_loss": 0.076171875, "grad_norm": 0.0, "learning_rate": 5.5882313072188715e-08, "loss": 0.0859, "projector_lr": 1.6764693921656616e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.25, "sft_loss": 0.6796875, "step": 5310 }, { "dpo_loss": 0.185546875, "epoch": 0.85, "final_loss": 0.185546875, "grad_norm": 0.0, "learning_rate": 5.576573771708032e-08, "loss": 0.2868, "projector_lr": 1.6729721315124096e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.80859375, "rewards_train/margins": 3.28125, "rewards_train/rejected": -4.09375, "sft_loss": 0.625, "step": 5311 }, { "dpo_loss": 0.08837890625, "epoch": 0.85, "final_loss": 0.08837890625, "grad_norm": 0.0, "learning_rate": 5.5649276900488984e-08, "loss": 0.1102, "projector_lr": 1.6694783070146696e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.78125, "sft_loss": 0.66015625, "step": 5312 }, { "dpo_loss": 0.1474609375, "epoch": 0.85, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 5.5532930652441854e-08, "loss": 0.0755, "projector_lr": 1.665987919573256e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84375, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.0, "sft_loss": 0.68359375, "step": 5313 }, { "dpo_loss": 0.08642578125, "epoch": 0.85, "final_loss": 0.08642578125, "grad_norm": 0.0, "learning_rate": 5.5416699002937106e-08, "loss": 0.2033, "projector_lr": 1.6625009700881132e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.140625, "rewards_train/margins": 5.75, "rewards_train/rejected": -7.875, "sft_loss": 1.15625, "step": 5314 }, { "dpo_loss": 0.2080078125, "epoch": 0.85, "final_loss": 0.2080078125, "grad_norm": 0.0, "learning_rate": 5.5300581981943096e-08, "loss": 0.1232, "projector_lr": 1.659017459458293e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 3.65625, "rewards_train/rejected": -4.90625, "sft_loss": 0.6484375, "step": 5315 }, { "dpo_loss": 0.1171875, "epoch": 0.85, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 5.518457961939882e-08, "loss": 0.0916, "projector_lr": 1.6555373885819647e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.8125, "sft_loss": 0.7578125, "step": 5316 }, { "dpo_loss": 0.0791015625, "epoch": 0.85, "final_loss": 0.0791015625, "grad_norm": 0.0, "learning_rate": 5.506869194521335e-08, "loss": 0.1272, "projector_lr": 1.6520607583564007e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8203125, "rewards_train/margins": 5.40625, "rewards_train/rejected": -7.21875, "sft_loss": 0.72265625, "step": 5317 }, { "dpo_loss": 0.06201171875, "epoch": 0.85, "final_loss": 0.06201171875, "grad_norm": 0.0, "learning_rate": 5.495291898926663e-08, "loss": 0.2732, "projector_lr": 1.6485875696779988e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.90625, "sft_loss": 1.0859375, "step": 5318 }, { "dpo_loss": 0.1162109375, "epoch": 0.85, "final_loss": 0.1162109375, "grad_norm": 0.0, "learning_rate": 5.48372607814086e-08, "loss": 0.1112, "projector_lr": 1.645117823442258e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.65625, "sft_loss": 0.61328125, "step": 5319 }, { "dpo_loss": 0.2734375, "epoch": 0.85, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 5.472171735146014e-08, "loss": 0.2247, "projector_lr": 1.6416515205438043e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.609375, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.78125, "sft_loss": 0.59765625, "step": 5320 }, { "dpo_loss": 0.08837890625, "epoch": 0.85, "final_loss": 0.08837890625, "grad_norm": 0.0, "learning_rate": 5.4606288729212026e-08, "loss": 0.0762, "projector_lr": 1.638188661876361e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.625, "sft_loss": 0.8828125, "step": 5321 }, { "dpo_loss": 0.05615234375, "epoch": 0.85, "final_loss": 0.05615234375, "grad_norm": 0.0, "learning_rate": 5.4490974944425616e-08, "loss": 0.2299, "projector_lr": 1.6347292483327685e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 5.71875, "rewards_train/rejected": -7.21875, "sft_loss": 1.0625, "step": 5322 }, { "dpo_loss": 0.00384521484375, "epoch": 0.85, "final_loss": 0.00384521484375, "grad_norm": 0.0, "learning_rate": 5.4375776026832685e-08, "loss": 0.0204, "projector_lr": 1.6312732808049807e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 6.375, "rewards_train/rejected": -7.25, "sft_loss": 0.8125, "step": 5323 }, { "dpo_loss": 0.443359375, "epoch": 0.85, "final_loss": 0.443359375, "grad_norm": 0.0, "learning_rate": 5.4260692006135536e-08, "loss": 0.2899, "projector_lr": 1.627820760184066e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.234375, "rewards_train/margins": 2.765625, "rewards_train/rejected": -4.0, "sft_loss": 0.625, "step": 5324 }, { "dpo_loss": 0.017333984375, "epoch": 0.85, "final_loss": 0.017333984375, "grad_norm": 0.0, "learning_rate": 5.414572291200653e-08, "loss": 0.1769, "projector_lr": 1.624371687360196e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 6.09375, "rewards_train/rejected": -7.4375, "sft_loss": 0.87109375, "step": 5325 }, { "dpo_loss": 0.04638671875, "epoch": 0.85, "final_loss": 0.04638671875, "grad_norm": 0.0, "learning_rate": 5.403086877408869e-08, "loss": 0.1898, "projector_lr": 1.6209260632226607e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.46875, "sft_loss": 0.8203125, "step": 5326 }, { "dpo_loss": 0.060546875, "epoch": 0.85, "final_loss": 0.060546875, "grad_norm": 0.0, "learning_rate": 5.391612962199504e-08, "loss": 0.1093, "projector_lr": 1.6174838886598513e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4765625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.625, "sft_loss": 0.671875, "step": 5327 }, { "dpo_loss": 0.451171875, "epoch": 0.85, "final_loss": 0.451171875, "grad_norm": 0.0, "learning_rate": 5.3801505485309485e-08, "loss": 0.5478, "projector_lr": 1.6140451645592845e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.8125, "sft_loss": 0.6484375, "step": 5328 }, { "dpo_loss": 0.23828125, "epoch": 0.85, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 5.3686996393585706e-08, "loss": 0.2335, "projector_lr": 1.6106098918075712e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -3.015625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -7.65625, "sft_loss": 1.015625, "step": 5329 }, { "dpo_loss": 0.1611328125, "epoch": 0.85, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 5.357260237634825e-08, "loss": 0.0934, "projector_lr": 1.6071780712904477e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.03125, "sft_loss": 1.0390625, "step": 5330 }, { "dpo_loss": 0.71484375, "epoch": 0.85, "final_loss": 0.71484375, "grad_norm": 0.0, "learning_rate": 5.345832346309165e-08, "loss": 0.3714, "projector_lr": 1.6037497038927496e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 3.140625, "rewards_train/rejected": -3.75, "sft_loss": 0.58203125, "step": 5331 }, { "dpo_loss": 0.1064453125, "epoch": 0.85, "final_loss": 0.1064453125, "grad_norm": 0.0, "learning_rate": 5.3344159683280765e-08, "loss": 0.1288, "projector_lr": 1.600324790498423e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4921875, "rewards_train/margins": 6.40625, "rewards_train/rejected": -6.90625, "sft_loss": 0.82421875, "step": 5332 }, { "dpo_loss": 0.1123046875, "epoch": 0.85, "final_loss": 0.1123046875, "grad_norm": 0.0, "learning_rate": 5.323011106635084e-08, "loss": 0.0694, "projector_lr": 1.5969033319905252e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.375, "sft_loss": 0.99609375, "step": 5333 }, { "dpo_loss": 0.0478515625, "epoch": 0.85, "final_loss": 0.0478515625, "grad_norm": 0.0, "learning_rate": 5.311617764170756e-08, "loss": 0.0787, "projector_lr": 1.593485329251227e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59375, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.40625, "sft_loss": 1.0078125, "step": 5334 }, { "dpo_loss": 0.06494140625, "epoch": 0.85, "final_loss": 0.06494140625, "grad_norm": 0.0, "learning_rate": 5.3002359438726776e-08, "loss": 0.2442, "projector_lr": 1.5900707831618033e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.21875, "sft_loss": 0.8125, "step": 5335 }, { "dpo_loss": 0.15625, "epoch": 0.85, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 5.2888656486754446e-08, "loss": 0.241, "projector_lr": 1.5866596946026335e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 3.453125, "rewards_train/rejected": -5.3125, "sft_loss": 0.625, "step": 5336 }, { "dpo_loss": 0.39453125, "epoch": 0.85, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 5.277506881510729e-08, "loss": 0.2631, "projector_lr": 1.583252064453219e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 3.53125, "rewards_train/rejected": -5.0, "sft_loss": 0.85546875, "step": 5337 }, { "dpo_loss": 0.10546875, "epoch": 0.85, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 5.2661596453071944e-08, "loss": 0.0968, "projector_lr": 1.5798478935921584e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.140625, "rewards_train/margins": 6.90625, "rewards_train/rejected": -8.0625, "sft_loss": 0.66796875, "step": 5338 }, { "dpo_loss": 0.1318359375, "epoch": 0.85, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 5.254823942990533e-08, "loss": 0.1641, "projector_lr": 1.57644718289716e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.71875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.9375, "sft_loss": 0.8203125, "step": 5339 }, { "dpo_loss": 0.0869140625, "epoch": 0.85, "final_loss": 0.0869140625, "grad_norm": 0.0, "learning_rate": 5.243499777483457e-08, "loss": 0.051, "projector_lr": 1.5730499332450371e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.90625, "sft_loss": 0.94921875, "step": 5340 }, { "dpo_loss": 0.09130859375, "epoch": 0.85, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 5.232187151705747e-08, "loss": 0.1031, "projector_lr": 1.5696561455117242e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 6.125, "rewards_train/rejected": -7.59375, "sft_loss": 1.0078125, "step": 5341 }, { "dpo_loss": 0.12353515625, "epoch": 0.85, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 5.220886068574154e-08, "loss": 0.1262, "projector_lr": 1.5662658205722463e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.9375, "sft_loss": 0.65625, "step": 5342 }, { "dpo_loss": 0.09130859375, "epoch": 0.85, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 5.209596531002503e-08, "loss": 0.2406, "projector_lr": 1.562878959300751e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.40625, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.28125, "sft_loss": 0.70703125, "step": 5343 }, { "dpo_loss": 0.053466796875, "epoch": 0.86, "final_loss": 0.053466796875, "grad_norm": 0.0, "learning_rate": 5.198318541901575e-08, "loss": 0.0803, "projector_lr": 1.5594955625704726e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.49609375, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.375, "sft_loss": 0.9921875, "step": 5344 }, { "dpo_loss": 0.345703125, "epoch": 0.86, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 5.187052104179252e-08, "loss": 0.2325, "projector_lr": 1.5561156312537756e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": 0.267578125, "rewards_train/margins": 5.46875, "rewards_train/rejected": -5.21875, "sft_loss": 0.90234375, "step": 5345 }, { "dpo_loss": 0.01153564453125, "epoch": 0.86, "final_loss": 0.01153564453125, "grad_norm": 0.0, "learning_rate": 5.1757972207403796e-08, "loss": 0.1342, "projector_lr": 1.552739166222114e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59375, "rewards_train/margins": 6.84375, "rewards_train/rejected": -7.4375, "sft_loss": 0.8125, "step": 5346 }, { "dpo_loss": 0.01202392578125, "epoch": 0.86, "final_loss": 0.01202392578125, "grad_norm": 0.0, "learning_rate": 5.164553894486856e-08, "loss": 0.338, "projector_lr": 1.549366168346057e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 6.28125, "rewards_train/rejected": -7.5625, "sft_loss": 0.92578125, "step": 5347 }, { "dpo_loss": 0.1630859375, "epoch": 0.86, "final_loss": 0.1630859375, "grad_norm": 0.0, "learning_rate": 5.153322128317583e-08, "loss": 0.4088, "projector_lr": 1.545996638495275e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.56640625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.625, "sft_loss": 0.83984375, "step": 5348 }, { "dpo_loss": 0.0244140625, "epoch": 0.86, "final_loss": 0.0244140625, "grad_norm": 0.0, "learning_rate": 5.142101925128495e-08, "loss": 0.1238, "projector_lr": 1.5426305775385485e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.953125, "rewards_train/margins": 6.125, "rewards_train/rejected": -7.09375, "sft_loss": 0.984375, "step": 5349 }, { "dpo_loss": 0.09912109375, "epoch": 0.86, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 5.1308932878125156e-08, "loss": 0.2351, "projector_lr": 1.5392679863437549e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.416015625, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.15625, "sft_loss": 0.5625, "step": 5350 }, { "dpo_loss": 0.35546875, "epoch": 0.86, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 5.119696219259634e-08, "loss": 0.3549, "projector_lr": 1.5359088657778902e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.109375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -4.40625, "sft_loss": 0.6796875, "step": 5351 }, { "dpo_loss": 0.208984375, "epoch": 0.86, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 5.108510722356818e-08, "loss": 0.168, "projector_lr": 1.5325532167070454e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.59375, "sft_loss": 0.7421875, "step": 5352 }, { "dpo_loss": 0.53125, "epoch": 0.86, "final_loss": 0.53125, "grad_norm": 0.0, "learning_rate": 5.097336799988067e-08, "loss": 0.2676, "projector_lr": 1.5292010399964202e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8984375, "rewards_train/margins": 2.90625, "rewards_train/rejected": -4.8125, "sft_loss": 1.1328125, "step": 5353 }, { "dpo_loss": 0.2275390625, "epoch": 0.86, "final_loss": 0.2275390625, "grad_norm": 0.0, "learning_rate": 5.086174455034381e-08, "loss": 0.1204, "projector_lr": 1.5258523365103144e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.46875, "sft_loss": 0.8671875, "step": 5354 }, { "dpo_loss": 0.1767578125, "epoch": 0.86, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 5.075023690373809e-08, "loss": 0.1029, "projector_lr": 1.5225071071121427e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.40625, "sft_loss": 0.78125, "step": 5355 }, { "dpo_loss": 0.0244140625, "epoch": 0.86, "final_loss": 0.0244140625, "grad_norm": 0.0, "learning_rate": 5.0638845088813775e-08, "loss": 0.0252, "projector_lr": 1.5191653526644134e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.625, "sft_loss": 0.640625, "step": 5356 }, { "dpo_loss": 0.034912109375, "epoch": 0.86, "final_loss": 0.034912109375, "grad_norm": 0.0, "learning_rate": 5.052756913429163e-08, "loss": 0.0388, "projector_lr": 1.515827074028749e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1328125, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.8125, "sft_loss": 0.640625, "step": 5357 }, { "dpo_loss": 0.2392578125, "epoch": 0.86, "final_loss": 0.2392578125, "grad_norm": 0.0, "learning_rate": 5.0416409068861966e-08, "loss": 0.1416, "projector_lr": 1.512492272065859e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.625, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.625, "sft_loss": 0.859375, "step": 5358 }, { "dpo_loss": 0.0250244140625, "epoch": 0.86, "final_loss": 0.0250244140625, "grad_norm": 0.0, "learning_rate": 5.030536492118592e-08, "loss": 0.0892, "projector_lr": 1.5091609476355777e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 7.78125, "rewards_train/rejected": -8.75, "sft_loss": 0.62109375, "step": 5359 }, { "dpo_loss": 0.08544921875, "epoch": 0.86, "final_loss": 0.08544921875, "grad_norm": 0.0, "learning_rate": 5.019443671989421e-08, "loss": 0.1696, "projector_lr": 1.5058331015968263e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.78125, "sft_loss": 0.8125, "step": 5360 }, { "dpo_loss": 0.474609375, "epoch": 0.86, "final_loss": 0.474609375, "grad_norm": 0.0, "learning_rate": 5.008362449358805e-08, "loss": 0.339, "projector_lr": 1.5025087348076415e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.984375, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.9375, "sft_loss": 0.87890625, "step": 5361 }, { "dpo_loss": 0.162109375, "epoch": 0.86, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 4.9972928270838266e-08, "loss": 0.2469, "projector_lr": 1.499187848125148e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.6875, "sft_loss": 1.078125, "step": 5362 }, { "dpo_loss": 0.125, "epoch": 0.86, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 4.98623480801863e-08, "loss": 0.1919, "projector_lr": 1.4958704424055892e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.8125, "sft_loss": 0.80859375, "step": 5363 }, { "dpo_loss": 0.1484375, "epoch": 0.86, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 4.9751883950143234e-08, "loss": 0.414, "projector_lr": 1.492556518504297e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.859375, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.96875, "sft_loss": 0.8671875, "step": 5364 }, { "dpo_loss": 0.050048828125, "epoch": 0.86, "final_loss": 0.050048828125, "grad_norm": 0.0, "learning_rate": 4.9641535909190676e-08, "loss": 0.1048, "projector_lr": 1.4892460772757205e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5234375, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.90625, "sft_loss": 0.73828125, "step": 5365 }, { "dpo_loss": 0.328125, "epoch": 0.86, "final_loss": 0.328125, "grad_norm": 0.0, "learning_rate": 4.9531303985779984e-08, "loss": 0.2137, "projector_lr": 1.4859391195733997e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.62109375, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.15625, "sft_loss": 0.6484375, "step": 5366 }, { "dpo_loss": 0.1337890625, "epoch": 0.86, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 4.9421188208332574e-08, "loss": 0.1339, "projector_lr": 1.4826356462499773e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.625, "sft_loss": 0.765625, "step": 5367 }, { "dpo_loss": 0.08837890625, "epoch": 0.86, "final_loss": 0.08837890625, "grad_norm": 0.0, "learning_rate": 4.931118860523992e-08, "loss": 0.3937, "projector_lr": 1.4793356581571977e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 3.078125, "rewards_train/rejected": -4.0625, "sft_loss": 0.84765625, "step": 5368 }, { "dpo_loss": 0.06689453125, "epoch": 0.86, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 4.920130520486382e-08, "loss": 0.1359, "projector_lr": 1.4760391561459147e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1201171875, "rewards_train/margins": 6.8125, "rewards_train/rejected": -6.6875, "sft_loss": 0.70703125, "step": 5369 }, { "dpo_loss": 0.3125, "epoch": 0.86, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 4.909153803553584e-08, "loss": 0.28, "projector_lr": 1.4727461410660754e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.44921875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.59375, "sft_loss": 0.71484375, "step": 5370 }, { "dpo_loss": 0.0162353515625, "epoch": 0.86, "final_loss": 0.0162353515625, "grad_norm": 0.0, "learning_rate": 4.8981887125557456e-08, "loss": 0.121, "projector_lr": 1.4694566137667236e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.140625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -7.09375, "sft_loss": 0.87109375, "step": 5371 }, { "dpo_loss": 0.50390625, "epoch": 0.86, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 4.887235250320065e-08, "loss": 0.3209, "projector_lr": 1.4661705750960196e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 3.1875, "rewards_train/rejected": -4.1875, "sft_loss": 0.6640625, "step": 5372 }, { "dpo_loss": 0.027587890625, "epoch": 0.86, "final_loss": 0.027587890625, "grad_norm": 0.0, "learning_rate": 4.876293419670696e-08, "loss": 0.3881, "projector_lr": 1.4628880259012088e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.5, "sft_loss": 0.83984375, "step": 5373 }, { "dpo_loss": 0.44140625, "epoch": 0.86, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 4.865363223428809e-08, "loss": 0.2662, "projector_lr": 1.4596089670286427e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75390625, "rewards_train/margins": 2.078125, "rewards_train/rejected": -2.828125, "sft_loss": 0.640625, "step": 5374 }, { "dpo_loss": 0.37109375, "epoch": 0.86, "final_loss": 0.37109375, "grad_norm": 0.0, "learning_rate": 4.8544446644125825e-08, "loss": 0.2763, "projector_lr": 1.4563333993237748e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 2.484375, "rewards_train/rejected": -3.9375, "sft_loss": 0.8671875, "step": 5375 }, { "dpo_loss": 0.20703125, "epoch": 0.86, "final_loss": 0.20703125, "grad_norm": 0.0, "learning_rate": 4.843537745437187e-08, "loss": 0.2094, "projector_lr": 1.4530613236311563e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 3.390625, "rewards_train/rejected": -4.6875, "sft_loss": 0.94921875, "step": 5376 }, { "dpo_loss": 0.2578125, "epoch": 0.86, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 4.832642469314785e-08, "loss": 0.1484, "projector_lr": 1.4497927407944357e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.375, "sft_loss": 0.9296875, "step": 5377 }, { "dpo_loss": 0.263671875, "epoch": 0.86, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 4.8217588388545563e-08, "loss": 0.3249, "projector_lr": 1.446527651656367e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.625, "sft_loss": 0.9140625, "step": 5378 }, { "dpo_loss": 0.12158203125, "epoch": 0.86, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 4.810886856862661e-08, "loss": 0.3599, "projector_lr": 1.4432660570587985e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.35546875, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.28125, "sft_loss": 0.75390625, "step": 5379 }, { "dpo_loss": 0.30078125, "epoch": 0.86, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 4.800026526142259e-08, "loss": 0.2184, "projector_lr": 1.4400079578426777e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.875, "sft_loss": 0.79296875, "step": 5380 }, { "dpo_loss": 0.01226806640625, "epoch": 0.86, "final_loss": 0.01226806640625, "grad_norm": 0.0, "learning_rate": 4.789177849493509e-08, "loss": 0.0395, "projector_lr": 1.4367533548480528e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.6875, "sft_loss": 0.75, "step": 5381 }, { "dpo_loss": 0.07666015625, "epoch": 0.86, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 4.778340829713567e-08, "loss": 0.062, "projector_lr": 1.4335022489140703e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.875, "sft_loss": 0.72265625, "step": 5382 }, { "dpo_loss": 0.17578125, "epoch": 0.86, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 4.7675154695965724e-08, "loss": 0.1268, "projector_lr": 1.4302546408789718e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1640625, "rewards_train/margins": 4.8125, "rewards_train/rejected": -4.96875, "sft_loss": 0.84375, "step": 5383 }, { "dpo_loss": 0.103515625, "epoch": 0.86, "final_loss": 0.103515625, "grad_norm": 0.0, "learning_rate": 4.756701771933691e-08, "loss": 0.1122, "projector_lr": 1.4270105315801075e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.3125, "sft_loss": 0.8125, "step": 5384 }, { "dpo_loss": 0.06396484375, "epoch": 0.86, "final_loss": 0.06396484375, "grad_norm": 0.0, "learning_rate": 4.745899739513026e-08, "loss": 0.0513, "projector_lr": 1.423769921853908e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.8125, "sft_loss": 0.76171875, "step": 5385 }, { "dpo_loss": 0.103515625, "epoch": 0.86, "final_loss": 0.103515625, "grad_norm": 0.0, "learning_rate": 4.735109375119722e-08, "loss": 0.1935, "projector_lr": 1.4205328125359168e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.310546875, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.3125, "sft_loss": 0.7578125, "step": 5386 }, { "dpo_loss": 0.0137939453125, "epoch": 0.86, "final_loss": 0.0137939453125, "grad_norm": 0.0, "learning_rate": 4.724330681535887e-08, "loss": 0.0936, "projector_lr": 1.4172992044607663e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 7.53125, "rewards_train/rejected": -8.375, "sft_loss": 0.54296875, "step": 5387 }, { "dpo_loss": 0.1982421875, "epoch": 0.86, "final_loss": 0.1982421875, "grad_norm": 0.0, "learning_rate": 4.7135636615406536e-08, "loss": 0.1094, "projector_lr": 1.4140690984621961e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 6.65625, "rewards_train/rejected": -7.4375, "sft_loss": 0.72265625, "step": 5388 }, { "dpo_loss": 0.130859375, "epoch": 0.86, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 4.7028083179100894e-08, "loss": 0.1469, "projector_lr": 1.410842495373027e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.8125, "sft_loss": 1.0546875, "step": 5389 }, { "dpo_loss": 0.380859375, "epoch": 0.86, "final_loss": 0.380859375, "grad_norm": 0.0, "learning_rate": 4.692064653417305e-08, "loss": 0.3045, "projector_lr": 1.4076193960251914e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3125, "rewards_train/margins": 2.84375, "rewards_train/rejected": -4.15625, "sft_loss": 1.03125, "step": 5390 }, { "dpo_loss": 0.54296875, "epoch": 0.86, "final_loss": 0.54296875, "grad_norm": 0.0, "learning_rate": 4.681332670832366e-08, "loss": 0.3143, "projector_lr": 1.40439980124971e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.9375, "sft_loss": 0.62890625, "step": 5391 }, { "dpo_loss": 0.177734375, "epoch": 0.86, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 4.67061237292235e-08, "loss": 0.2164, "projector_lr": 1.401183711876705e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.765625, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.90625, "sft_loss": 1.09375, "step": 5392 }, { "dpo_loss": 0.08447265625, "epoch": 0.86, "final_loss": 0.08447265625, "grad_norm": 0.0, "learning_rate": 4.659903762451306e-08, "loss": 0.1456, "projector_lr": 1.397971128735392e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.056396484375, "rewards_train/margins": 5.59375, "rewards_train/rejected": -5.65625, "sft_loss": 0.53515625, "step": 5393 }, { "dpo_loss": 0.07861328125, "epoch": 0.86, "final_loss": 0.07861328125, "grad_norm": 0.0, "learning_rate": 4.64920684218027e-08, "loss": 0.1755, "projector_lr": 1.394762052654081e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 3.890625, "rewards_train/rejected": -5.0, "sft_loss": 0.640625, "step": 5394 }, { "dpo_loss": 0.15625, "epoch": 0.86, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 4.638521614867258e-08, "loss": 0.1001, "projector_lr": 1.3915564844601774e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 3.8125, "rewards_train/rejected": -4.625, "sft_loss": 0.703125, "step": 5395 }, { "dpo_loss": 0.0654296875, "epoch": 0.86, "final_loss": 0.0654296875, "grad_norm": 0.0, "learning_rate": 4.627848083267305e-08, "loss": 0.1018, "projector_lr": 1.3883544249801916e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0189208984375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.0, "sft_loss": 0.85546875, "step": 5396 }, { "dpo_loss": 0.1826171875, "epoch": 0.86, "final_loss": 0.1826171875, "grad_norm": 0.0, "learning_rate": 4.6171862501323875e-08, "loss": 0.2001, "projector_lr": 1.3851558750397164e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.046875, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.9375, "sft_loss": 0.87890625, "step": 5397 }, { "dpo_loss": 0.00787353515625, "epoch": 0.86, "final_loss": 0.00787353515625, "grad_norm": 0.0, "learning_rate": 4.6065361182114904e-08, "loss": 0.0196, "projector_lr": 1.3819608354634473e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.171875, "rewards_train/margins": 6.28125, "rewards_train/rejected": -6.4375, "sft_loss": 0.57421875, "step": 5398 }, { "dpo_loss": 0.00994873046875, "epoch": 0.86, "final_loss": 0.00994873046875, "grad_norm": 0.0, "learning_rate": 4.5958976902505665e-08, "loss": 0.0985, "projector_lr": 1.37876930707517e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.39453125, "rewards_train/margins": 7.09375, "rewards_train/rejected": -7.46875, "sft_loss": 0.859375, "step": 5399 }, { "dpo_loss": 0.0439453125, "epoch": 0.86, "final_loss": 0.0439453125, "grad_norm": 0.0, "learning_rate": 4.5852709689925717e-08, "loss": 0.2145, "projector_lr": 1.3755812906977716e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 3.6875, "rewards_train/rejected": -4.6875, "sft_loss": 0.8359375, "step": 5400 }, { "dpo_loss": 0.03271484375, "epoch": 0.86, "final_loss": 0.03271484375, "grad_norm": 0.0, "learning_rate": 4.574655957177426e-08, "loss": 0.267, "projector_lr": 1.372396787153228e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5234375, "rewards_train/margins": 6.5625, "rewards_train/rejected": -8.0625, "sft_loss": 1.0078125, "step": 5401 }, { "dpo_loss": 0.0751953125, "epoch": 0.86, "final_loss": 0.0751953125, "grad_norm": 0.0, "learning_rate": 4.56405265754205e-08, "loss": 0.1965, "projector_lr": 1.369215797262615e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.375, "rewards_train/margins": 5.5, "rewards_train/rejected": -5.875, "sft_loss": 0.486328125, "step": 5402 }, { "dpo_loss": 0.05615234375, "epoch": 0.86, "final_loss": 0.05615234375, "grad_norm": 0.0, "learning_rate": 4.5534610728203024e-08, "loss": 0.1044, "projector_lr": 1.3660383218460908e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 6.5, "rewards_train/rejected": -7.6875, "sft_loss": 0.89453125, "step": 5403 }, { "dpo_loss": 0.16796875, "epoch": 0.86, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 4.542881205743077e-08, "loss": 0.0931, "projector_lr": 1.362864361722923e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.671875, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.125, "sft_loss": 0.75, "step": 5404 }, { "dpo_loss": 0.1640625, "epoch": 0.86, "final_loss": 0.1640625, "grad_norm": 0.0, "learning_rate": 4.5323130590381895e-08, "loss": 0.138, "projector_lr": 1.359693917711457e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 3.0625, "rewards_train/rejected": -4.0625, "sft_loss": 0.7890625, "step": 5405 }, { "dpo_loss": 0.388671875, "epoch": 0.86, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 4.521756635430496e-08, "loss": 0.2499, "projector_lr": 1.3565269906291488e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8359375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -6.40625, "sft_loss": 0.7421875, "step": 5406 }, { "dpo_loss": 0.3984375, "epoch": 0.87, "final_loss": 0.3984375, "grad_norm": 0.0, "learning_rate": 4.5112119376417844e-08, "loss": 0.2798, "projector_lr": 1.3533635812925354e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 2.796875, "rewards_train/rejected": -4.125, "sft_loss": 1.2890625, "step": 5407 }, { "dpo_loss": 0.298828125, "epoch": 0.87, "final_loss": 0.298828125, "grad_norm": 0.0, "learning_rate": 4.5006789683908255e-08, "loss": 0.1786, "projector_lr": 1.3502036905172477e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 3.140625, "rewards_train/rejected": -4.5, "sft_loss": 1.2734375, "step": 5408 }, { "dpo_loss": 0.126953125, "epoch": 0.87, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 4.4901577303933733e-08, "loss": 0.0723, "projector_lr": 1.347047319118012e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.75, "sft_loss": 0.68359375, "step": 5409 }, { "dpo_loss": 0.039794921875, "epoch": 0.87, "final_loss": 0.039794921875, "grad_norm": 0.0, "learning_rate": 4.479648226362165e-08, "loss": 0.0674, "projector_lr": 1.3438944679086495e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.140625, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.84375, "sft_loss": 0.65234375, "step": 5410 }, { "dpo_loss": 0.1318359375, "epoch": 0.87, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 4.469150459006899e-08, "loss": 0.1012, "projector_lr": 1.3407451377020697e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 6.3125, "rewards_train/rejected": -6.84375, "sft_loss": 0.6875, "step": 5411 }, { "dpo_loss": 0.1103515625, "epoch": 0.87, "final_loss": 0.1103515625, "grad_norm": 0.0, "learning_rate": 4.458664431034242e-08, "loss": 0.1086, "projector_lr": 1.337599329310273e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.78125, "sft_loss": 0.62109375, "step": 5412 }, { "dpo_loss": 0.0458984375, "epoch": 0.87, "final_loss": 0.0458984375, "grad_norm": 0.0, "learning_rate": 4.448190145147862e-08, "loss": 0.2702, "projector_lr": 1.3344570435443588e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.43359375, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.40625, "sft_loss": 0.71484375, "step": 5413 }, { "dpo_loss": 0.3359375, "epoch": 0.87, "final_loss": 0.3359375, "grad_norm": 0.0, "learning_rate": 4.4377276040483804e-08, "loss": 0.4101, "projector_lr": 1.331318281214514e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.96875, "sft_loss": 0.78515625, "step": 5414 }, { "dpo_loss": 0.1083984375, "epoch": 0.87, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 4.4272768104333833e-08, "loss": 0.103, "projector_lr": 1.3281830431300153e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.125, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.21875, "sft_loss": 0.859375, "step": 5415 }, { "dpo_loss": 0.5078125, "epoch": 0.87, "final_loss": 0.5078125, "grad_norm": 0.0, "learning_rate": 4.4168377669974275e-08, "loss": 0.3677, "projector_lr": 1.3250513300992285e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.015625, "rewards_train/margins": 4.8125, "rewards_train/rejected": -6.84375, "sft_loss": 0.75390625, "step": 5416 }, { "dpo_loss": 0.5546875, "epoch": 0.87, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 4.4064104764320706e-08, "loss": 0.3225, "projector_lr": 1.3219231429296214e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.734375, "rewards_train/margins": 3.390625, "rewards_train/rejected": -5.125, "sft_loss": 1.0703125, "step": 5417 }, { "dpo_loss": 0.333984375, "epoch": 0.87, "final_loss": 0.333984375, "grad_norm": 0.0, "learning_rate": 4.395994941425796e-08, "loss": 0.2426, "projector_lr": 1.3187984824277388e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.3125, "sft_loss": 0.73828125, "step": 5418 }, { "dpo_loss": 0.0830078125, "epoch": 0.87, "final_loss": 0.0830078125, "grad_norm": 0.0, "learning_rate": 4.385591164664104e-08, "loss": 0.2006, "projector_lr": 1.3156773493992312e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0732421875, "rewards_train/margins": 4.75, "rewards_train/rejected": -4.84375, "sft_loss": 0.86328125, "step": 5419 }, { "dpo_loss": 0.03955078125, "epoch": 0.87, "final_loss": 0.03955078125, "grad_norm": 0.0, "learning_rate": 4.3751991488294226e-08, "loss": 0.1407, "projector_lr": 1.3125597446488268e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.84375, "sft_loss": 0.48046875, "step": 5420 }, { "dpo_loss": 0.10595703125, "epoch": 0.87, "final_loss": 0.10595703125, "grad_norm": 0.0, "learning_rate": 4.364818896601169e-08, "loss": 0.0645, "projector_lr": 1.3094456689803508e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6875, "rewards_train/margins": 5.9375, "rewards_train/rejected": -7.625, "sft_loss": 0.76953125, "step": 5421 }, { "dpo_loss": 0.1142578125, "epoch": 0.87, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 4.354450410655702e-08, "loss": 0.2332, "projector_lr": 1.306335123196711e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.875, "sft_loss": 0.94140625, "step": 5422 }, { "dpo_loss": 0.71875, "epoch": 0.87, "final_loss": 0.71875, "grad_norm": 0.0, "learning_rate": 4.3440936936663894e-08, "loss": 0.3661, "projector_lr": 1.303228108099917e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 2.671875, "rewards_train/rejected": -4.09375, "sft_loss": 0.90234375, "step": 5423 }, { "dpo_loss": 0.34375, "epoch": 0.87, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 4.333748748303534e-08, "loss": 0.2145, "projector_lr": 1.3001246244910602e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.53125, "rewards_train/margins": 3.375, "rewards_train/rejected": -4.90625, "sft_loss": 0.5703125, "step": 5424 }, { "dpo_loss": 0.1171875, "epoch": 0.87, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 4.323415577234407e-08, "loss": 0.1061, "projector_lr": 1.2970246731703224e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.046875, "rewards_train/margins": 6.15625, "rewards_train/rejected": -7.21875, "sft_loss": 0.5546875, "step": 5425 }, { "dpo_loss": 0.3671875, "epoch": 0.87, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 4.313094183123239e-08, "loss": 0.2945, "projector_lr": 1.2939282549369718e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.375, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.90625, "sft_loss": 0.7578125, "step": 5426 }, { "dpo_loss": 0.07666015625, "epoch": 0.87, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 4.3027845686312456e-08, "loss": 0.0954, "projector_lr": 1.2908353705893738e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.640625, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.25, "sft_loss": 0.59375, "step": 5427 }, { "dpo_loss": 0.57421875, "epoch": 0.87, "final_loss": 0.57421875, "grad_norm": 0.0, "learning_rate": 4.292486736416584e-08, "loss": 0.377, "projector_lr": 1.2877460209249753e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 3.078125, "rewards_train/rejected": -4.09375, "sft_loss": 0.77734375, "step": 5428 }, { "dpo_loss": 0.0791015625, "epoch": 0.87, "final_loss": 0.0791015625, "grad_norm": 0.0, "learning_rate": 4.282200689134397e-08, "loss": 0.0632, "projector_lr": 1.284660206740319e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.34375, "rewards_train/margins": 6.1875, "rewards_train/rejected": -6.53125, "sft_loss": 0.84765625, "step": 5429 }, { "dpo_loss": 0.11572265625, "epoch": 0.87, "final_loss": 0.11572265625, "grad_norm": 0.0, "learning_rate": 4.271926429436745e-08, "loss": 0.0818, "projector_lr": 1.2815779288310238e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.1875, "sft_loss": 0.82421875, "step": 5430 }, { "dpo_loss": 0.037841796875, "epoch": 0.87, "final_loss": 0.037841796875, "grad_norm": 0.0, "learning_rate": 4.261663959972706e-08, "loss": 0.0819, "projector_lr": 1.278499187991812e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.40625, "sft_loss": 0.875, "step": 5431 }, { "dpo_loss": 0.10302734375, "epoch": 0.87, "final_loss": 0.10302734375, "grad_norm": 0.0, "learning_rate": 4.251413283388261e-08, "loss": 0.1339, "projector_lr": 1.2754239850164783e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.34375, "sft_loss": 0.89453125, "step": 5432 }, { "dpo_loss": 0.205078125, "epoch": 0.87, "final_loss": 0.205078125, "grad_norm": 0.0, "learning_rate": 4.241174402326414e-08, "loss": 0.1196, "projector_lr": 1.2723523206979243e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 2.75, "rewards_train/rejected": -3.359375, "sft_loss": 0.76171875, "step": 5433 }, { "dpo_loss": 0.06494140625, "epoch": 0.87, "final_loss": 0.06494140625, "grad_norm": 0.0, "learning_rate": 4.230947319427053e-08, "loss": 0.0792, "projector_lr": 1.269284195828116e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59375, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.59375, "sft_loss": 0.90625, "step": 5434 }, { "dpo_loss": 0.18359375, "epoch": 0.87, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 4.220732037327096e-08, "loss": 0.3045, "projector_lr": 1.266219611198129e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.9375, "sft_loss": 0.89453125, "step": 5435 }, { "dpo_loss": 0.09375, "epoch": 0.87, "final_loss": 0.09375, "grad_norm": 0.0, "learning_rate": 4.210528558660359e-08, "loss": 0.0637, "projector_lr": 1.2631585675981077e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.453125, "rewards_train/margins": 3.78125, "rewards_train/rejected": -5.21875, "sft_loss": 0.69140625, "step": 5436 }, { "dpo_loss": 0.11279296875, "epoch": 0.87, "final_loss": 0.11279296875, "grad_norm": 0.0, "learning_rate": 4.200336886057665e-08, "loss": 0.1362, "projector_lr": 1.2601010658172996e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.34375, "sft_loss": 0.7265625, "step": 5437 }, { "dpo_loss": 0.027099609375, "epoch": 0.87, "final_loss": 0.027099609375, "grad_norm": 0.0, "learning_rate": 4.190157022146762e-08, "loss": 0.2216, "projector_lr": 1.2570471066440286e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9453125, "rewards_train/margins": 6.375, "rewards_train/rejected": -7.3125, "sft_loss": 0.875, "step": 5438 }, { "dpo_loss": 0.051025390625, "epoch": 0.87, "final_loss": 0.051025390625, "grad_norm": 0.0, "learning_rate": 4.179988969552361e-08, "loss": 0.0483, "projector_lr": 1.2539966908657086e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.375, "sft_loss": 0.5390625, "step": 5439 }, { "dpo_loss": 0.0037994384765625, "epoch": 0.87, "final_loss": 0.0037994384765625, "grad_norm": 0.0, "learning_rate": 4.1698327308961166e-08, "loss": 0.0473, "projector_lr": 1.2509498192688352e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 6.8125, "rewards_train/rejected": -7.71875, "sft_loss": 0.76953125, "step": 5440 }, { "dpo_loss": 0.2265625, "epoch": 0.87, "final_loss": 0.2265625, "grad_norm": 0.0, "learning_rate": 4.1596883087966606e-08, "loss": 0.1182, "projector_lr": 1.2479064926389983e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5078125, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.6875, "sft_loss": 0.75, "step": 5441 }, { "dpo_loss": 0.10205078125, "epoch": 0.87, "final_loss": 0.10205078125, "grad_norm": 0.0, "learning_rate": 4.1495557058695675e-08, "loss": 0.1724, "projector_lr": 1.2448667117608703e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.4375, "sft_loss": 0.71484375, "step": 5442 }, { "dpo_loss": 0.130859375, "epoch": 0.87, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 4.139434924727358e-08, "loss": 0.1728, "projector_lr": 1.2418304774182077e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.498046875, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.78125, "sft_loss": 0.75, "step": 5443 }, { "dpo_loss": 0.1572265625, "epoch": 0.87, "final_loss": 0.1572265625, "grad_norm": 0.0, "learning_rate": 4.1293259679794965e-08, "loss": 0.1273, "projector_lr": 1.238797790393849e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.330078125, "rewards_train/margins": 3.3125, "rewards_train/rejected": -3.640625, "sft_loss": 0.7890625, "step": 5444 }, { "dpo_loss": 0.037353515625, "epoch": 0.87, "final_loss": 0.037353515625, "grad_norm": 0.0, "learning_rate": 4.1192288382324356e-08, "loss": 0.031, "projector_lr": 1.235768651469731e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6875, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.25, "sft_loss": 0.5703125, "step": 5445 }, { "dpo_loss": 0.043212890625, "epoch": 0.87, "final_loss": 0.043212890625, "grad_norm": 0.0, "learning_rate": 4.109143538089532e-08, "loss": 0.1638, "projector_lr": 1.2327430614268598e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.84375, "sft_loss": 1.0078125, "step": 5446 }, { "dpo_loss": 0.1591796875, "epoch": 0.87, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 4.099070070151139e-08, "loss": 0.0985, "projector_lr": 1.2297210210453419e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.03125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.5, "sft_loss": 0.7578125, "step": 5447 }, { "dpo_loss": 0.0947265625, "epoch": 0.87, "final_loss": 0.0947265625, "grad_norm": 0.0, "learning_rate": 4.0890084370145237e-08, "loss": 0.0949, "projector_lr": 1.2267025311043572e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.47265625, "rewards_train/margins": 6.9375, "rewards_train/rejected": -7.4375, "sft_loss": 0.6640625, "step": 5448 }, { "dpo_loss": 0.0908203125, "epoch": 0.87, "final_loss": 0.0908203125, "grad_norm": 0.0, "learning_rate": 4.0789586412739055e-08, "loss": 0.0632, "projector_lr": 1.2236875923821717e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 6.59375, "rewards_train/rejected": -7.84375, "sft_loss": 0.71875, "step": 5449 }, { "dpo_loss": 0.1865234375, "epoch": 0.87, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 4.0689206855204606e-08, "loss": 0.1004, "projector_lr": 1.2206762056561382e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6171875, "rewards_train/margins": 3.953125, "rewards_train/rejected": -4.5625, "sft_loss": 0.88671875, "step": 5450 }, { "dpo_loss": 0.1455078125, "epoch": 0.87, "final_loss": 0.1455078125, "grad_norm": 0.0, "learning_rate": 4.058894572342325e-08, "loss": 0.3328, "projector_lr": 1.2176683717026977e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -6.40625, "sft_loss": 0.828125, "step": 5451 }, { "dpo_loss": 0.310546875, "epoch": 0.87, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 4.048880304324559e-08, "loss": 0.3864, "projector_lr": 1.2146640912973677e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.15625, "rewards_train/rejected": -5.3125, "sft_loss": 0.6328125, "step": 5452 }, { "dpo_loss": 0.220703125, "epoch": 0.87, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 4.038877884049169e-08, "loss": 0.1808, "projector_lr": 1.2116633652147508e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.96875, "sft_loss": 0.75390625, "step": 5453 }, { "dpo_loss": 0.07421875, "epoch": 0.87, "final_loss": 0.07421875, "grad_norm": 0.0, "learning_rate": 4.0288873140951305e-08, "loss": 0.1284, "projector_lr": 1.2086661942285392e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76171875, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.75, "sft_loss": 0.73046875, "step": 5454 }, { "dpo_loss": 0.55859375, "epoch": 0.87, "final_loss": 0.55859375, "grad_norm": 0.0, "learning_rate": 4.01890859703834e-08, "loss": 0.3878, "projector_lr": 1.205672579111502e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.421875, "rewards_train/margins": 3.328125, "rewards_train/rejected": -5.75, "sft_loss": 1.40625, "step": 5455 }, { "dpo_loss": 0.1279296875, "epoch": 0.87, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 4.008941735451643e-08, "loss": 0.1218, "projector_lr": 1.202682520635493e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.21875, "sft_loss": 0.92578125, "step": 5456 }, { "dpo_loss": 0.0162353515625, "epoch": 0.87, "final_loss": 0.0162353515625, "grad_norm": 0.0, "learning_rate": 3.99898673190483e-08, "loss": 0.0231, "projector_lr": 1.199696019571449e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.0625, "sft_loss": 0.84375, "step": 5457 }, { "dpo_loss": 0.2490234375, "epoch": 0.87, "final_loss": 0.2490234375, "grad_norm": 0.0, "learning_rate": 3.9890435889646525e-08, "loss": 0.5147, "projector_lr": 1.1967130766893957e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.15625, "sft_loss": 0.83203125, "step": 5458 }, { "dpo_loss": 0.6015625, "epoch": 0.87, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 3.9791123091947595e-08, "loss": 0.3376, "projector_lr": 1.193733692758428e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 1.4296875, "rewards_train/rejected": -2.875, "sft_loss": 0.9921875, "step": 5459 }, { "dpo_loss": 0.310546875, "epoch": 0.87, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 3.969192895155804e-08, "loss": 0.2057, "projector_lr": 1.1907578685467413e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.4375, "sft_loss": 0.7421875, "step": 5460 }, { "dpo_loss": 0.0615234375, "epoch": 0.87, "final_loss": 0.0615234375, "grad_norm": 0.0, "learning_rate": 3.9592853494053055e-08, "loss": 0.0846, "projector_lr": 1.1877856048215918e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 6.1875, "rewards_train/rejected": -7.1875, "sft_loss": 0.609375, "step": 5461 }, { "dpo_loss": 0.19921875, "epoch": 0.87, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 3.949389674497794e-08, "loss": 0.2899, "projector_lr": 1.1848169023493383e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6171875, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.21875, "sft_loss": 0.7109375, "step": 5462 }, { "dpo_loss": 0.5546875, "epoch": 0.87, "final_loss": 0.5546875, "grad_norm": 0.0, "learning_rate": 3.9395058729846774e-08, "loss": 0.3199, "projector_lr": 1.1818517618954034e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.578125, "rewards_train/rejected": -5.0625, "sft_loss": 0.8828125, "step": 5463 }, { "dpo_loss": 0.0208740234375, "epoch": 0.87, "final_loss": 0.0208740234375, "grad_norm": 0.0, "learning_rate": 3.929633947414357e-08, "loss": 0.0255, "projector_lr": 1.1788901842243071e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.875, "sft_loss": 0.98046875, "step": 5464 }, { "dpo_loss": 0.265625, "epoch": 0.87, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 3.9197739003321406e-08, "loss": 0.2022, "projector_lr": 1.1759321700996422e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.921875, "rewards_train/margins": 3.9375, "rewards_train/rejected": -4.84375, "sft_loss": 1.0625, "step": 5465 }, { "dpo_loss": 0.006500244140625, "epoch": 0.87, "final_loss": 0.006500244140625, "grad_norm": 0.0, "learning_rate": 3.909925734280278e-08, "loss": 0.0061, "projector_lr": 1.1729777202840836e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84765625, "rewards_train/margins": 7.125, "rewards_train/rejected": -7.96875, "sft_loss": 0.73046875, "step": 5466 }, { "dpo_loss": 0.021728515625, "epoch": 0.87, "final_loss": 0.021728515625, "grad_norm": 0.0, "learning_rate": 3.9000894517979446e-08, "loss": 0.0754, "projector_lr": 1.1700268355393834e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.65625, "sft_loss": 0.7109375, "step": 5467 }, { "dpo_loss": 0.2734375, "epoch": 0.87, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 3.8902650554212826e-08, "loss": 0.2864, "projector_lr": 1.1670795166263848e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.765625, "rewards_train/margins": 3.578125, "rewards_train/rejected": -5.34375, "sft_loss": 0.8671875, "step": 5468 }, { "dpo_loss": 0.4609375, "epoch": 0.88, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 3.880452547683349e-08, "loss": 0.3072, "projector_lr": 1.164135764305005e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5078125, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.78125, "sft_loss": 0.55078125, "step": 5469 }, { "dpo_loss": 0.1748046875, "epoch": 0.88, "final_loss": 0.1748046875, "grad_norm": 0.0, "learning_rate": 3.8706519311141274e-08, "loss": 0.2225, "projector_lr": 1.1611955793342383e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.15625, "sft_loss": 0.84765625, "step": 5470 }, { "dpo_loss": 0.15234375, "epoch": 0.88, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 3.86086320824055e-08, "loss": 0.1988, "projector_lr": 1.158258962472165e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.859375, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.8125, "sft_loss": 0.7265625, "step": 5471 }, { "dpo_loss": 0.17578125, "epoch": 0.88, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 3.851086381586483e-08, "loss": 0.1033, "projector_lr": 1.1553259144759449e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 2.734375, "rewards_train/rejected": -4.25, "sft_loss": 1.1015625, "step": 5472 }, { "dpo_loss": 0.047119140625, "epoch": 0.88, "final_loss": 0.047119140625, "grad_norm": 0.0, "learning_rate": 3.8413214536727135e-08, "loss": 0.0724, "projector_lr": 1.152396436101814e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1865234375, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.09375, "sft_loss": 0.65234375, "step": 5473 }, { "dpo_loss": 0.0269775390625, "epoch": 0.88, "final_loss": 0.0269775390625, "grad_norm": 0.0, "learning_rate": 3.83156842701699e-08, "loss": 0.0284, "projector_lr": 1.1494705281050971e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 6.125, "rewards_train/rejected": -6.9375, "sft_loss": 0.73046875, "step": 5474 }, { "dpo_loss": 0.115234375, "epoch": 0.88, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 3.8218273041339347e-08, "loss": 0.0953, "projector_lr": 1.1465481912401805e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.212890625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -4.125, "sft_loss": 0.546875, "step": 5475 }, { "dpo_loss": 0.2021484375, "epoch": 0.88, "final_loss": 0.2021484375, "grad_norm": 0.0, "learning_rate": 3.812098087535165e-08, "loss": 0.1431, "projector_lr": 1.1436294262605496e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1171875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.5625, "sft_loss": 0.83984375, "step": 5476 }, { "dpo_loss": 0.3125, "epoch": 0.88, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 3.802380779729175e-08, "loss": 0.2319, "projector_lr": 1.1407142339187527e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 3.84375, "rewards_train/rejected": -5.15625, "sft_loss": 0.97265625, "step": 5477 }, { "dpo_loss": 0.08251953125, "epoch": 0.88, "final_loss": 0.08251953125, "grad_norm": 0.0, "learning_rate": 3.79267538322145e-08, "loss": 0.0844, "projector_lr": 1.137802614966435e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 6.9375, "rewards_train/rejected": -8.0, "sft_loss": 0.6953125, "step": 5478 }, { "dpo_loss": 0.05078125, "epoch": 0.88, "final_loss": 0.05078125, "grad_norm": 0.0, "learning_rate": 3.782981900514326e-08, "loss": 0.0852, "projector_lr": 1.1348945701542979e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.28125, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.28125, "sft_loss": 0.72265625, "step": 5479 }, { "dpo_loss": 0.006622314453125, "epoch": 0.88, "final_loss": 0.006622314453125, "grad_norm": 0.0, "learning_rate": 3.773300334107138e-08, "loss": 0.1954, "projector_lr": 1.1319901002321415e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71875, "rewards_train/margins": 6.09375, "rewards_train/rejected": -6.8125, "sft_loss": 0.69921875, "step": 5480 }, { "dpo_loss": 0.09423828125, "epoch": 0.88, "final_loss": 0.09423828125, "grad_norm": 0.0, "learning_rate": 3.7636306864961e-08, "loss": 0.1408, "projector_lr": 1.12908920594883e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.625, "sft_loss": 0.828125, "step": 5481 }, { "dpo_loss": 0.068359375, "epoch": 0.88, "final_loss": 0.068359375, "grad_norm": 0.0, "learning_rate": 3.75397296017439e-08, "loss": 0.1059, "projector_lr": 1.126191888052317e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 6.625, "rewards_train/rejected": -7.4375, "sft_loss": 0.85546875, "step": 5482 }, { "dpo_loss": 0.1494140625, "epoch": 0.88, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 3.7443271576320824e-08, "loss": 0.3043, "projector_lr": 1.1232981472896247e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.734375, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.5625, "sft_loss": 0.609375, "step": 5483 }, { "dpo_loss": 0.09716796875, "epoch": 0.88, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 3.734693281356188e-08, "loss": 0.1237, "projector_lr": 1.1204079844068566e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06884765625, "rewards_train/margins": 4.71875, "rewards_train/rejected": -4.78125, "sft_loss": 0.7265625, "step": 5484 }, { "dpo_loss": 0.0380859375, "epoch": 0.88, "final_loss": 0.0380859375, "grad_norm": 0.0, "learning_rate": 3.7250713338306427e-08, "loss": 0.0379, "projector_lr": 1.1175214001491929e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.140625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.1875, "sft_loss": 0.74609375, "step": 5485 }, { "dpo_loss": 0.046142578125, "epoch": 0.88, "final_loss": 0.046142578125, "grad_norm": 0.0, "learning_rate": 3.7154613175363226e-08, "loss": 0.0888, "projector_lr": 1.1146383952608968e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 6.3125, "rewards_train/rejected": -6.75, "sft_loss": 0.5703125, "step": 5486 }, { "dpo_loss": 0.1845703125, "epoch": 0.88, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 3.705863234951001e-08, "loss": 0.1259, "projector_lr": 1.1117589704853004e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.109375, "rewards_train/margins": 3.703125, "rewards_train/rejected": -4.8125, "sft_loss": 0.8359375, "step": 5487 }, { "dpo_loss": 0.2412109375, "epoch": 0.88, "final_loss": 0.2412109375, "grad_norm": 0.0, "learning_rate": 3.6962770885493766e-08, "loss": 0.2039, "projector_lr": 1.108883126564813e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 2.828125, "rewards_train/rejected": -3.984375, "sft_loss": 0.8125, "step": 5488 }, { "dpo_loss": 0.005889892578125, "epoch": 0.88, "final_loss": 0.005889892578125, "grad_norm": 0.0, "learning_rate": 3.6867028808031055e-08, "loss": 0.0361, "projector_lr": 1.1060108642409316e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.388671875, "rewards_train/margins": 6.1875, "rewards_train/rejected": -6.5625, "sft_loss": 0.8359375, "step": 5489 }, { "dpo_loss": 0.1259765625, "epoch": 0.88, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 3.677140614180718e-08, "loss": 0.3057, "projector_lr": 1.1031421842542156e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.21875, "sft_loss": 0.828125, "step": 5490 }, { "dpo_loss": 0.053466796875, "epoch": 0.88, "final_loss": 0.053466796875, "grad_norm": 0.0, "learning_rate": 3.667590291147693e-08, "loss": 0.2009, "projector_lr": 1.1002770873443079e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.6875, "sft_loss": 0.8046875, "step": 5491 }, { "dpo_loss": 0.0546875, "epoch": 0.88, "final_loss": 0.0546875, "grad_norm": 0.0, "learning_rate": 3.6580519141664325e-08, "loss": 0.1005, "projector_lr": 1.0974155742499298e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.1875, "sft_loss": 0.83984375, "step": 5492 }, { "dpo_loss": 0.94140625, "epoch": 0.88, "final_loss": 0.94140625, "grad_norm": 0.0, "learning_rate": 3.648525485696241e-08, "loss": 0.4944, "projector_lr": 1.0945576457088724e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.140625, "rewards_train/margins": 2.9375, "rewards_train/rejected": -4.09375, "sft_loss": 0.6796875, "step": 5493 }, { "dpo_loss": 0.01336669921875, "epoch": 0.88, "final_loss": 0.01336669921875, "grad_norm": 0.0, "learning_rate": 3.639011008193354e-08, "loss": 0.2555, "projector_lr": 1.0917033024580065e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44921875, "rewards_train/margins": 8.5, "rewards_train/rejected": -9.0, "sft_loss": 0.72265625, "step": 5494 }, { "dpo_loss": 0.0233154296875, "epoch": 0.88, "final_loss": 0.0233154296875, "grad_norm": 0.0, "learning_rate": 3.6295084841109324e-08, "loss": 0.0217, "projector_lr": 1.0888525452332798e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.671875, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.125, "sft_loss": 0.85546875, "step": 5495 }, { "dpo_loss": 0.11865234375, "epoch": 0.88, "final_loss": 0.11865234375, "grad_norm": 0.0, "learning_rate": 3.6200179158990375e-08, "loss": 0.1077, "projector_lr": 1.0860053747697113e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.0252685546875, "rewards_train/margins": 6.25, "rewards_train/rejected": -6.21875, "sft_loss": 0.5703125, "step": 5496 }, { "dpo_loss": 0.1962890625, "epoch": 0.88, "final_loss": 0.1962890625, "grad_norm": 0.0, "learning_rate": 3.6105393060046564e-08, "loss": 0.1729, "projector_lr": 1.0831617918013969e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.59375, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.78125, "sft_loss": 0.8203125, "step": 5497 }, { "dpo_loss": 0.17578125, "epoch": 0.88, "final_loss": 0.17578125, "grad_norm": 0.0, "learning_rate": 3.6010726568716896e-08, "loss": 0.1501, "projector_lr": 1.0803217970615071e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9296875, "rewards_train/margins": 4.34375, "rewards_train/rejected": -6.28125, "sft_loss": 0.70703125, "step": 5498 }, { "dpo_loss": 0.1650390625, "epoch": 0.88, "final_loss": 0.1650390625, "grad_norm": 0.0, "learning_rate": 3.591617970940969e-08, "loss": 0.0929, "projector_lr": 1.0774853912822908e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.125, "rewards_train/margins": 2.90625, "rewards_train/rejected": -4.03125, "sft_loss": 0.84375, "step": 5499 }, { "dpo_loss": 0.25390625, "epoch": 0.88, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 3.582175250650216e-08, "loss": 0.1692, "projector_lr": 1.0746525751950648e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.375, "sft_loss": 0.64453125, "step": 5500 }, { "dpo_loss": 0.265625, "epoch": 0.88, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 3.5727444984341047e-08, "loss": 0.1762, "projector_lr": 1.0718233495302315e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.59375, "sft_loss": 0.85546875, "step": 5501 }, { "dpo_loss": 0.16796875, "epoch": 0.88, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 3.563325716724169e-08, "loss": 0.1948, "projector_lr": 1.0689977150172509e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6484375, "rewards_train/margins": 5.4375, "rewards_train/rejected": -7.09375, "sft_loss": 1.1328125, "step": 5502 }, { "dpo_loss": 0.02685546875, "epoch": 0.88, "final_loss": 0.02685546875, "grad_norm": 0.0, "learning_rate": 3.5539189079489055e-08, "loss": 0.3547, "projector_lr": 1.0661756723846716e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.21875, "rewards_train/margins": 5.90625, "rewards_train/rejected": -7.125, "sft_loss": 1.3046875, "step": 5503 }, { "dpo_loss": 0.3046875, "epoch": 0.88, "final_loss": 0.3046875, "grad_norm": 0.0, "learning_rate": 3.5445240745337e-08, "loss": 0.1714, "projector_lr": 1.0633572223601102e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84375, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.875, "sft_loss": 1.03125, "step": 5504 }, { "dpo_loss": 0.04931640625, "epoch": 0.88, "final_loss": 0.04931640625, "grad_norm": 0.0, "learning_rate": 3.535141218900872e-08, "loss": 0.2203, "projector_lr": 1.0605423656702617e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59765625, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.03125, "sft_loss": 0.859375, "step": 5505 }, { "dpo_loss": 0.0439453125, "epoch": 0.88, "final_loss": 0.0439453125, "grad_norm": 0.0, "learning_rate": 3.525770343469608e-08, "loss": 0.0223, "projector_lr": 1.0577311030408826e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.15625, "sft_loss": 0.79296875, "step": 5506 }, { "dpo_loss": 0.1259765625, "epoch": 0.88, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 3.5164114506560536e-08, "loss": 0.0993, "projector_lr": 1.0549234351968162e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.6875, "sft_loss": 0.65234375, "step": 5507 }, { "dpo_loss": 0.1083984375, "epoch": 0.88, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 3.507064542873239e-08, "loss": 0.1798, "projector_lr": 1.0521193628619718e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 6.875, "rewards_train/rejected": -7.59375, "sft_loss": 0.8203125, "step": 5508 }, { "dpo_loss": 0.046630859375, "epoch": 0.88, "final_loss": 0.046630859375, "grad_norm": 0.0, "learning_rate": 3.4977296225311205e-08, "loss": 0.1728, "projector_lr": 1.0493188867593362e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.875, "sft_loss": 0.796875, "step": 5509 }, { "dpo_loss": 0.103515625, "epoch": 0.88, "final_loss": 0.103515625, "grad_norm": 0.0, "learning_rate": 3.48840669203655e-08, "loss": 0.0987, "projector_lr": 1.0465220076109649e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.8125, "sft_loss": 0.63671875, "step": 5510 }, { "dpo_loss": 0.00958251953125, "epoch": 0.88, "final_loss": 0.00958251953125, "grad_norm": 0.0, "learning_rate": 3.4790957537932877e-08, "loss": 0.1681, "projector_lr": 1.0437287261379863e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32421875, "rewards_train/margins": 5.375, "rewards_train/rejected": -5.6875, "sft_loss": 0.640625, "step": 5511 }, { "dpo_loss": 0.51953125, "epoch": 0.88, "final_loss": 0.51953125, "grad_norm": 0.0, "learning_rate": 3.4697968102020024e-08, "loss": 0.318, "projector_lr": 1.0409390430606008e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4375, "rewards_train/margins": 3.84375, "rewards_train/rejected": -5.28125, "sft_loss": 0.66796875, "step": 5512 }, { "dpo_loss": 0.039794921875, "epoch": 0.88, "final_loss": 0.039794921875, "grad_norm": 0.0, "learning_rate": 3.460509863660288e-08, "loss": 0.2867, "projector_lr": 1.0381529590980865e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 7.0, "rewards_train/rejected": -7.8125, "sft_loss": 0.68359375, "step": 5513 }, { "dpo_loss": 0.0224609375, "epoch": 0.88, "final_loss": 0.0224609375, "grad_norm": 0.0, "learning_rate": 3.4512349165626176e-08, "loss": 0.0267, "projector_lr": 1.0353704749687853e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.53125, "sft_loss": 0.6171875, "step": 5514 }, { "dpo_loss": 0.162109375, "epoch": 0.88, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 3.4419719713003946e-08, "loss": 0.113, "projector_lr": 1.0325915913901185e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.39453125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.4375, "sft_loss": 0.77734375, "step": 5515 }, { "dpo_loss": 0.291015625, "epoch": 0.88, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 3.432721030261909e-08, "loss": 0.3122, "projector_lr": 1.0298163090785728e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.203125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.71875, "sft_loss": 0.640625, "step": 5516 }, { "dpo_loss": 0.173828125, "epoch": 0.88, "final_loss": 0.173828125, "grad_norm": 0.0, "learning_rate": 3.42348209583237e-08, "loss": 0.1033, "projector_lr": 1.027044628749711e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.21875, "sft_loss": 0.62109375, "step": 5517 }, { "dpo_loss": 0.050537109375, "epoch": 0.88, "final_loss": 0.050537109375, "grad_norm": 0.0, "learning_rate": 3.414255170393876e-08, "loss": 0.1163, "projector_lr": 1.0242765511181629e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.96875, "sft_loss": 0.73828125, "step": 5518 }, { "dpo_loss": 0.68359375, "epoch": 0.88, "final_loss": 0.68359375, "grad_norm": 0.0, "learning_rate": 3.405040256325464e-08, "loss": 0.4839, "projector_lr": 1.0215120768976394e-07, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.390625, "rewards_train/margins": 2.28125, "rewards_train/rejected": -3.671875, "sft_loss": 0.65234375, "step": 5519 }, { "dpo_loss": 0.06494140625, "epoch": 0.88, "final_loss": 0.06494140625, "grad_norm": 0.0, "learning_rate": 3.3958373560030175e-08, "loss": 0.1605, "projector_lr": 1.0187512068009052e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 6.21875, "rewards_train/rejected": -7.3125, "sft_loss": 0.90625, "step": 5520 }, { "dpo_loss": 0.1474609375, "epoch": 0.88, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 3.386646471799365e-08, "loss": 0.1024, "projector_lr": 1.0159939415398095e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.1875, "sft_loss": 0.9453125, "step": 5521 }, { "dpo_loss": 0.11669921875, "epoch": 0.88, "final_loss": 0.11669921875, "grad_norm": 0.0, "learning_rate": 3.3774676060842276e-08, "loss": 0.062, "projector_lr": 1.0132402818252684e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 6.5625, "rewards_train/rejected": -7.75, "sft_loss": 0.96875, "step": 5522 }, { "dpo_loss": 0.142578125, "epoch": 0.88, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 3.368300761224235e-08, "loss": 0.2129, "projector_lr": 1.0104902283672707e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5625, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.625, "sft_loss": 1.1328125, "step": 5523 }, { "dpo_loss": 0.45703125, "epoch": 0.88, "final_loss": 0.45703125, "grad_norm": 0.0, "learning_rate": 3.35914593958288e-08, "loss": 0.4954, "projector_lr": 1.0077437818748641e-07, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 2.71875, "rewards_train/rejected": -3.59375, "sft_loss": 0.91015625, "step": 5524 }, { "dpo_loss": 0.1474609375, "epoch": 0.88, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 3.350003143520608e-08, "loss": 0.1636, "projector_lr": 1.0050009430561824e-07, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.875, "sft_loss": 0.60546875, "step": 5525 }, { "dpo_loss": 0.12353515625, "epoch": 0.88, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 3.340872375394727e-08, "loss": 0.2888, "projector_lr": 1.002261712618418e-07, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.78125, "sft_loss": 0.80078125, "step": 5526 }, { "dpo_loss": 0.11474609375, "epoch": 0.88, "final_loss": 0.11474609375, "grad_norm": 0.0, "learning_rate": 3.3317536375594644e-08, "loss": 0.0691, "projector_lr": 9.995260912678395e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.34375, "sft_loss": 0.6171875, "step": 5527 }, { "dpo_loss": 0.048095703125, "epoch": 0.88, "final_loss": 0.048095703125, "grad_norm": 0.0, "learning_rate": 3.32264693236593e-08, "loss": 0.0607, "projector_lr": 9.967940797097791e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 6.84375, "rewards_train/rejected": -7.3125, "sft_loss": 0.53125, "step": 5528 }, { "dpo_loss": 0.0247802734375, "epoch": 0.88, "final_loss": 0.0247802734375, "grad_norm": 0.0, "learning_rate": 3.3135522621621445e-08, "loss": 0.0175, "projector_lr": 9.940656786486436e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.74609375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.15625, "sft_loss": 0.69921875, "step": 5529 }, { "dpo_loss": 0.259765625, "epoch": 0.88, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 3.304469629293011e-08, "loss": 0.1799, "projector_lr": 9.913408887879032e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.59375, "sft_loss": 0.88671875, "step": 5530 }, { "dpo_loss": 0.142578125, "epoch": 0.88, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 3.295399036100344e-08, "loss": 0.1094, "projector_lr": 9.886197108301032e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5234375, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.9375, "sft_loss": 0.73828125, "step": 5531 }, { "dpo_loss": 0.189453125, "epoch": 0.89, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 3.286340484922845e-08, "loss": 0.175, "projector_lr": 9.859021454768536e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.34375, "sft_loss": 1.21875, "step": 5532 }, { "dpo_loss": 0.11572265625, "epoch": 0.89, "final_loss": 0.11572265625, "grad_norm": 0.0, "learning_rate": 3.277293978096107e-08, "loss": 0.1012, "projector_lr": 9.831881934288323e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.345703125, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.15625, "sft_loss": 0.703125, "step": 5533 }, { "dpo_loss": 0.451171875, "epoch": 0.89, "final_loss": 0.451171875, "grad_norm": 0.0, "learning_rate": 3.268259517952637e-08, "loss": 0.4623, "projector_lr": 9.804778553857913e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.953125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.5, "sft_loss": 1.09375, "step": 5534 }, { "dpo_loss": 0.2890625, "epoch": 0.89, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 3.259237106821816e-08, "loss": 0.2019, "projector_lr": 9.777711320465449e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.40625, "sft_loss": 0.828125, "step": 5535 }, { "dpo_loss": 0.30078125, "epoch": 0.89, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 3.250226747029916e-08, "loss": 0.2373, "projector_lr": 9.750680241089749e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.53125, "sft_loss": 0.69140625, "step": 5536 }, { "dpo_loss": 0.0986328125, "epoch": 0.89, "final_loss": 0.0986328125, "grad_norm": 0.0, "learning_rate": 3.241228440900123e-08, "loss": 0.408, "projector_lr": 9.723685322700371e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.09375, "rewards_train/margins": 5.46875, "rewards_train/rejected": -5.5625, "sft_loss": 0.6640625, "step": 5537 }, { "dpo_loss": 0.006378173828125, "epoch": 0.89, "final_loss": 0.006378173828125, "grad_norm": 0.0, "learning_rate": 3.232242190752499e-08, "loss": 0.013, "projector_lr": 9.696726572257497e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4375, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.6875, "sft_loss": 1.0078125, "step": 5538 }, { "dpo_loss": 0.0113525390625, "epoch": 0.89, "final_loss": 0.0113525390625, "grad_norm": 0.0, "learning_rate": 3.223267998903995e-08, "loss": 0.0215, "projector_lr": 9.669803996711984e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.33203125, "rewards_train/margins": 5.5625, "rewards_train/rejected": -5.875, "sft_loss": 0.74609375, "step": 5539 }, { "dpo_loss": 0.40625, "epoch": 0.89, "final_loss": 0.40625, "grad_norm": 0.0, "learning_rate": 3.214305867668465e-08, "loss": 0.571, "projector_lr": 9.642917603005397e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.3125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -5.1875, "sft_loss": 1.28125, "step": 5540 }, { "dpo_loss": 0.09228515625, "epoch": 0.89, "final_loss": 0.09228515625, "grad_norm": 0.0, "learning_rate": 3.2053557993566515e-08, "loss": 0.2866, "projector_lr": 9.616067398069955e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0625, "rewards_train/margins": 6.5, "rewards_train/rejected": -8.5625, "sft_loss": 1.4296875, "step": 5541 }, { "dpo_loss": 0.2001953125, "epoch": 0.89, "final_loss": 0.2001953125, "grad_norm": 0.0, "learning_rate": 3.196417796276174e-08, "loss": 0.4189, "projector_lr": 9.589253388828522e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.72265625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -5.875, "sft_loss": 0.80078125, "step": 5542 }, { "dpo_loss": 0.103515625, "epoch": 0.89, "final_loss": 0.103515625, "grad_norm": 0.0, "learning_rate": 3.187491860731545e-08, "loss": 0.0864, "projector_lr": 9.562475582194635e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.9375, "sft_loss": 0.875, "step": 5543 }, { "dpo_loss": 0.07958984375, "epoch": 0.89, "final_loss": 0.07958984375, "grad_norm": 0.0, "learning_rate": 3.17857799502419e-08, "loss": 0.1342, "projector_lr": 9.53573398507257e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.75, "rewards_train/rejected": -6.0625, "sft_loss": 0.78125, "step": 5544 }, { "dpo_loss": 0.19140625, "epoch": 0.89, "final_loss": 0.19140625, "grad_norm": 0.0, "learning_rate": 3.169676201452387e-08, "loss": 0.1874, "projector_lr": 9.509028604357162e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.90625, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.125, "sft_loss": 0.74609375, "step": 5545 }, { "dpo_loss": 0.038330078125, "epoch": 0.89, "final_loss": 0.038330078125, "grad_norm": 0.0, "learning_rate": 3.1607864823113336e-08, "loss": 0.3092, "projector_lr": 9.482359446934003e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.59375, "sft_loss": 0.6796875, "step": 5546 }, { "dpo_loss": 0.05078125, "epoch": 0.89, "final_loss": 0.05078125, "grad_norm": 0.0, "learning_rate": 3.151908839893075e-08, "loss": 0.1196, "projector_lr": 9.455726519679225e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.91015625, "rewards_train/margins": 6.65625, "rewards_train/rejected": -7.59375, "sft_loss": 0.57421875, "step": 5547 }, { "dpo_loss": 0.458984375, "epoch": 0.89, "final_loss": 0.458984375, "grad_norm": 0.0, "learning_rate": 3.143043276486579e-08, "loss": 0.3539, "projector_lr": 9.429129829459737e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.375, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.5625, "sft_loss": 0.6796875, "step": 5548 }, { "dpo_loss": 0.07958984375, "epoch": 0.89, "final_loss": 0.07958984375, "grad_norm": 0.0, "learning_rate": 3.134189794377684e-08, "loss": 0.0585, "projector_lr": 9.402569383133053e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.29296875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.59375, "sft_loss": 0.703125, "step": 5549 }, { "dpo_loss": 0.053955078125, "epoch": 0.89, "final_loss": 0.053955078125, "grad_norm": 0.0, "learning_rate": 3.1253483958491254e-08, "loss": 0.095, "projector_lr": 9.376045187547377e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.875, "sft_loss": 0.6171875, "step": 5550 }, { "dpo_loss": 0.03662109375, "epoch": 0.89, "final_loss": 0.03662109375, "grad_norm": 0.0, "learning_rate": 3.116519083180491e-08, "loss": 0.1059, "projector_lr": 9.349557249541474e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.640625, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.71875, "sft_loss": 0.90625, "step": 5551 }, { "dpo_loss": 0.4140625, "epoch": 0.89, "final_loss": 0.4140625, "grad_norm": 0.0, "learning_rate": 3.1077018586482926e-08, "loss": 0.2206, "projector_lr": 9.323105575944878e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 1.71875, "rewards_train/rejected": -3.578125, "sft_loss": 0.9296875, "step": 5552 }, { "dpo_loss": 0.2255859375, "epoch": 0.89, "final_loss": 0.2255859375, "grad_norm": 0.0, "learning_rate": 3.098896724525896e-08, "loss": 0.1254, "projector_lr": 9.296690173577688e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 3.5625, "rewards_train/rejected": -4.53125, "sft_loss": 0.83984375, "step": 5553 }, { "dpo_loss": 0.04736328125, "epoch": 0.89, "final_loss": 0.04736328125, "grad_norm": 0.0, "learning_rate": 3.090103683083567e-08, "loss": 0.0294, "projector_lr": 9.270311049250701e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.0625, "sft_loss": 0.458984375, "step": 5554 }, { "dpo_loss": 0.2314453125, "epoch": 0.89, "final_loss": 0.2314453125, "grad_norm": 0.0, "learning_rate": 3.0813227365884486e-08, "loss": 0.3265, "projector_lr": 9.243968209765347e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 3.71875, "rewards_train/rejected": -5.0, "sft_loss": 0.87109375, "step": 5555 }, { "dpo_loss": 0.2294921875, "epoch": 0.89, "final_loss": 0.2294921875, "grad_norm": 0.0, "learning_rate": 3.072553887304558e-08, "loss": 0.1462, "projector_lr": 9.217661661913674e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 2.0625, "rewards_train/rejected": -3.140625, "sft_loss": 0.765625, "step": 5556 }, { "dpo_loss": 0.73046875, "epoch": 0.89, "final_loss": 0.73046875, "grad_norm": 0.0, "learning_rate": 3.0637971374927964e-08, "loss": 0.7931, "projector_lr": 9.191391412478389e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.90625, "sft_loss": 0.66015625, "step": 5557 }, { "dpo_loss": 0.0036773681640625, "epoch": 0.89, "final_loss": 0.0036773681640625, "grad_norm": 0.0, "learning_rate": 3.055052489410958e-08, "loss": 0.0992, "projector_lr": 9.165157468232876e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 6.84375, "rewards_train/rejected": -8.1875, "sft_loss": 0.76171875, "step": 5558 }, { "dpo_loss": 0.2890625, "epoch": 0.89, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 3.0463199453137024e-08, "loss": 0.1783, "projector_lr": 9.138959835941107e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.515625, "rewards_train/margins": 2.578125, "rewards_train/rejected": -4.09375, "sft_loss": 0.83984375, "step": 5559 }, { "dpo_loss": 0.345703125, "epoch": 0.89, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 3.037599507452576e-08, "loss": 0.2275, "projector_lr": 9.112798522357729e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.03125, "rewards_train/margins": 3.109375, "rewards_train/rejected": -4.125, "sft_loss": 0.6640625, "step": 5560 }, { "dpo_loss": 0.4765625, "epoch": 0.89, "final_loss": 0.4765625, "grad_norm": 0.0, "learning_rate": 3.0288911780759874e-08, "loss": 0.4698, "projector_lr": 9.086673534227963e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.453125, "rewards_train/margins": 3.390625, "rewards_train/rejected": -4.84375, "sft_loss": 0.78515625, "step": 5561 }, { "dpo_loss": 0.19140625, "epoch": 0.89, "final_loss": 0.19140625, "grad_norm": 0.0, "learning_rate": 3.0201949594292505e-08, "loss": 0.1624, "projector_lr": 9.060584878287754e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 4.5, "rewards_train/rejected": -6.0625, "sft_loss": 0.9140625, "step": 5562 }, { "dpo_loss": 0.4609375, "epoch": 0.89, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 3.011510853754534e-08, "loss": 0.4359, "projector_lr": 9.034532561263603e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 3.65625, "rewards_train/rejected": -4.0625, "sft_loss": 0.8828125, "step": 5563 }, { "dpo_loss": 0.3671875, "epoch": 0.89, "final_loss": 0.3671875, "grad_norm": 0.0, "learning_rate": 3.0028388632909127e-08, "loss": 0.2173, "projector_lr": 9.00851658987274e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.318359375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.0, "sft_loss": 0.69140625, "step": 5564 }, { "dpo_loss": 0.0079345703125, "epoch": 0.89, "final_loss": 0.0079345703125, "grad_norm": 0.0, "learning_rate": 2.994178990274282e-08, "loss": 0.0543, "projector_lr": 8.982536970822847e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.28125, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.5625, "sft_loss": 0.83984375, "step": 5565 }, { "dpo_loss": 0.030517578125, "epoch": 0.89, "final_loss": 0.030517578125, "grad_norm": 0.0, "learning_rate": 2.9855312369374775e-08, "loss": 0.0183, "projector_lr": 8.956593710812433e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.953125, "rewards_train/margins": 5.375, "rewards_train/rejected": -7.34375, "sft_loss": 0.74609375, "step": 5566 }, { "dpo_loss": 0.1884765625, "epoch": 0.89, "final_loss": 0.1884765625, "grad_norm": 0.0, "learning_rate": 2.97689560551016e-08, "loss": 0.2938, "projector_lr": 8.930686816530481e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.53125, "sft_loss": 0.48828125, "step": 5567 }, { "dpo_loss": 0.486328125, "epoch": 0.89, "final_loss": 0.486328125, "grad_norm": 0.0, "learning_rate": 2.968272098218899e-08, "loss": 0.3294, "projector_lr": 8.904816294656698e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.375, "rewards_train/margins": 3.75, "rewards_train/rejected": -5.125, "sft_loss": 0.7421875, "step": 5568 }, { "dpo_loss": 0.08740234375, "epoch": 0.89, "final_loss": 0.08740234375, "grad_norm": 0.0, "learning_rate": 2.959660717287127e-08, "loss": 0.205, "projector_lr": 8.878982151861382e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.15625, "sft_loss": 0.578125, "step": 5569 }, { "dpo_loss": 0.029541015625, "epoch": 0.89, "final_loss": 0.029541015625, "grad_norm": 0.0, "learning_rate": 2.9510614649351397e-08, "loss": 0.2926, "projector_lr": 8.853184394805419e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.033203125, "rewards_train/margins": 5.0625, "rewards_train/rejected": -5.09375, "sft_loss": 0.60546875, "step": 5570 }, { "dpo_loss": 0.388671875, "epoch": 0.89, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 2.942474343380108e-08, "loss": 0.3536, "projector_lr": 8.827423030140324e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.515625, "rewards_train/margins": 3.671875, "rewards_train/rejected": -5.1875, "sft_loss": 1.09375, "step": 5571 }, { "dpo_loss": 0.158203125, "epoch": 0.89, "final_loss": 0.158203125, "grad_norm": 0.0, "learning_rate": 2.933899354836089e-08, "loss": 0.22, "projector_lr": 8.801698064508268e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.0625, "sft_loss": 1.2734375, "step": 5572 }, { "dpo_loss": 0.0228271484375, "epoch": 0.89, "final_loss": 0.0228271484375, "grad_norm": 0.0, "learning_rate": 2.925336501514003e-08, "loss": 0.0729, "projector_lr": 8.776009504542009e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.423828125, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.1875, "sft_loss": 0.6875, "step": 5573 }, { "dpo_loss": 0.1904296875, "epoch": 0.89, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 2.9167857856216337e-08, "loss": 0.1153, "projector_lr": 8.750357356864902e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.37109375, "rewards_train/margins": 3.0625, "rewards_train/rejected": -3.4375, "sft_loss": 0.67578125, "step": 5574 }, { "dpo_loss": 0.2470703125, "epoch": 0.89, "final_loss": 0.2470703125, "grad_norm": 0.0, "learning_rate": 2.9082472093636568e-08, "loss": 0.2939, "projector_lr": 8.72474162809097e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.265625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.21875, "sft_loss": 0.8125, "step": 5575 }, { "dpo_loss": 0.02880859375, "epoch": 0.89, "final_loss": 0.02880859375, "grad_norm": 0.0, "learning_rate": 2.8997207749415885e-08, "loss": 0.0345, "projector_lr": 8.699162324824766e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 6.03125, "rewards_train/rejected": -6.9375, "sft_loss": 0.796875, "step": 5576 }, { "dpo_loss": 0.142578125, "epoch": 0.89, "final_loss": 0.142578125, "grad_norm": 0.0, "learning_rate": 2.8912064845538432e-08, "loss": 0.2678, "projector_lr": 8.67361945366153e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.53125, "sft_loss": 0.6640625, "step": 5577 }, { "dpo_loss": 0.039794921875, "epoch": 0.89, "final_loss": 0.039794921875, "grad_norm": 0.0, "learning_rate": 2.8827043403956754e-08, "loss": 0.0564, "projector_lr": 8.648113021187026e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.65625, "sft_loss": 0.83203125, "step": 5578 }, { "dpo_loss": 0.0211181640625, "epoch": 0.89, "final_loss": 0.0211181640625, "grad_norm": 0.0, "learning_rate": 2.874214344659237e-08, "loss": 0.0207, "projector_lr": 8.622643033977712e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.95703125, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.875, "sft_loss": 0.86328125, "step": 5579 }, { "dpo_loss": 0.0703125, "epoch": 0.89, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 2.8657364995335277e-08, "loss": 0.0905, "projector_lr": 8.597209498600584e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 5.625, "rewards_train/rejected": -7.34375, "sft_loss": 0.89453125, "step": 5580 }, { "dpo_loss": 0.30078125, "epoch": 0.89, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 2.857270807204426e-08, "loss": 0.2128, "projector_lr": 8.571812421613279e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.84375, "rewards_train/margins": 3.078125, "rewards_train/rejected": -3.921875, "sft_loss": 0.94921875, "step": 5581 }, { "dpo_loss": 0.078125, "epoch": 0.89, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 2.8488172698546753e-08, "loss": 0.0934, "projector_lr": 8.546451809564027e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.1875, "sft_loss": 2.09375, "step": 5582 }, { "dpo_loss": 0.166015625, "epoch": 0.89, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 2.8403758896638707e-08, "loss": 0.2444, "projector_lr": 8.521127668991613e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.34375, "sft_loss": 0.78515625, "step": 5583 }, { "dpo_loss": 0.07373046875, "epoch": 0.89, "final_loss": 0.07373046875, "grad_norm": 0.0, "learning_rate": 2.831946668808477e-08, "loss": 0.0515, "projector_lr": 8.495840006425431e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 6.09375, "rewards_train/rejected": -7.5625, "sft_loss": 0.8515625, "step": 5584 }, { "dpo_loss": 0.1787109375, "epoch": 0.89, "final_loss": 0.1787109375, "grad_norm": 0.0, "learning_rate": 2.8235296094618556e-08, "loss": 0.1488, "projector_lr": 8.470588828385566e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 3.640625, "rewards_train/rejected": -5.3125, "sft_loss": 1.1640625, "step": 5585 }, { "dpo_loss": 0.345703125, "epoch": 0.89, "final_loss": 0.345703125, "grad_norm": 0.0, "learning_rate": 2.815124713794187e-08, "loss": 0.1773, "projector_lr": 8.445374141382562e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.515625, "rewards_train/margins": 3.328125, "rewards_train/rejected": -4.84375, "sft_loss": 1.015625, "step": 5586 }, { "dpo_loss": 0.1259765625, "epoch": 0.89, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 2.8067319839725433e-08, "loss": 0.0907, "projector_lr": 8.420195951917631e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.0625, "sft_loss": 1.7890625, "step": 5587 }, { "dpo_loss": 0.035888671875, "epoch": 0.89, "final_loss": 0.035888671875, "grad_norm": 0.0, "learning_rate": 2.798351422160844e-08, "loss": 0.0816, "projector_lr": 8.395054266482532e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 5.96875, "rewards_train/rejected": -7.125, "sft_loss": 0.6640625, "step": 5588 }, { "dpo_loss": 0.09375, "epoch": 0.89, "final_loss": 0.09375, "grad_norm": 0.0, "learning_rate": 2.7899830305198935e-08, "loss": 0.0855, "projector_lr": 8.369949091559681e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.03125, "sft_loss": 0.7265625, "step": 5589 }, { "dpo_loss": 0.1357421875, "epoch": 0.89, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 2.7816268112073327e-08, "loss": 0.1653, "projector_lr": 8.344880433621999e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.90625, "sft_loss": 0.89453125, "step": 5590 }, { "dpo_loss": 0.134765625, "epoch": 0.89, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 2.773282766377688e-08, "loss": 0.113, "projector_lr": 8.319848299133065e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 5.9375, "rewards_train/rejected": -7.0, "sft_loss": 0.921875, "step": 5591 }, { "dpo_loss": 0.0576171875, "epoch": 0.89, "final_loss": 0.0576171875, "grad_norm": 0.0, "learning_rate": 2.7649508981823168e-08, "loss": 0.1569, "projector_lr": 8.29485269454695e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1552734375, "rewards_train/margins": 6.84375, "rewards_train/rejected": -7.0, "sft_loss": 0.71875, "step": 5592 }, { "dpo_loss": 0.220703125, "epoch": 0.89, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 2.756631208769472e-08, "loss": 0.1373, "projector_lr": 8.269893626308417e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.28125, "sft_loss": 0.79296875, "step": 5593 }, { "dpo_loss": 0.0306396484375, "epoch": 0.9, "final_loss": 0.0306396484375, "grad_norm": 0.0, "learning_rate": 2.748323700284233e-08, "loss": 0.0927, "projector_lr": 8.2449711008527e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.875, "sft_loss": 0.6875, "step": 5594 }, { "dpo_loss": 0.18359375, "epoch": 0.9, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 2.740028374868575e-08, "loss": 0.1093, "projector_lr": 8.220085124605725e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.984375, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.96875, "sft_loss": 0.8203125, "step": 5595 }, { "dpo_loss": 0.2265625, "epoch": 0.9, "final_loss": 0.2265625, "grad_norm": 0.0, "learning_rate": 2.731745234661281e-08, "loss": 0.213, "projector_lr": 8.195235703983844e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 2.5, "rewards_train/rejected": -3.03125, "sft_loss": 0.8984375, "step": 5596 }, { "dpo_loss": 0.12353515625, "epoch": 0.9, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 2.723474281798055e-08, "loss": 0.132, "projector_lr": 8.170422845394165e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.765625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.09375, "sft_loss": 0.62890625, "step": 5597 }, { "dpo_loss": 0.1640625, "epoch": 0.9, "final_loss": 0.1640625, "grad_norm": 0.0, "learning_rate": 2.7152155184114066e-08, "loss": 0.2113, "projector_lr": 8.145646555234221e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.53125, "sft_loss": 0.82421875, "step": 5598 }, { "dpo_loss": 0.275390625, "epoch": 0.9, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 2.7069689466307278e-08, "loss": 0.3439, "projector_lr": 8.120906839892183e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.34375, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.34375, "sft_loss": 0.77734375, "step": 5599 }, { "dpo_loss": 0.203125, "epoch": 0.9, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 2.6987345685822728e-08, "loss": 0.1665, "projector_lr": 8.096203705746819e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0625, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.5625, "sft_loss": 0.703125, "step": 5600 }, { "dpo_loss": 0.162109375, "epoch": 0.9, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 2.690512386389121e-08, "loss": 0.1471, "projector_lr": 8.071537159167364e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.8125, "sft_loss": 0.7421875, "step": 5601 }, { "dpo_loss": 0.251953125, "epoch": 0.9, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 2.6823024021712325e-08, "loss": 0.2824, "projector_lr": 8.046907206513698e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.6875, "sft_loss": 0.734375, "step": 5602 }, { "dpo_loss": 0.2470703125, "epoch": 0.9, "final_loss": 0.2470703125, "grad_norm": 0.0, "learning_rate": 2.6741046180454307e-08, "loss": 0.2128, "projector_lr": 8.022313854136292e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.828125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.3125, "sft_loss": 0.63671875, "step": 5603 }, { "dpo_loss": 0.0228271484375, "epoch": 0.9, "final_loss": 0.0228271484375, "grad_norm": 0.0, "learning_rate": 2.665919036125369e-08, "loss": 0.0603, "projector_lr": 7.997757108376109e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.396484375, "rewards_train/margins": 6.46875, "rewards_train/rejected": -6.875, "sft_loss": 0.71875, "step": 5604 }, { "dpo_loss": 0.302734375, "epoch": 0.9, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 2.657745658521571e-08, "loss": 0.215, "projector_lr": 7.973236975564712e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8125, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.5625, "sft_loss": 0.78515625, "step": 5605 }, { "dpo_loss": 0.00946044921875, "epoch": 0.9, "final_loss": 0.00946044921875, "grad_norm": 0.0, "learning_rate": 2.649584487341394e-08, "loss": 0.067, "projector_lr": 7.948753462024183e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.4375, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.1875, "sft_loss": 0.83203125, "step": 5606 }, { "dpo_loss": 0.00909423828125, "epoch": 0.9, "final_loss": 0.00909423828125, "grad_norm": 0.0, "learning_rate": 2.6414355246890786e-08, "loss": 0.0161, "projector_lr": 7.924306574067236e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30859375, "rewards_train/margins": 7.03125, "rewards_train/rejected": -7.3125, "sft_loss": 0.7109375, "step": 5607 }, { "dpo_loss": 0.07177734375, "epoch": 0.9, "final_loss": 0.07177734375, "grad_norm": 0.0, "learning_rate": 2.6332987726656818e-08, "loss": 0.0811, "projector_lr": 7.899896317997046e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.6875, "sft_loss": 0.98828125, "step": 5608 }, { "dpo_loss": 0.12353515625, "epoch": 0.9, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 2.6251742333691597e-08, "loss": 0.1767, "projector_lr": 7.875522700107479e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.671875, "rewards_train/margins": 5.8125, "rewards_train/rejected": -7.46875, "sft_loss": 0.7265625, "step": 5609 }, { "dpo_loss": 0.140625, "epoch": 0.9, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 2.617061908894275e-08, "loss": 0.1202, "projector_lr": 7.851185726682825e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.84375, "sft_loss": 0.91015625, "step": 5610 }, { "dpo_loss": 0.095703125, "epoch": 0.9, "final_loss": 0.095703125, "grad_norm": 0.0, "learning_rate": 2.6089618013326543e-08, "loss": 0.2199, "projector_lr": 7.826885403997963e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.21875, "sft_loss": 0.66796875, "step": 5611 }, { "dpo_loss": 0.34765625, "epoch": 0.9, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 2.6008739127727775e-08, "loss": 0.2419, "projector_lr": 7.802621738318333e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.65625, "sft_loss": 0.6171875, "step": 5612 }, { "dpo_loss": 0.171875, "epoch": 0.9, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 2.5927982452999818e-08, "loss": 0.1062, "projector_lr": 7.778394735899947e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.64453125, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.46875, "sft_loss": 0.6171875, "step": 5613 }, { "dpo_loss": 0.43359375, "epoch": 0.9, "final_loss": 0.43359375, "grad_norm": 0.0, "learning_rate": 2.5847348009964464e-08, "loss": 0.3525, "projector_lr": 7.75420440298934e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.296875, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.5625, "sft_loss": 0.8359375, "step": 5614 }, { "dpo_loss": 0.058349609375, "epoch": 0.9, "final_loss": 0.058349609375, "grad_norm": 0.0, "learning_rate": 2.576683581941186e-08, "loss": 0.1405, "projector_lr": 7.730050745823558e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.625, "sft_loss": 0.5703125, "step": 5615 }, { "dpo_loss": 0.10888671875, "epoch": 0.9, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 2.5686445902100894e-08, "loss": 0.116, "projector_lr": 7.705933770630269e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.3125, "sft_loss": 0.78515625, "step": 5616 }, { "dpo_loss": 0.34375, "epoch": 0.9, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 2.5606178278758717e-08, "loss": 0.2056, "projector_lr": 7.681853483627616e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 3.40625, "rewards_train/rejected": -3.9375, "sft_loss": 0.99609375, "step": 5617 }, { "dpo_loss": 0.047119140625, "epoch": 0.9, "final_loss": 0.047119140625, "grad_norm": 0.0, "learning_rate": 2.5526032970081046e-08, "loss": 0.0473, "projector_lr": 7.657809891024315e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.125, "sft_loss": 0.6875, "step": 5618 }, { "dpo_loss": 0.1689453125, "epoch": 0.9, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 2.544600999673191e-08, "loss": 0.1607, "projector_lr": 7.633802999019573e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.25, "sft_loss": 0.75, "step": 5619 }, { "dpo_loss": 0.04296875, "epoch": 0.9, "final_loss": 0.04296875, "grad_norm": 0.0, "learning_rate": 2.5366109379344138e-08, "loss": 0.1235, "projector_lr": 7.609832813803242e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 5.375, "rewards_train/rejected": -6.21875, "sft_loss": 0.88671875, "step": 5620 }, { "dpo_loss": 0.09814453125, "epoch": 0.9, "final_loss": 0.09814453125, "grad_norm": 0.0, "learning_rate": 2.528633113851858e-08, "loss": 0.225, "projector_lr": 7.585899341555575e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.443359375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.125, "sft_loss": 0.734375, "step": 5621 }, { "dpo_loss": 0.296875, "epoch": 0.9, "final_loss": 0.296875, "grad_norm": 0.0, "learning_rate": 2.520667529482501e-08, "loss": 0.2141, "projector_lr": 7.562002588447503e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 3.1875, "rewards_train/rejected": -4.5625, "sft_loss": 0.91015625, "step": 5622 }, { "dpo_loss": 0.0478515625, "epoch": 0.9, "final_loss": 0.0478515625, "grad_norm": 0.0, "learning_rate": 2.5127141868801048e-08, "loss": 0.051, "projector_lr": 7.538142560640315e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.125, "sft_loss": 0.859375, "step": 5623 }, { "dpo_loss": 0.306640625, "epoch": 0.9, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 2.5047730880953355e-08, "loss": 0.2851, "projector_lr": 7.514319264286007e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.890625, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.4375, "sft_loss": 0.85546875, "step": 5624 }, { "dpo_loss": 0.435546875, "epoch": 0.9, "final_loss": 0.435546875, "grad_norm": 0.0, "learning_rate": 2.4968442351756548e-08, "loss": 0.2392, "projector_lr": 7.490532705526964e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 6.96875, "rewards_train/rejected": -7.8125, "sft_loss": 0.75, "step": 5625 }, { "dpo_loss": 0.1962890625, "epoch": 0.9, "final_loss": 0.1962890625, "grad_norm": 0.0, "learning_rate": 2.488927630165405e-08, "loss": 0.2411, "projector_lr": 7.466782890496215e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.328125, "rewards_train/margins": 4.5, "rewards_train/rejected": -6.84375, "sft_loss": 0.94921875, "step": 5626 }, { "dpo_loss": 0.031494140625, "epoch": 0.9, "final_loss": 0.031494140625, "grad_norm": 0.0, "learning_rate": 2.4810232751057424e-08, "loss": 0.2063, "projector_lr": 7.443069825317227e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58203125, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.125, "sft_loss": 0.75, "step": 5627 }, { "dpo_loss": 0.0228271484375, "epoch": 0.9, "final_loss": 0.0228271484375, "grad_norm": 0.0, "learning_rate": 2.4731311720346813e-08, "loss": 0.1188, "projector_lr": 7.419393516104044e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30078125, "rewards_train/margins": 5.375, "rewards_train/rejected": -5.6875, "sft_loss": 0.91015625, "step": 5628 }, { "dpo_loss": 0.423828125, "epoch": 0.9, "final_loss": 0.423828125, "grad_norm": 0.0, "learning_rate": 2.465251322987061e-08, "loss": 0.4033, "projector_lr": 7.395753968961184e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.21875, "rewards_train/margins": 3.1875, "rewards_train/rejected": -4.40625, "sft_loss": 0.9375, "step": 5629 }, { "dpo_loss": 0.0673828125, "epoch": 0.9, "final_loss": 0.0673828125, "grad_norm": 0.0, "learning_rate": 2.4573837299945887e-08, "loss": 0.0904, "projector_lr": 7.372151189983767e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.4375, "sft_loss": 0.69140625, "step": 5630 }, { "dpo_loss": 0.10546875, "epoch": 0.9, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 2.449528395085776e-08, "loss": 0.0789, "projector_lr": 7.348585185257329e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.478515625, "rewards_train/margins": 3.59375, "rewards_train/rejected": -4.0625, "sft_loss": 0.84765625, "step": 5631 }, { "dpo_loss": 0.06591796875, "epoch": 0.9, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 2.4416853202860022e-08, "loss": 0.1046, "projector_lr": 7.325055960858007e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.34375, "sft_loss": 0.75, "step": 5632 }, { "dpo_loss": 0.0771484375, "epoch": 0.9, "final_loss": 0.0771484375, "grad_norm": 0.0, "learning_rate": 2.4338545076174723e-08, "loss": 0.2023, "projector_lr": 7.301563522852417e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.9375, "sft_loss": 0.69921875, "step": 5633 }, { "dpo_loss": 0.625, "epoch": 0.9, "final_loss": 0.625, "grad_norm": 0.0, "learning_rate": 2.426035959099232e-08, "loss": 0.3428, "projector_lr": 7.278107877297696e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.46875, "sft_loss": 0.75, "step": 5634 }, { "dpo_loss": 0.03564453125, "epoch": 0.9, "final_loss": 0.03564453125, "grad_norm": 0.0, "learning_rate": 2.4182296767471632e-08, "loss": 0.0425, "projector_lr": 7.254689030241491e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.54296875, "rewards_train/margins": 6.1875, "rewards_train/rejected": -6.71875, "sft_loss": 0.62109375, "step": 5635 }, { "dpo_loss": 0.09716796875, "epoch": 0.9, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 2.4104356625740064e-08, "loss": 0.0945, "projector_lr": 7.231306987722019e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.21875, "sft_loss": 0.96875, "step": 5636 }, { "dpo_loss": 0.12890625, "epoch": 0.9, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 2.4026539185892924e-08, "loss": 0.1419, "projector_lr": 7.207961755767878e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.40625, "sft_loss": 0.8203125, "step": 5637 }, { "dpo_loss": 0.130859375, "epoch": 0.9, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 2.394884446799439e-08, "loss": 0.1356, "projector_lr": 7.184653340398317e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.28125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.9375, "sft_loss": 0.9921875, "step": 5638 }, { "dpo_loss": 0.30859375, "epoch": 0.9, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 2.3871272492076544e-08, "loss": 0.1647, "projector_lr": 7.161381747622964e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.4765625, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.5, "sft_loss": 0.93359375, "step": 5639 }, { "dpo_loss": 0.248046875, "epoch": 0.9, "final_loss": 0.248046875, "grad_norm": 0.0, "learning_rate": 2.3793823278140392e-08, "loss": 0.3846, "projector_lr": 7.138146983442117e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.28125, "rewards_train/margins": 3.5, "rewards_train/rejected": -4.78125, "sft_loss": 0.73828125, "step": 5640 }, { "dpo_loss": 0.08349609375, "epoch": 0.9, "final_loss": 0.08349609375, "grad_norm": 0.0, "learning_rate": 2.371649684615451e-08, "loss": 0.2688, "projector_lr": 7.114949053846353e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.6875, "sft_loss": 0.98046875, "step": 5641 }, { "dpo_loss": 0.365234375, "epoch": 0.9, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 2.3639293216056623e-08, "loss": 0.233, "projector_lr": 7.091787964816987e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 3.9375, "rewards_train/rejected": -5.6875, "sft_loss": 1.015625, "step": 5642 }, { "dpo_loss": 0.23046875, "epoch": 0.9, "final_loss": 0.23046875, "grad_norm": 0.0, "learning_rate": 2.3562212407752136e-08, "loss": 0.1425, "projector_lr": 7.068663722325641e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 3.109375, "rewards_train/rejected": -4.4375, "sft_loss": 0.82421875, "step": 5643 }, { "dpo_loss": 0.0537109375, "epoch": 0.9, "final_loss": 0.0537109375, "grad_norm": 0.0, "learning_rate": 2.348525444111532e-08, "loss": 0.048, "projector_lr": 7.045576332334597e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.28125, "sft_loss": 1.0625, "step": 5644 }, { "dpo_loss": 0.310546875, "epoch": 0.9, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 2.3408419335988362e-08, "loss": 0.3294, "projector_lr": 7.022525800796509e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.5625, "sft_loss": 0.8203125, "step": 5645 }, { "dpo_loss": 0.125, "epoch": 0.9, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 2.333170711218202e-08, "loss": 0.1901, "projector_lr": 6.999512133654607e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84375, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.375, "sft_loss": 0.66796875, "step": 5646 }, { "dpo_loss": 0.007171630859375, "epoch": 0.9, "final_loss": 0.007171630859375, "grad_norm": 0.0, "learning_rate": 2.3255117789475142e-08, "loss": 0.1471, "projector_lr": 6.976535336842544e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1748046875, "rewards_train/margins": 6.8125, "rewards_train/rejected": -6.625, "sft_loss": 0.59765625, "step": 5647 }, { "dpo_loss": 0.0986328125, "epoch": 0.9, "final_loss": 0.0986328125, "grad_norm": 0.0, "learning_rate": 2.317865138761521e-08, "loss": 0.3786, "projector_lr": 6.953595416284564e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.96875, "sft_loss": 0.765625, "step": 5648 }, { "dpo_loss": 0.26953125, "epoch": 0.9, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 2.3102307926317788e-08, "loss": 0.1902, "projector_lr": 6.930692377895337e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.1875, "sft_loss": 0.71875, "step": 5649 }, { "dpo_loss": 0.051513671875, "epoch": 0.9, "final_loss": 0.051513671875, "grad_norm": 0.0, "learning_rate": 2.3026087425266627e-08, "loss": 0.092, "projector_lr": 6.907826227579989e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.271484375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -4.65625, "sft_loss": 0.62109375, "step": 5650 }, { "dpo_loss": 0.06591796875, "epoch": 0.9, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 2.294998990411412e-08, "loss": 0.0453, "projector_lr": 6.884996971234236e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.359375, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.375, "sft_loss": 0.7421875, "step": 5651 }, { "dpo_loss": 0.1474609375, "epoch": 0.9, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 2.2874015382480737e-08, "loss": 0.2221, "projector_lr": 6.862204614744222e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.515625, "rewards_train/margins": 3.640625, "rewards_train/rejected": -5.15625, "sft_loss": 0.71484375, "step": 5652 }, { "dpo_loss": 0.1728515625, "epoch": 0.9, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 2.279816387995509e-08, "loss": 0.2969, "projector_lr": 6.839449163986528e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.0625, "sft_loss": 0.8984375, "step": 5653 }, { "dpo_loss": 0.126953125, "epoch": 0.9, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 2.2722435416094476e-08, "loss": 0.2073, "projector_lr": 6.816730624828344e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.45703125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -4.34375, "sft_loss": 0.7265625, "step": 5654 }, { "dpo_loss": 0.326171875, "epoch": 0.9, "final_loss": 0.326171875, "grad_norm": 0.0, "learning_rate": 2.2646830010424166e-08, "loss": 0.1762, "projector_lr": 6.794049003127251e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 3.890625, "rewards_train/rejected": -4.90625, "sft_loss": 0.66796875, "step": 5655 }, { "dpo_loss": 0.287109375, "epoch": 0.9, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 2.2571347682437626e-08, "loss": 0.3586, "projector_lr": 6.771404304731288e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.34375, "rewards_train/margins": 3.546875, "rewards_train/rejected": -4.90625, "sft_loss": 0.82421875, "step": 5656 }, { "dpo_loss": 0.1328125, "epoch": 0.91, "final_loss": 0.1328125, "grad_norm": 0.0, "learning_rate": 2.2495988451596947e-08, "loss": 0.2487, "projector_lr": 6.748796535479085e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.41796875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.03125, "sft_loss": 0.79296875, "step": 5657 }, { "dpo_loss": 0.1181640625, "epoch": 0.91, "final_loss": 0.1181640625, "grad_norm": 0.0, "learning_rate": 2.2420752337332204e-08, "loss": 0.0917, "projector_lr": 6.726225701199662e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1640625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.5625, "sft_loss": 0.8046875, "step": 5658 }, { "dpo_loss": 0.12451171875, "epoch": 0.91, "final_loss": 0.12451171875, "grad_norm": 0.0, "learning_rate": 2.234563935904177e-08, "loss": 0.1494, "projector_lr": 6.703691807712531e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 5.9375, "rewards_train/rejected": -7.40625, "sft_loss": 0.84375, "step": 5659 }, { "dpo_loss": 0.0194091796875, "epoch": 0.91, "final_loss": 0.0194091796875, "grad_norm": 0.0, "learning_rate": 2.22706495360922e-08, "loss": 0.0393, "projector_lr": 6.681194860827661e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.79296875, "rewards_train/margins": 6.5, "rewards_train/rejected": -7.28125, "sft_loss": 1.0078125, "step": 5660 }, { "dpo_loss": 0.2451171875, "epoch": 0.91, "final_loss": 0.2451171875, "grad_norm": 0.0, "learning_rate": 2.2195782887818536e-08, "loss": 0.1321, "projector_lr": 6.658734866345562e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.953125, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.90625, "sft_loss": 0.80078125, "step": 5661 }, { "dpo_loss": 0.125, "epoch": 0.91, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 2.2121039433523836e-08, "loss": 0.1527, "projector_lr": 6.636311830057151e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.34375, "sft_loss": 0.8671875, "step": 5662 }, { "dpo_loss": 0.2490234375, "epoch": 0.91, "final_loss": 0.2490234375, "grad_norm": 0.0, "learning_rate": 2.204641919247957e-08, "loss": 0.1886, "projector_lr": 6.613925757743872e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.625, "sft_loss": 0.6484375, "step": 5663 }, { "dpo_loss": 0.091796875, "epoch": 0.91, "final_loss": 0.091796875, "grad_norm": 0.0, "learning_rate": 2.1971922183925186e-08, "loss": 0.0481, "projector_lr": 6.591576655177556e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71875, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.84375, "sft_loss": 0.7265625, "step": 5664 }, { "dpo_loss": 0.2490234375, "epoch": 0.91, "final_loss": 0.2490234375, "grad_norm": 0.0, "learning_rate": 2.1897548427068645e-08, "loss": 0.3412, "projector_lr": 6.569264528120594e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.640625, "rewards_train/margins": 3.53125, "rewards_train/rejected": -5.15625, "sft_loss": 0.8359375, "step": 5665 }, { "dpo_loss": 0.154296875, "epoch": 0.91, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 2.182329794108595e-08, "loss": 0.0816, "projector_lr": 6.546989382325786e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.107421875, "rewards_train/margins": 6.59375, "rewards_train/rejected": -6.6875, "sft_loss": 0.46875, "step": 5666 }, { "dpo_loss": 0.15625, "epoch": 0.91, "final_loss": 0.15625, "grad_norm": 0.0, "learning_rate": 2.1749170745121447e-08, "loss": 0.0882, "projector_lr": 6.524751223536435e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.439453125, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.40625, "sft_loss": 0.76171875, "step": 5667 }, { "dpo_loss": 0.1279296875, "epoch": 0.91, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 2.167516685828741e-08, "loss": 0.3297, "projector_lr": 6.502550057486223e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.21875, "sft_loss": 0.83203125, "step": 5668 }, { "dpo_loss": 0.1162109375, "epoch": 0.91, "final_loss": 0.1162109375, "grad_norm": 0.0, "learning_rate": 2.160128629966479e-08, "loss": 0.0656, "projector_lr": 6.480385889899437e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.140625, "rewards_train/margins": 3.796875, "rewards_train/rejected": -4.9375, "sft_loss": 0.89453125, "step": 5669 }, { "dpo_loss": 0.0283203125, "epoch": 0.91, "final_loss": 0.0283203125, "grad_norm": 0.0, "learning_rate": 2.1527529088302243e-08, "loss": 0.0971, "projector_lr": 6.458258726490674e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 6.71875, "rewards_train/rejected": -7.34375, "sft_loss": 0.58203125, "step": 5670 }, { "dpo_loss": 0.1357421875, "epoch": 0.91, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 2.1453895243217058e-08, "loss": 0.0955, "projector_lr": 6.436168572965117e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91015625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.21875, "sft_loss": 0.984375, "step": 5671 }, { "dpo_loss": 0.267578125, "epoch": 0.91, "final_loss": 0.267578125, "grad_norm": 0.0, "learning_rate": 2.138038478339438e-08, "loss": 0.2799, "projector_lr": 6.414115435018313e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 3.6875, "rewards_train/rejected": -5.25, "sft_loss": 1.46875, "step": 5672 }, { "dpo_loss": 0.08837890625, "epoch": 0.91, "final_loss": 0.08837890625, "grad_norm": 0.0, "learning_rate": 2.1306997727787767e-08, "loss": 0.0815, "projector_lr": 6.39209931833633e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.494140625, "rewards_train/margins": 6.84375, "rewards_train/rejected": -7.34375, "sft_loss": 0.671875, "step": 5673 }, { "dpo_loss": 0.150390625, "epoch": 0.91, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 2.12337340953187e-08, "loss": 0.1425, "projector_lr": 6.37012022859561e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.85546875, "rewards_train/margins": 3.40625, "rewards_train/rejected": -4.25, "sft_loss": 0.84375, "step": 5674 }, { "dpo_loss": 0.0673828125, "epoch": 0.91, "final_loss": 0.0673828125, "grad_norm": 0.0, "learning_rate": 2.1160593904877234e-08, "loss": 0.0468, "projector_lr": 6.34817817146317e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.4375, "sft_loss": 0.73046875, "step": 5675 }, { "dpo_loss": 0.2119140625, "epoch": 0.91, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 2.1087577175321226e-08, "loss": 0.2199, "projector_lr": 6.326273152596368e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.046875, "rewards_train/rejected": -4.5625, "sft_loss": 0.88671875, "step": 5676 }, { "dpo_loss": 0.0634765625, "epoch": 0.91, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 2.1014683925476895e-08, "loss": 0.3217, "projector_lr": 6.304405177643069e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.421875, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.5, "sft_loss": 0.73046875, "step": 5677 }, { "dpo_loss": 0.1767578125, "epoch": 0.91, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 2.0941914174138485e-08, "loss": 0.091, "projector_lr": 6.282574252241546e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.71875, "sft_loss": 0.90234375, "step": 5678 }, { "dpo_loss": 0.2470703125, "epoch": 0.91, "final_loss": 0.2470703125, "grad_norm": 0.0, "learning_rate": 2.0869267940068547e-08, "loss": 0.1358, "projector_lr": 6.260780382020564e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.486328125, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.46875, "sft_loss": 0.98046875, "step": 5679 }, { "dpo_loss": 0.291015625, "epoch": 0.91, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 2.0796745241997702e-08, "loss": 0.1971, "projector_lr": 6.239023572599311e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.671875, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.625, "sft_loss": 0.6796875, "step": 5680 }, { "dpo_loss": 0.166015625, "epoch": 0.91, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 2.072434609862489e-08, "loss": 0.111, "projector_lr": 6.217303829587467e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.0625, "sft_loss": 0.73828125, "step": 5681 }, { "dpo_loss": 0.53125, "epoch": 0.91, "final_loss": 0.53125, "grad_norm": 0.0, "learning_rate": 2.0652070528616726e-08, "loss": 0.3802, "projector_lr": 6.195621158585019e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 3.1875, "rewards_train/rejected": -3.703125, "sft_loss": 0.93359375, "step": 5682 }, { "dpo_loss": 0.00787353515625, "epoch": 0.91, "final_loss": 0.00787353515625, "grad_norm": 0.0, "learning_rate": 2.057991855060859e-08, "loss": 0.0639, "projector_lr": 6.173975565182577e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10302734375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.4375, "sft_loss": 0.80078125, "step": 5683 }, { "dpo_loss": 0.67578125, "epoch": 0.91, "final_loss": 0.67578125, "grad_norm": 0.0, "learning_rate": 2.050789018320348e-08, "loss": 0.5246, "projector_lr": 6.152367054961044e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.40625, "sft_loss": 1.046875, "step": 5684 }, { "dpo_loss": 0.203125, "epoch": 0.91, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 2.0435985444972824e-08, "loss": 0.2431, "projector_lr": 6.130795633491848e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.5625, "sft_loss": 0.59765625, "step": 5685 }, { "dpo_loss": 0.123046875, "epoch": 0.91, "final_loss": 0.123046875, "grad_norm": 0.0, "learning_rate": 2.0364204354456073e-08, "loss": 0.0874, "projector_lr": 6.109261306336822e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 3.953125, "rewards_train/rejected": -4.5, "sft_loss": 0.81640625, "step": 5686 }, { "dpo_loss": 0.07666015625, "epoch": 0.91, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 2.0292546930160803e-08, "loss": 0.1647, "projector_lr": 6.087764079048241e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.65625, "sft_loss": 0.734375, "step": 5687 }, { "dpo_loss": 0.09326171875, "epoch": 0.91, "final_loss": 0.09326171875, "grad_norm": 0.0, "learning_rate": 2.022101319056263e-08, "loss": 0.0563, "projector_lr": 6.06630395716879e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.39453125, "rewards_train/margins": 6.875, "rewards_train/rejected": -6.46875, "sft_loss": 0.70703125, "step": 5688 }, { "dpo_loss": 0.0216064453125, "epoch": 0.91, "final_loss": 0.0216064453125, "grad_norm": 0.0, "learning_rate": 2.0149603154105466e-08, "loss": 0.2585, "projector_lr": 6.04488094623164e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1279296875, "rewards_train/margins": 4.8125, "rewards_train/rejected": -4.6875, "sft_loss": 0.6328125, "step": 5689 }, { "dpo_loss": 0.1513671875, "epoch": 0.91, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 2.0078316839201192e-08, "loss": 0.2097, "projector_lr": 6.023495051760358e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 4.71875, "rewards_train/rejected": -5.96875, "sft_loss": 0.8515625, "step": 5690 }, { "dpo_loss": 0.166015625, "epoch": 0.91, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 2.000715426422972e-08, "loss": 0.1879, "projector_lr": 6.002146279268917e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.25, "rewards_train/margins": 3.8125, "rewards_train/rejected": -5.0625, "sft_loss": 0.84765625, "step": 5691 }, { "dpo_loss": 0.6328125, "epoch": 0.91, "final_loss": 0.6328125, "grad_norm": 0.0, "learning_rate": 1.993611544753926e-08, "loss": 0.3504, "projector_lr": 5.980834634261779e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.203125, "rewards_train/margins": 3.578125, "rewards_train/rejected": -4.78125, "sft_loss": 0.8515625, "step": 5692 }, { "dpo_loss": 0.302734375, "epoch": 0.91, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 1.9865200407445993e-08, "loss": 0.2162, "projector_lr": 5.959560122233798e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.9375, "sft_loss": 0.796875, "step": 5693 }, { "dpo_loss": 0.1376953125, "epoch": 0.91, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 1.9794409162234182e-08, "loss": 0.1167, "projector_lr": 5.938322748670255e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7890625, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.3125, "sft_loss": 1.0234375, "step": 5694 }, { "dpo_loss": 0.011962890625, "epoch": 0.91, "final_loss": 0.011962890625, "grad_norm": 0.0, "learning_rate": 1.9723741730156117e-08, "loss": 0.131, "projector_lr": 5.917122519046836e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.15625, "rewards_train/margins": 5.4375, "rewards_train/rejected": -5.59375, "sft_loss": 0.71875, "step": 5695 }, { "dpo_loss": 0.1142578125, "epoch": 0.91, "final_loss": 0.1142578125, "grad_norm": 0.0, "learning_rate": 1.9653198129432336e-08, "loss": 0.1171, "projector_lr": 5.895959438829701e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.171875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -7.0625, "sft_loss": 1.234375, "step": 5696 }, { "dpo_loss": 0.458984375, "epoch": 0.91, "final_loss": 0.458984375, "grad_norm": 0.0, "learning_rate": 1.9582778378251287e-08, "loss": 0.6385, "projector_lr": 5.874833513475386e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.91015625, "rewards_train/margins": 2.9375, "rewards_train/rejected": -3.859375, "sft_loss": 1.0234375, "step": 5697 }, { "dpo_loss": 0.259765625, "epoch": 0.91, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 1.951248249476961e-08, "loss": 0.3427, "projector_lr": 5.853744748430884e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9375, "rewards_train/margins": 3.390625, "rewards_train/rejected": -5.34375, "sft_loss": 0.91796875, "step": 5698 }, { "dpo_loss": 0.08642578125, "epoch": 0.91, "final_loss": 0.08642578125, "grad_norm": 0.0, "learning_rate": 1.9442310497111923e-08, "loss": 0.1157, "projector_lr": 5.832693149133578e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96875, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.5, "sft_loss": 0.6015625, "step": 5699 }, { "dpo_loss": 0.048828125, "epoch": 0.91, "final_loss": 0.048828125, "grad_norm": 0.0, "learning_rate": 1.937226240337092e-08, "loss": 0.0661, "projector_lr": 5.811678721011277e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.140625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.34375, "sft_loss": 0.6328125, "step": 5700 }, { "dpo_loss": 0.27734375, "epoch": 0.91, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 1.930233823160726e-08, "loss": 0.1981, "projector_lr": 5.790701469482179e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.296875, "rewards_train/margins": 3.734375, "rewards_train/rejected": -4.03125, "sft_loss": 0.87890625, "step": 5701 }, { "dpo_loss": 0.478515625, "epoch": 0.91, "final_loss": 0.478515625, "grad_norm": 0.0, "learning_rate": 1.923253799984992e-08, "loss": 0.4005, "projector_lr": 5.7697613999549757e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 3.890625, "rewards_train/rejected": -5.25, "sft_loss": 0.72265625, "step": 5702 }, { "dpo_loss": 0.10400390625, "epoch": 0.91, "final_loss": 0.10400390625, "grad_norm": 0.0, "learning_rate": 1.9162861726095604e-08, "loss": 0.3672, "projector_lr": 5.748858517828681e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.90625, "sft_loss": 0.91796875, "step": 5703 }, { "dpo_loss": 0.0262451171875, "epoch": 0.91, "final_loss": 0.0262451171875, "grad_norm": 0.0, "learning_rate": 1.9093309428309277e-08, "loss": 0.0798, "projector_lr": 5.727992828492784e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 7.09375, "rewards_train/rejected": -7.84375, "sft_loss": 0.84765625, "step": 5704 }, { "dpo_loss": 0.046875, "epoch": 0.91, "final_loss": 0.046875, "grad_norm": 0.0, "learning_rate": 1.9023881124423767e-08, "loss": 0.2392, "projector_lr": 5.707164337327131e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.70703125, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.5, "sft_loss": 0.671875, "step": 5705 }, { "dpo_loss": 0.1201171875, "epoch": 0.91, "final_loss": 0.1201171875, "grad_norm": 0.0, "learning_rate": 1.8954576832340142e-08, "loss": 0.1463, "projector_lr": 5.686373049702043e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.130859375, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.125, "sft_loss": 0.7265625, "step": 5706 }, { "dpo_loss": 0.05078125, "epoch": 0.91, "final_loss": 0.05078125, "grad_norm": 0.0, "learning_rate": 1.888539656992727e-08, "loss": 0.3978, "projector_lr": 5.665618970978181e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.1875, "sft_loss": 0.7734375, "step": 5707 }, { "dpo_loss": 0.63671875, "epoch": 0.91, "final_loss": 0.63671875, "grad_norm": 0.0, "learning_rate": 1.8816340355022276e-08, "loss": 0.3755, "projector_lr": 5.644902106506683e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.5625, "sft_loss": 0.71875, "step": 5708 }, { "dpo_loss": 0.12353515625, "epoch": 0.91, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 1.874740820542997e-08, "loss": 0.0669, "projector_lr": 5.6242224616289916e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.71875, "sft_loss": 0.5703125, "step": 5709 }, { "dpo_loss": 0.89453125, "epoch": 0.91, "final_loss": 0.89453125, "grad_norm": 0.0, "learning_rate": 1.8678600138923584e-08, "loss": 0.6809, "projector_lr": 5.603580041677076e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.640625, "rewards_train/margins": 2.90625, "rewards_train/rejected": -4.5625, "sft_loss": 0.8671875, "step": 5710 }, { "dpo_loss": 0.1845703125, "epoch": 0.91, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 1.860991617324398e-08, "loss": 0.1993, "projector_lr": 5.5829748519731935e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84765625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.0, "sft_loss": 0.74609375, "step": 5711 }, { "dpo_loss": 0.328125, "epoch": 0.91, "final_loss": 0.328125, "grad_norm": 0.0, "learning_rate": 1.8541356326100433e-08, "loss": 0.3459, "projector_lr": 5.56240689783013e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.125, "rewards_train/margins": 2.671875, "rewards_train/rejected": -3.796875, "sft_loss": 0.8359375, "step": 5712 }, { "dpo_loss": 0.9296875, "epoch": 0.91, "final_loss": 0.9296875, "grad_norm": 0.0, "learning_rate": 1.847292061516964e-08, "loss": 0.5558, "projector_lr": 5.5418761845508925e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 2.640625, "rewards_train/rejected": -4.5, "sft_loss": 0.91015625, "step": 5713 }, { "dpo_loss": 0.1025390625, "epoch": 0.91, "final_loss": 0.1025390625, "grad_norm": 0.0, "learning_rate": 1.8404609058096878e-08, "loss": 0.0608, "projector_lr": 5.521382717429063e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.02490234375, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.34375, "sft_loss": 0.6328125, "step": 5714 }, { "dpo_loss": 0.0303955078125, "epoch": 0.91, "final_loss": 0.0303955078125, "grad_norm": 0.0, "learning_rate": 1.833642167249505e-08, "loss": 0.151, "projector_lr": 5.500926501748516e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 6.34375, "rewards_train/rejected": -6.78125, "sft_loss": 0.69140625, "step": 5715 }, { "dpo_loss": 0.01153564453125, "epoch": 0.91, "final_loss": 0.01153564453125, "grad_norm": 0.0, "learning_rate": 1.8268358475945213e-08, "loss": 0.1454, "projector_lr": 5.480507542783564e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.5234375, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.78125, "sft_loss": 0.74609375, "step": 5716 }, { "dpo_loss": 0.1396484375, "epoch": 0.91, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 1.8200419485996322e-08, "loss": 0.1678, "projector_lr": 5.4601258457988964e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.8125, "sft_loss": 0.59765625, "step": 5717 }, { "dpo_loss": 0.236328125, "epoch": 0.91, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 1.813260472016537e-08, "loss": 0.2274, "projector_lr": 5.439781416049611e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 2.765625, "rewards_train/rejected": -3.484375, "sft_loss": 0.6640625, "step": 5718 }, { "dpo_loss": 0.11328125, "epoch": 0.92, "final_loss": 0.11328125, "grad_norm": 0.0, "learning_rate": 1.8064914195937143e-08, "loss": 0.0754, "projector_lr": 5.419474258781143e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.15625, "sft_loss": 0.82421875, "step": 5719 }, { "dpo_loss": 0.1455078125, "epoch": 0.92, "final_loss": 0.1455078125, "grad_norm": 0.0, "learning_rate": 1.7997347930764682e-08, "loss": 0.0964, "projector_lr": 5.3992043792294054e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.96875, "sft_loss": 1.3203125, "step": 5720 }, { "dpo_loss": 0.0238037109375, "epoch": 0.92, "final_loss": 0.0238037109375, "grad_norm": 0.0, "learning_rate": 1.7929905942068835e-08, "loss": 0.0401, "projector_lr": 5.378971782620651e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1845703125, "rewards_train/margins": 4.96875, "rewards_train/rejected": -4.8125, "sft_loss": 0.53125, "step": 5721 }, { "dpo_loss": 0.1494140625, "epoch": 0.92, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 1.7862588247238353e-08, "loss": 0.4652, "projector_lr": 5.358776474171506e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.8125, "sft_loss": 1.5859375, "step": 5722 }, { "dpo_loss": 0.1435546875, "epoch": 0.92, "final_loss": 0.1435546875, "grad_norm": 0.0, "learning_rate": 1.7795394863629907e-08, "loss": 0.0834, "projector_lr": 5.338618459088973e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.609375, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.8125, "sft_loss": 1.1796875, "step": 5723 }, { "dpo_loss": 0.134765625, "epoch": 0.92, "final_loss": 0.134765625, "grad_norm": 0.0, "learning_rate": 1.7728325808568423e-08, "loss": 0.0888, "projector_lr": 5.318497742570527e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.71875, "sft_loss": 0.82421875, "step": 5724 }, { "dpo_loss": 0.40625, "epoch": 0.92, "final_loss": 0.40625, "grad_norm": 0.0, "learning_rate": 1.7661381099346394e-08, "loss": 0.2293, "projector_lr": 5.298414329803919e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.34375, "rewards_train/margins": 3.4375, "rewards_train/rejected": -4.78125, "sft_loss": 0.8828125, "step": 5725 }, { "dpo_loss": 0.0064697265625, "epoch": 0.92, "final_loss": 0.0064697265625, "grad_norm": 0.0, "learning_rate": 1.7594560753224464e-08, "loss": 0.0582, "projector_lr": 5.278368225967339e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.478515625, "rewards_train/margins": 7.25, "rewards_train/rejected": -7.75, "sft_loss": 0.5859375, "step": 5726 }, { "dpo_loss": 0.205078125, "epoch": 0.92, "final_loss": 0.205078125, "grad_norm": 0.0, "learning_rate": 1.7527864787431235e-08, "loss": 0.1959, "projector_lr": 5.2583594362293706e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.125, "sft_loss": 0.65625, "step": 5727 }, { "dpo_loss": 0.07421875, "epoch": 0.92, "final_loss": 0.07421875, "grad_norm": 0.0, "learning_rate": 1.7461293219163066e-08, "loss": 0.0886, "projector_lr": 5.23838796574892e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.6171875, "rewards_train/margins": 5.53125, "rewards_train/rejected": -4.90625, "sft_loss": 0.8984375, "step": 5728 }, { "dpo_loss": 0.322265625, "epoch": 0.92, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 1.7394846065584334e-08, "loss": 0.6679, "projector_lr": 5.218453819675301e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.49609375, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.375, "sft_loss": 0.76953125, "step": 5729 }, { "dpo_loss": 0.018798828125, "epoch": 0.92, "final_loss": 0.018798828125, "grad_norm": 0.0, "learning_rate": 1.732852334382745e-08, "loss": 0.0334, "projector_lr": 5.198557003148236e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 7.15625, "rewards_train/rejected": -8.5625, "sft_loss": 0.8125, "step": 5730 }, { "dpo_loss": 0.265625, "epoch": 0.92, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 1.7262325070992568e-08, "loss": 0.2361, "projector_lr": 5.178697521297771e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6953125, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.71875, "sft_loss": 0.9140625, "step": 5731 }, { "dpo_loss": 0.0966796875, "epoch": 0.92, "final_loss": 0.0966796875, "grad_norm": 0.0, "learning_rate": 1.7196251264147808e-08, "loss": 0.083, "projector_lr": 5.158875379244343e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2001953125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -4.3125, "sft_loss": 0.5859375, "step": 5732 }, { "dpo_loss": 0.08935546875, "epoch": 0.92, "final_loss": 0.08935546875, "grad_norm": 0.0, "learning_rate": 1.7130301940329327e-08, "loss": 0.1351, "projector_lr": 5.139090582098799e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.84765625, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.5, "sft_loss": 1.109375, "step": 5733 }, { "dpo_loss": 0.50390625, "epoch": 0.92, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 1.7064477116540967e-08, "loss": 0.2736, "projector_lr": 5.1193431349622897e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.46875, "rewards_train/margins": 4.03125, "rewards_train/rejected": -5.5, "sft_loss": 0.7265625, "step": 5734 }, { "dpo_loss": 0.09765625, "epoch": 0.92, "final_loss": 0.09765625, "grad_norm": 0.0, "learning_rate": 1.699877680975459e-08, "loss": 0.1555, "projector_lr": 5.099633042926377e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.6875, "sft_loss": 0.5546875, "step": 5735 }, { "dpo_loss": 0.060791015625, "epoch": 0.92, "final_loss": 0.060791015625, "grad_norm": 0.0, "learning_rate": 1.693320103690993e-08, "loss": 0.0778, "projector_lr": 5.0799603110729786e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.7109375, "rewards_train/margins": 3.984375, "rewards_train/rejected": -5.6875, "sft_loss": 0.9453125, "step": 5736 }, { "dpo_loss": 0.080078125, "epoch": 0.92, "final_loss": 0.080078125, "grad_norm": 0.0, "learning_rate": 1.6867749814914734e-08, "loss": 0.1391, "projector_lr": 5.06032494447442e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.78125, "sft_loss": 0.81640625, "step": 5737 }, { "dpo_loss": 0.10400390625, "epoch": 0.92, "final_loss": 0.10400390625, "grad_norm": 0.0, "learning_rate": 1.680242316064434e-08, "loss": 0.1156, "projector_lr": 5.0407269481933026e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 3.90625, "rewards_train/rejected": -5.25, "sft_loss": 0.8046875, "step": 5738 }, { "dpo_loss": 0.267578125, "epoch": 0.92, "final_loss": 0.267578125, "grad_norm": 0.0, "learning_rate": 1.6737221090942444e-08, "loss": 0.2325, "projector_lr": 5.0211663272827335e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 3.734375, "rewards_train/rejected": -4.65625, "sft_loss": 0.9453125, "step": 5739 }, { "dpo_loss": 0.62890625, "epoch": 0.92, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 1.6672143622619983e-08, "loss": 0.3738, "projector_lr": 5.001643086785995e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.171875, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.59375, "sft_loss": 0.8359375, "step": 5740 }, { "dpo_loss": 0.052734375, "epoch": 0.92, "final_loss": 0.052734375, "grad_norm": 0.0, "learning_rate": 1.660719077245637e-08, "loss": 0.0331, "projector_lr": 4.9821572317369114e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.90625, "sft_loss": 0.65625, "step": 5741 }, { "dpo_loss": 0.310546875, "epoch": 0.92, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 1.654236255719843e-08, "loss": 0.1577, "projector_lr": 4.9627087671595294e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 2.578125, "rewards_train/rejected": -3.8125, "sft_loss": 0.82421875, "step": 5742 }, { "dpo_loss": 0.61328125, "epoch": 0.92, "final_loss": 0.61328125, "grad_norm": 0.0, "learning_rate": 1.6477658993561293e-08, "loss": 0.4325, "projector_lr": 4.943297698068388e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 3.140625, "rewards_train/rejected": -4.5625, "sft_loss": 0.96484375, "step": 5743 }, { "dpo_loss": 0.447265625, "epoch": 0.92, "final_loss": 0.447265625, "grad_norm": 0.0, "learning_rate": 1.641308009822756e-08, "loss": 0.225, "projector_lr": 4.923924029468269e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6171875, "rewards_train/margins": 2.375, "rewards_train/rejected": -4.0, "sft_loss": 0.5703125, "step": 5744 }, { "dpo_loss": 0.03076171875, "epoch": 0.92, "final_loss": 0.03076171875, "grad_norm": 0.0, "learning_rate": 1.6348625887847854e-08, "loss": 0.1184, "projector_lr": 4.904587766354357e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.125, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.125, "sft_loss": 0.66796875, "step": 5745 }, { "dpo_loss": 0.244140625, "epoch": 0.92, "final_loss": 0.244140625, "grad_norm": 0.0, "learning_rate": 1.6284296379040607e-08, "loss": 0.2567, "projector_lr": 4.8852889137121825e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.546875, "rewards_train/margins": 3.1875, "rewards_train/rejected": -5.75, "sft_loss": 0.67578125, "step": 5746 }, { "dpo_loss": 0.06787109375, "epoch": 0.92, "final_loss": 0.06787109375, "grad_norm": 0.0, "learning_rate": 1.6220091588392216e-08, "loss": 0.1043, "projector_lr": 4.866027476517665e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.06982421875, "rewards_train/margins": 5.65625, "rewards_train/rejected": -5.59375, "sft_loss": 0.5859375, "step": 5747 }, { "dpo_loss": 0.1474609375, "epoch": 0.92, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 1.6156011532456772e-08, "loss": 0.3652, "projector_lr": 4.8468034597370315e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6875, "rewards_train/margins": 3.59375, "rewards_train/rejected": -5.28125, "sft_loss": 1.0546875, "step": 5748 }, { "dpo_loss": 0.017578125, "epoch": 0.92, "final_loss": 0.017578125, "grad_norm": 0.0, "learning_rate": 1.6092056227756334e-08, "loss": 0.0809, "projector_lr": 4.827616868326901e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.765625, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.0625, "sft_loss": 0.671875, "step": 5749 }, { "dpo_loss": 0.1474609375, "epoch": 0.92, "final_loss": 0.1474609375, "grad_norm": 0.0, "learning_rate": 1.602822569078055e-08, "loss": 0.2916, "projector_lr": 4.808467707234165e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.375, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.4375, "sft_loss": 1.1875, "step": 5750 }, { "dpo_loss": 0.00872802734375, "epoch": 0.92, "final_loss": 0.00872802734375, "grad_norm": 0.0, "learning_rate": 1.596451993798731e-08, "loss": 0.0837, "projector_lr": 4.789355981396193e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 7.78125, "rewards_train/rejected": -9.0, "sft_loss": 1.09375, "step": 5751 }, { "dpo_loss": 0.111328125, "epoch": 0.92, "final_loss": 0.111328125, "grad_norm": 0.0, "learning_rate": 1.5900938985801916e-08, "loss": 0.1072, "projector_lr": 4.770281695740575e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.359375, "rewards_train/margins": 2.953125, "rewards_train/rejected": -4.3125, "sft_loss": 0.81640625, "step": 5752 }, { "dpo_loss": 0.019775390625, "epoch": 0.92, "final_loss": 0.019775390625, "grad_norm": 0.0, "learning_rate": 1.5837482850617812e-08, "loss": 0.1443, "projector_lr": 4.751244855185344e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.65625, "sft_loss": 0.796875, "step": 5753 }, { "dpo_loss": 0.0247802734375, "epoch": 0.92, "final_loss": 0.0247802734375, "grad_norm": 0.0, "learning_rate": 1.5774151548795966e-08, "loss": 0.1025, "projector_lr": 4.73224546463879e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.25390625, "rewards_train/margins": 6.6875, "rewards_train/rejected": -6.9375, "sft_loss": 0.7578125, "step": 5754 }, { "dpo_loss": 0.04052734375, "epoch": 0.92, "final_loss": 0.04052734375, "grad_norm": 0.0, "learning_rate": 1.5710945096665484e-08, "loss": 0.1539, "projector_lr": 4.713283528999646e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.171875, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.0625, "sft_loss": 0.89453125, "step": 5755 }, { "dpo_loss": 0.228515625, "epoch": 0.92, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 1.5647863510522995e-08, "loss": 0.1331, "projector_lr": 4.6943590531568994e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.81640625, "rewards_train/margins": 1.8046875, "rewards_train/rejected": -2.625, "sft_loss": 0.8671875, "step": 5756 }, { "dpo_loss": 0.439453125, "epoch": 0.92, "final_loss": 0.439453125, "grad_norm": 0.0, "learning_rate": 1.558490680663316e-08, "loss": 0.229, "projector_lr": 4.675472041989948e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 3.734375, "rewards_train/rejected": -5.0, "sft_loss": 0.78125, "step": 5757 }, { "dpo_loss": 0.125, "epoch": 0.92, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 1.552207500122815e-08, "loss": 0.1505, "projector_lr": 4.656622500368446e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.69140625, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.9375, "sft_loss": 0.671875, "step": 5758 }, { "dpo_loss": 0.07568359375, "epoch": 0.92, "final_loss": 0.07568359375, "grad_norm": 0.0, "learning_rate": 1.5459368110508297e-08, "loss": 0.0548, "projector_lr": 4.63781043315249e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.59375, "sft_loss": 0.65625, "step": 5759 }, { "dpo_loss": 0.275390625, "epoch": 0.92, "final_loss": 0.275390625, "grad_norm": 0.0, "learning_rate": 1.5396786150641438e-08, "loss": 0.2193, "projector_lr": 4.619035845192432e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.34375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.03125, "sft_loss": 0.76953125, "step": 5760 }, { "dpo_loss": 0.1611328125, "epoch": 0.92, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 1.5334329137763337e-08, "loss": 0.1109, "projector_lr": 4.600298741329001e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 2.875, "rewards_train/rejected": -3.703125, "sft_loss": 0.76953125, "step": 5761 }, { "dpo_loss": 0.0732421875, "epoch": 0.92, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 1.5271997087977552e-08, "loss": 0.0739, "projector_lr": 4.581599126393265e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.65625, "sft_loss": 1.03125, "step": 5762 }, { "dpo_loss": 0.0390625, "epoch": 0.92, "final_loss": 0.0390625, "grad_norm": 0.0, "learning_rate": 1.520979001735534e-08, "loss": 0.1386, "projector_lr": 4.562937005206602e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.310546875, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.15625, "sft_loss": 0.77734375, "step": 5763 }, { "dpo_loss": 0.38671875, "epoch": 0.92, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 1.5147707941935707e-08, "loss": 0.346, "projector_lr": 4.544312382580712e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.875, "rewards_train/margins": 1.90625, "rewards_train/rejected": -3.78125, "sft_loss": 0.6171875, "step": 5764 }, { "dpo_loss": 0.12353515625, "epoch": 0.92, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 1.5085750877725623e-08, "loss": 0.0895, "projector_lr": 4.525725263317687e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.1875, "sft_loss": 0.6171875, "step": 5765 }, { "dpo_loss": 0.396484375, "epoch": 0.92, "final_loss": 0.396484375, "grad_norm": 0.0, "learning_rate": 1.5023918840699645e-08, "loss": 0.2662, "projector_lr": 4.507175652209894e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 3.71875, "rewards_train/rejected": -4.625, "sft_loss": 0.703125, "step": 5766 }, { "dpo_loss": 0.01953125, "epoch": 0.92, "final_loss": 0.01953125, "grad_norm": 0.0, "learning_rate": 1.4962211846800076e-08, "loss": 0.1398, "projector_lr": 4.4886635540400234e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.1875, "sft_loss": 0.609375, "step": 5767 }, { "dpo_loss": 0.119140625, "epoch": 0.92, "final_loss": 0.119140625, "grad_norm": 0.0, "learning_rate": 1.4900629911937245e-08, "loss": 0.1, "projector_lr": 4.470188973581174e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2265625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.875, "sft_loss": 0.71875, "step": 5768 }, { "dpo_loss": 0.045654296875, "epoch": 0.92, "final_loss": 0.045654296875, "grad_norm": 0.0, "learning_rate": 1.4839173051988841e-08, "loss": 0.1711, "projector_lr": 4.4517519155966523e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.734375, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.78125, "sft_loss": 0.83984375, "step": 5769 }, { "dpo_loss": 0.14453125, "epoch": 0.92, "final_loss": 0.14453125, "grad_norm": 0.0, "learning_rate": 1.4777841282800573e-08, "loss": 0.1597, "projector_lr": 4.433352384840173e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.5, "sft_loss": 0.74609375, "step": 5770 }, { "dpo_loss": 0.09619140625, "epoch": 0.92, "final_loss": 0.09619140625, "grad_norm": 0.0, "learning_rate": 1.4716634620185908e-08, "loss": 0.0709, "projector_lr": 4.414990386055773e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.69921875, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.21875, "sft_loss": 0.98828125, "step": 5771 }, { "dpo_loss": 0.0135498046875, "epoch": 0.92, "final_loss": 0.0135498046875, "grad_norm": 0.0, "learning_rate": 1.4655553079925887e-08, "loss": 0.1803, "projector_lr": 4.396665923977766e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.40625, "rewards_train/margins": 6.78125, "rewards_train/rejected": -8.1875, "sft_loss": 0.92578125, "step": 5772 }, { "dpo_loss": 0.09130859375, "epoch": 0.92, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 1.4594596677769355e-08, "loss": 0.3224, "projector_lr": 4.378379003330807e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6484375, "rewards_train/margins": 6.96875, "rewards_train/rejected": -7.625, "sft_loss": 0.88671875, "step": 5773 }, { "dpo_loss": 0.09716796875, "epoch": 0.92, "final_loss": 0.09716796875, "grad_norm": 0.0, "learning_rate": 1.4533765429433076e-08, "loss": 0.1024, "projector_lr": 4.3601296288299234e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.75, "rewards_train/margins": 3.953125, "rewards_train/rejected": -4.6875, "sft_loss": 0.67578125, "step": 5774 }, { "dpo_loss": 0.208984375, "epoch": 0.92, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 1.4473059350601279e-08, "loss": 0.12, "projector_lr": 4.3419178051803844e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9296875, "rewards_train/margins": 3.234375, "rewards_train/rejected": -4.1875, "sft_loss": 0.734375, "step": 5775 }, { "dpo_loss": 0.1796875, "epoch": 0.92, "final_loss": 0.1796875, "grad_norm": 0.0, "learning_rate": 1.4412478456926003e-08, "loss": 0.0957, "projector_lr": 4.3237435370778014e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.09375, "sft_loss": 0.828125, "step": 5776 }, { "dpo_loss": 0.28125, "epoch": 0.92, "final_loss": 0.28125, "grad_norm": 0.0, "learning_rate": 1.4352022764027082e-08, "loss": 0.1988, "projector_lr": 4.305606829208125e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.96484375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.625, "sft_loss": 1.1171875, "step": 5777 }, { "dpo_loss": 0.33984375, "epoch": 0.92, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 1.4291692287492052e-08, "loss": 0.2722, "projector_lr": 4.287507686247616e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.328125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -6.53125, "sft_loss": 0.87890625, "step": 5778 }, { "dpo_loss": 0.1533203125, "epoch": 0.92, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 1.4231487042876078e-08, "loss": 0.1514, "projector_lr": 4.2694461128628236e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 5.78125, "rewards_train/rejected": -6.34375, "sft_loss": 0.66015625, "step": 5779 }, { "dpo_loss": 0.2138671875, "epoch": 0.92, "final_loss": 0.2138671875, "grad_norm": 0.0, "learning_rate": 1.4171407045702188e-08, "loss": 0.1509, "projector_lr": 4.251422113710657e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.71875, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.78125, "sft_loss": 0.77734375, "step": 5780 }, { "dpo_loss": 0.020263671875, "epoch": 0.92, "final_loss": 0.020263671875, "grad_norm": 0.0, "learning_rate": 1.411145231146088e-08, "loss": 0.4238, "projector_lr": 4.2334356934382646e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8671875, "rewards_train/margins": 6.3125, "rewards_train/rejected": -7.1875, "sft_loss": 0.6640625, "step": 5781 }, { "dpo_loss": 0.74609375, "epoch": 0.93, "final_loss": 0.74609375, "grad_norm": 0.0, "learning_rate": 1.4051622855610623e-08, "loss": 0.4165, "projector_lr": 4.2154868566831874e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.18359375, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.53125, "sft_loss": 0.703125, "step": 5782 }, { "dpo_loss": 0.038818359375, "epoch": 0.93, "final_loss": 0.038818359375, "grad_norm": 0.0, "learning_rate": 1.3991918693577354e-08, "loss": 0.0467, "projector_lr": 4.1975756080732067e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.212890625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.0, "sft_loss": 0.69921875, "step": 5783 }, { "dpo_loss": 0.138671875, "epoch": 0.93, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 1.3932339840754981e-08, "loss": 0.2143, "projector_lr": 4.179701952226495e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 2.734375, "rewards_train/rejected": -3.125, "sft_loss": 0.7890625, "step": 5784 }, { "dpo_loss": 0.1728515625, "epoch": 0.93, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 1.3872886312504717e-08, "loss": 0.1309, "projector_lr": 4.161865893751415e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.953125, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.75, "sft_loss": 1.078125, "step": 5785 }, { "dpo_loss": 0.578125, "epoch": 0.93, "final_loss": 0.578125, "grad_norm": 0.0, "learning_rate": 1.3813558124155855e-08, "loss": 0.2972, "projector_lr": 4.144067437246757e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.953125, "rewards_train/margins": 2.359375, "rewards_train/rejected": -3.3125, "sft_loss": 1.140625, "step": 5786 }, { "dpo_loss": 0.162109375, "epoch": 0.93, "final_loss": 0.162109375, "grad_norm": 0.0, "learning_rate": 1.3754355291005049e-08, "loss": 0.0871, "projector_lr": 4.126306587301515e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.5, "sft_loss": 0.61328125, "step": 5787 }, { "dpo_loss": 0.046142578125, "epoch": 0.93, "final_loss": 0.046142578125, "grad_norm": 0.0, "learning_rate": 1.369527782831692e-08, "loss": 0.0902, "projector_lr": 4.1085833484950765e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 6.4375, "rewards_train/rejected": -7.09375, "sft_loss": 0.7578125, "step": 5788 }, { "dpo_loss": 0.267578125, "epoch": 0.93, "final_loss": 0.267578125, "grad_norm": 0.0, "learning_rate": 1.3636325751323563e-08, "loss": 0.1453, "projector_lr": 4.0908977253970695e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 3.109375, "rewards_train/rejected": -4.03125, "sft_loss": 0.84765625, "step": 5789 }, { "dpo_loss": 0.059326171875, "epoch": 0.93, "final_loss": 0.059326171875, "grad_norm": 0.0, "learning_rate": 1.3577499075224819e-08, "loss": 0.119, "projector_lr": 4.073249722567446e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.53125, "sft_loss": 0.72265625, "step": 5790 }, { "dpo_loss": 0.1728515625, "epoch": 0.93, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 1.351879781518811e-08, "loss": 0.1681, "projector_lr": 4.055639344556433e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 4.65625, "rewards_train/rejected": -6.125, "sft_loss": 0.8984375, "step": 5791 }, { "dpo_loss": 0.111328125, "epoch": 0.93, "final_loss": 0.111328125, "grad_norm": 0.0, "learning_rate": 1.3460221986348664e-08, "loss": 0.1239, "projector_lr": 4.038066595904599e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 3.296875, "rewards_train/rejected": -3.71875, "sft_loss": 0.7578125, "step": 5792 }, { "dpo_loss": 0.427734375, "epoch": 0.93, "final_loss": 0.427734375, "grad_norm": 0.0, "learning_rate": 1.3401771603809342e-08, "loss": 0.2292, "projector_lr": 4.0205314811428026e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.578125, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.5, "sft_loss": 0.90625, "step": 5793 }, { "dpo_loss": 0.029541015625, "epoch": 0.93, "final_loss": 0.029541015625, "grad_norm": 0.0, "learning_rate": 1.3343446682640481e-08, "loss": 0.0714, "projector_lr": 4.003034004792144e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2314453125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.0625, "sft_loss": 0.7265625, "step": 5794 }, { "dpo_loss": 0.10693359375, "epoch": 0.93, "final_loss": 0.10693359375, "grad_norm": 0.0, "learning_rate": 1.328524723788027e-08, "loss": 0.3292, "projector_lr": 3.985574171364081e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5546875, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.0625, "sft_loss": 0.68359375, "step": 5795 }, { "dpo_loss": 0.2021484375, "epoch": 0.93, "final_loss": 0.2021484375, "grad_norm": 0.0, "learning_rate": 1.3227173284534542e-08, "loss": 0.1579, "projector_lr": 3.968151985360363e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.55078125, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.4375, "sft_loss": 0.73828125, "step": 5796 }, { "dpo_loss": 0.030517578125, "epoch": 0.93, "final_loss": 0.030517578125, "grad_norm": 0.0, "learning_rate": 1.3169224837576543e-08, "loss": 0.017, "projector_lr": 3.950767451272963e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.024658203125, "rewards_train/margins": 4.5, "rewards_train/rejected": -4.46875, "sft_loss": 0.6015625, "step": 5797 }, { "dpo_loss": 0.057861328125, "epoch": 0.93, "final_loss": 0.057861328125, "grad_norm": 0.0, "learning_rate": 1.3111401911947651e-08, "loss": 0.159, "projector_lr": 3.9334205735842955e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.125, "sft_loss": 0.578125, "step": 5798 }, { "dpo_loss": 0.15234375, "epoch": 0.93, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 1.3053704522556164e-08, "loss": 0.1455, "projector_lr": 3.916111356766849e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.59375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.25, "sft_loss": 0.625, "step": 5799 }, { "dpo_loss": 0.2373046875, "epoch": 0.93, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 1.2996132684278682e-08, "loss": 0.3287, "projector_lr": 3.898839805283605e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.515625, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.8125, "sft_loss": 0.70703125, "step": 5800 }, { "dpo_loss": 0.099609375, "epoch": 0.93, "final_loss": 0.099609375, "grad_norm": 0.0, "learning_rate": 1.2938686411959055e-08, "loss": 0.0572, "projector_lr": 3.8816059235877166e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.9375, "sft_loss": 0.78515625, "step": 5801 }, { "dpo_loss": 0.0206298828125, "epoch": 0.93, "final_loss": 0.0206298828125, "grad_norm": 0.0, "learning_rate": 1.2881365720408932e-08, "loss": 0.192, "projector_lr": 3.86440971612268e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.322265625, "rewards_train/margins": 6.5625, "rewards_train/rejected": -6.875, "sft_loss": 0.59375, "step": 5802 }, { "dpo_loss": 0.08154296875, "epoch": 0.93, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 1.2824170624407383e-08, "loss": 0.176, "projector_lr": 3.847251187322215e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.86328125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.40625, "sft_loss": 0.6953125, "step": 5803 }, { "dpo_loss": 0.1494140625, "epoch": 0.93, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 1.2767101138701386e-08, "loss": 0.0989, "projector_lr": 3.830130341610416e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.921875, "rewards_train/margins": 5.09375, "rewards_train/rejected": -7.0, "sft_loss": 0.828125, "step": 5804 }, { "dpo_loss": 0.1845703125, "epoch": 0.93, "final_loss": 0.1845703125, "grad_norm": 0.0, "learning_rate": 1.2710157278005173e-08, "loss": 0.2145, "projector_lr": 3.8130471834015526e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.006103515625, "rewards_train/margins": 5.28125, "rewards_train/rejected": -5.28125, "sft_loss": 0.67578125, "step": 5805 }, { "dpo_loss": 0.68359375, "epoch": 0.93, "final_loss": 0.68359375, "grad_norm": 0.0, "learning_rate": 1.2653339057001001e-08, "loss": 0.4218, "projector_lr": 3.7960017171003e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.234375, "rewards_train/margins": 2.859375, "rewards_train/rejected": -5.0625, "sft_loss": 0.953125, "step": 5806 }, { "dpo_loss": 0.2099609375, "epoch": 0.93, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 1.2596646490338425e-08, "loss": 0.1206, "projector_lr": 3.7789939471015275e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.625, "sft_loss": 0.7265625, "step": 5807 }, { "dpo_loss": 0.2177734375, "epoch": 0.93, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 1.2540079592634646e-08, "loss": 0.1886, "projector_lr": 3.762023877790394e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.41796875, "rewards_train/margins": 3.21875, "rewards_train/rejected": -3.640625, "sft_loss": 0.765625, "step": 5808 }, { "dpo_loss": 0.181640625, "epoch": 0.93, "final_loss": 0.181640625, "grad_norm": 0.0, "learning_rate": 1.2483638378474492e-08, "loss": 0.1774, "projector_lr": 3.745091513542348e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 3.859375, "rewards_train/rejected": -4.46875, "sft_loss": 0.55859375, "step": 5809 }, { "dpo_loss": 0.1201171875, "epoch": 0.93, "final_loss": 0.1201171875, "grad_norm": 0.0, "learning_rate": 1.242732286241055e-08, "loss": 0.0657, "projector_lr": 3.728196858723165e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2041015625, "rewards_train/margins": 5.28125, "rewards_train/rejected": -5.0625, "sft_loss": 0.76953125, "step": 5810 }, { "dpo_loss": 0.033203125, "epoch": 0.93, "final_loss": 0.033203125, "grad_norm": 0.0, "learning_rate": 1.2371133058962757e-08, "loss": 0.0897, "projector_lr": 3.7113399176888274e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.40625, "sft_loss": 0.71484375, "step": 5811 }, { "dpo_loss": 0.22265625, "epoch": 0.93, "final_loss": 0.22265625, "grad_norm": 0.0, "learning_rate": 1.2315068982618692e-08, "loss": 0.1363, "projector_lr": 3.6945206947856083e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.369140625, "rewards_train/margins": 4.28125, "rewards_train/rejected": -4.65625, "sft_loss": 0.6875, "step": 5812 }, { "dpo_loss": 0.1337890625, "epoch": 0.93, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 1.2259130647833626e-08, "loss": 0.0712, "projector_lr": 3.677739194350088e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.74609375, "rewards_train/margins": 3.90625, "rewards_train/rejected": -4.65625, "sft_loss": 0.9296875, "step": 5813 }, { "dpo_loss": 0.2734375, "epoch": 0.93, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 1.2203318069030299e-08, "loss": 0.2062, "projector_lr": 3.66099542070909e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 3.421875, "rewards_train/rejected": -4.34375, "sft_loss": 0.90625, "step": 5814 }, { "dpo_loss": 0.150390625, "epoch": 0.93, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 1.2147631260599145e-08, "loss": 0.105, "projector_lr": 3.6442893781797434e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.0625, "sft_loss": 0.6484375, "step": 5815 }, { "dpo_loss": 0.05322265625, "epoch": 0.93, "final_loss": 0.05322265625, "grad_norm": 0.0, "learning_rate": 1.2092070236898012e-08, "loss": 0.0923, "projector_lr": 3.6276210710694035e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9609375, "rewards_train/margins": 4.34375, "rewards_train/rejected": -6.3125, "sft_loss": 1.1171875, "step": 5816 }, { "dpo_loss": 0.13671875, "epoch": 0.93, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 1.2036635012252439e-08, "loss": 0.077, "projector_lr": 3.6109905036757316e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.4375, "sft_loss": 0.80859375, "step": 5817 }, { "dpo_loss": 0.0255126953125, "epoch": 0.93, "final_loss": 0.0255126953125, "grad_norm": 0.0, "learning_rate": 1.1981325600955383e-08, "loss": 0.0844, "projector_lr": 3.5943976802866154e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.5, "sft_loss": 0.83203125, "step": 5818 }, { "dpo_loss": 0.373046875, "epoch": 0.93, "final_loss": 0.373046875, "grad_norm": 0.0, "learning_rate": 1.192614201726766e-08, "loss": 0.3323, "projector_lr": 3.577842605180298e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7421875, "rewards_train/margins": 3.0625, "rewards_train/rejected": -3.796875, "sft_loss": 0.65234375, "step": 5819 }, { "dpo_loss": 0.33203125, "epoch": 0.93, "final_loss": 0.33203125, "grad_norm": 0.0, "learning_rate": 1.1871084275417332e-08, "loss": 0.2006, "projector_lr": 3.5613252826252e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 2.296875, "rewards_train/rejected": -3.328125, "sft_loss": 0.57421875, "step": 5820 }, { "dpo_loss": 0.259765625, "epoch": 0.93, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 1.1816152389600153e-08, "loss": 0.1431, "projector_lr": 3.544845716880046e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.140625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -7.09375, "sft_loss": 0.7421875, "step": 5821 }, { "dpo_loss": 0.11767578125, "epoch": 0.93, "final_loss": 0.11767578125, "grad_norm": 0.0, "learning_rate": 1.1761346373979353e-08, "loss": 0.202, "projector_lr": 3.528403912193806e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.58984375, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.4375, "sft_loss": 0.78125, "step": 5822 }, { "dpo_loss": 0.189453125, "epoch": 0.93, "final_loss": 0.189453125, "grad_norm": 0.0, "learning_rate": 1.1706666242685902e-08, "loss": 0.2483, "projector_lr": 3.511999872805771e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.494140625, "rewards_train/margins": 6.21875, "rewards_train/rejected": -6.71875, "sft_loss": 0.79296875, "step": 5823 }, { "dpo_loss": 0.062255859375, "epoch": 0.93, "final_loss": 0.062255859375, "grad_norm": 0.0, "learning_rate": 1.1652112009818027e-08, "loss": 0.1224, "projector_lr": 3.495633602945408e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0, "rewards_train/margins": 6.84375, "rewards_train/rejected": -7.84375, "sft_loss": 0.6875, "step": 5824 }, { "dpo_loss": 0.0458984375, "epoch": 0.93, "final_loss": 0.0458984375, "grad_norm": 0.0, "learning_rate": 1.1597683689441806e-08, "loss": 0.0708, "projector_lr": 3.479305106832542e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.875, "sft_loss": 0.71484375, "step": 5825 }, { "dpo_loss": 0.048828125, "epoch": 0.93, "final_loss": 0.048828125, "grad_norm": 0.0, "learning_rate": 1.1543381295590516e-08, "loss": 0.0851, "projector_lr": 3.4630143886771547e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.234375, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.4375, "sft_loss": 1.21875, "step": 5826 }, { "dpo_loss": 0.921875, "epoch": 0.93, "final_loss": 0.921875, "grad_norm": 0.0, "learning_rate": 1.1489204842265232e-08, "loss": 0.483, "projector_lr": 3.44676145267957e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.5625, "rewards_train/margins": 1.6953125, "rewards_train/rejected": -3.265625, "sft_loss": 0.8046875, "step": 5827 }, { "dpo_loss": 0.095703125, "epoch": 0.93, "final_loss": 0.095703125, "grad_norm": 0.0, "learning_rate": 1.1435154343434394e-08, "loss": 0.1139, "projector_lr": 3.4305463030303187e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 5.53125, "rewards_train/rejected": -6.8125, "sft_loss": 0.7109375, "step": 5828 }, { "dpo_loss": 0.6640625, "epoch": 0.93, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 1.1381229813034188e-08, "loss": 0.4612, "projector_lr": 3.414368943910256e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.09375, "sft_loss": 0.63671875, "step": 5829 }, { "dpo_loss": 0.0166015625, "epoch": 0.93, "final_loss": 0.0166015625, "grad_norm": 0.0, "learning_rate": 1.132743126496799e-08, "loss": 0.1885, "projector_lr": 3.3982293794903976e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73828125, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.03125, "sft_loss": 0.875, "step": 5830 }, { "dpo_loss": 0.042724609375, "epoch": 0.93, "final_loss": 0.042724609375, "grad_norm": 0.0, "learning_rate": 1.1273758713106984e-08, "loss": 0.1035, "projector_lr": 3.3821276139320956e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.83203125, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.75, "sft_loss": 0.7109375, "step": 5831 }, { "dpo_loss": 0.5390625, "epoch": 0.93, "final_loss": 0.5390625, "grad_norm": 0.0, "learning_rate": 1.1220212171289711e-08, "loss": 0.2719, "projector_lr": 3.3660636513869137e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.75, "sft_loss": 0.66796875, "step": 5832 }, { "dpo_loss": 0.00909423828125, "epoch": 0.93, "final_loss": 0.00909423828125, "grad_norm": 0.0, "learning_rate": 1.1166791653322294e-08, "loss": 0.098, "projector_lr": 3.3500374959966886e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.734375, "rewards_train/margins": 5.5625, "rewards_train/rejected": -7.3125, "sft_loss": 0.9765625, "step": 5833 }, { "dpo_loss": 0.048095703125, "epoch": 0.93, "final_loss": 0.048095703125, "grad_norm": 0.0, "learning_rate": 1.1113497172978325e-08, "loss": 0.1791, "projector_lr": 3.334049151893498e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2255859375, "rewards_train/margins": 5.5, "rewards_train/rejected": -5.71875, "sft_loss": 1.0078125, "step": 5834 }, { "dpo_loss": 0.041259765625, "epoch": 0.93, "final_loss": 0.041259765625, "grad_norm": 0.0, "learning_rate": 1.1060328743998926e-08, "loss": 0.042, "projector_lr": 3.318098623199678e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0498046875, "rewards_train/margins": 5.34375, "rewards_train/rejected": -5.40625, "sft_loss": 0.48828125, "step": 5835 }, { "dpo_loss": 0.369140625, "epoch": 0.93, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 1.1007286380092629e-08, "loss": 0.3095, "projector_lr": 3.302185914027789e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.455078125, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.21875, "sft_loss": 0.77734375, "step": 5836 }, { "dpo_loss": 0.1884765625, "epoch": 0.93, "final_loss": 0.1884765625, "grad_norm": 0.0, "learning_rate": 1.0954370094935662e-08, "loss": 0.1119, "projector_lr": 3.2863110284806983e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.765625, "rewards_train/margins": 3.28125, "rewards_train/rejected": -4.03125, "sft_loss": 0.7578125, "step": 5837 }, { "dpo_loss": 0.2080078125, "epoch": 0.93, "final_loss": 0.2080078125, "grad_norm": 0.0, "learning_rate": 1.0901579902171553e-08, "loss": 0.1261, "projector_lr": 3.270473970651466e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.84375, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.25, "sft_loss": 0.51953125, "step": 5838 }, { "dpo_loss": 0.177734375, "epoch": 0.93, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 1.0848915815411418e-08, "loss": 0.1047, "projector_lr": 3.254674744623426e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5546875, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.53125, "sft_loss": 0.859375, "step": 5839 }, { "dpo_loss": 0.1025390625, "epoch": 0.93, "final_loss": 0.1025390625, "grad_norm": 0.0, "learning_rate": 1.0796377848233728e-08, "loss": 0.0701, "projector_lr": 3.2389133544701185e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.236328125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.40625, "sft_loss": 0.59765625, "step": 5840 }, { "dpo_loss": 0.306640625, "epoch": 0.93, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 1.0743966014184646e-08, "loss": 0.2549, "projector_lr": 3.2231898042553944e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.890625, "sft_loss": 0.8046875, "step": 5841 }, { "dpo_loss": 0.044921875, "epoch": 0.93, "final_loss": 0.044921875, "grad_norm": 0.0, "learning_rate": 1.0691680326777641e-08, "loss": 0.1484, "projector_lr": 3.2075040980332925e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.03125, "rewards_train/margins": 5.4375, "rewards_train/rejected": -6.46875, "sft_loss": 0.734375, "step": 5842 }, { "dpo_loss": 0.259765625, "epoch": 0.93, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 1.0639520799493818e-08, "loss": 0.167, "projector_lr": 3.191856239848146e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.78125, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.53125, "sft_loss": 0.61328125, "step": 5843 }, { "dpo_loss": 0.0419921875, "epoch": 0.94, "final_loss": 0.0419921875, "grad_norm": 0.0, "learning_rate": 1.0587487445781473e-08, "loss": 0.0292, "projector_lr": 3.176246233734442e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30078125, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.71875, "sft_loss": 0.8125, "step": 5844 }, { "dpo_loss": 0.10546875, "epoch": 0.94, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 1.0535580279056766e-08, "loss": 0.2587, "projector_lr": 3.1606740837170304e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 6.375, "rewards_train/rejected": -8.0625, "sft_loss": 1.0703125, "step": 5845 }, { "dpo_loss": 0.068359375, "epoch": 0.94, "final_loss": 0.068359375, "grad_norm": 0.0, "learning_rate": 1.0483799312702935e-08, "loss": 0.0998, "projector_lr": 3.145139793810881e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.96875, "sft_loss": 0.67578125, "step": 5846 }, { "dpo_loss": 0.11279296875, "epoch": 0.94, "final_loss": 0.11279296875, "grad_norm": 0.0, "learning_rate": 1.0432144560070966e-08, "loss": 0.1109, "projector_lr": 3.12964336802129e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.21875, "sft_loss": 0.9921875, "step": 5847 }, { "dpo_loss": 0.025634765625, "epoch": 0.94, "final_loss": 0.025634765625, "grad_norm": 0.0, "learning_rate": 1.038061603447915e-08, "loss": 0.1259, "projector_lr": 3.114184810343745e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.49609375, "rewards_train/margins": 6.6875, "rewards_train/rejected": -7.15625, "sft_loss": 0.625, "step": 5848 }, { "dpo_loss": 0.578125, "epoch": 0.94, "final_loss": 0.578125, "grad_norm": 0.0, "learning_rate": 1.0329213749213251e-08, "loss": 0.2892, "projector_lr": 3.098764124763975e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.46875, "sft_loss": 0.7421875, "step": 5849 }, { "dpo_loss": 0.09912109375, "epoch": 0.94, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 1.0277937717526497e-08, "loss": 0.0884, "projector_lr": 3.083381315257949e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3125, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.3125, "sft_loss": 0.6796875, "step": 5850 }, { "dpo_loss": 0.150390625, "epoch": 0.94, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 1.0226787952639648e-08, "loss": 0.2162, "projector_lr": 3.068036385791895e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.53125, "sft_loss": 0.78125, "step": 5851 }, { "dpo_loss": 0.028564453125, "epoch": 0.94, "final_loss": 0.028564453125, "grad_norm": 0.0, "learning_rate": 1.0175764467740766e-08, "loss": 0.0351, "projector_lr": 3.05272934032223e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.75, "sft_loss": 0.78515625, "step": 5852 }, { "dpo_loss": 0.1591796875, "epoch": 0.94, "final_loss": 0.1591796875, "grad_norm": 0.0, "learning_rate": 1.0124867275985438e-08, "loss": 0.2789, "projector_lr": 3.037460182795632e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1806640625, "rewards_train/margins": 4.8125, "rewards_train/rejected": -5.0, "sft_loss": 0.76953125, "step": 5853 }, { "dpo_loss": 0.130859375, "epoch": 0.94, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 1.0074096390496667e-08, "loss": 0.1519, "projector_lr": 3.022228917149e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8125, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.5625, "sft_loss": 0.765625, "step": 5854 }, { "dpo_loss": 0.2451171875, "epoch": 0.94, "final_loss": 0.2451171875, "grad_norm": 0.0, "learning_rate": 1.002345182436487e-08, "loss": 0.2377, "projector_lr": 3.007035547309461e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.53515625, "rewards_train/margins": 3.4375, "rewards_train/rejected": -3.96875, "sft_loss": 0.67578125, "step": 5855 }, { "dpo_loss": 0.0498046875, "epoch": 0.94, "final_loss": 0.0498046875, "grad_norm": 0.0, "learning_rate": 9.972933590647992e-09, "loss": 0.0538, "projector_lr": 2.991880077194398e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.52734375, "rewards_train/margins": 6.875, "rewards_train/rejected": -7.40625, "sft_loss": 0.63671875, "step": 5856 }, { "dpo_loss": 0.109375, "epoch": 0.94, "final_loss": 0.109375, "grad_norm": 0.0, "learning_rate": 9.922541702371222e-09, "loss": 0.1177, "projector_lr": 2.976762510711367e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.703125, "rewards_train/margins": 4.6875, "rewards_train/rejected": -5.375, "sft_loss": 0.77734375, "step": 5857 }, { "dpo_loss": 0.1953125, "epoch": 0.94, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 9.872276172527338e-09, "loss": 0.262, "projector_lr": 2.9616828517582018e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.046875, "rewards_train/margins": 4.71875, "rewards_train/rejected": -6.78125, "sft_loss": 0.8984375, "step": 5858 }, { "dpo_loss": 0.18359375, "epoch": 0.94, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 9.822137014076471e-09, "loss": 0.2476, "projector_lr": 2.9466411042229413e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.359375, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.9375, "sft_loss": 0.6640625, "step": 5859 }, { "dpo_loss": 0.361328125, "epoch": 0.94, "final_loss": 0.361328125, "grad_norm": 0.0, "learning_rate": 9.772124239946279e-09, "loss": 0.241, "projector_lr": 2.9316372719838836e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.375, "rewards_train/margins": 2.859375, "rewards_train/rejected": -4.25, "sft_loss": 0.96875, "step": 5860 }, { "dpo_loss": 0.240234375, "epoch": 0.94, "final_loss": 0.240234375, "grad_norm": 0.0, "learning_rate": 9.722237863031557e-09, "loss": 0.1885, "projector_lr": 2.9166713589094673e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 6.15625, "rewards_train/rejected": -7.15625, "sft_loss": 0.69921875, "step": 5861 }, { "dpo_loss": 0.058837890625, "epoch": 0.94, "final_loss": 0.058837890625, "grad_norm": 0.0, "learning_rate": 9.672477896194852e-09, "loss": 0.1316, "projector_lr": 2.901743368858456e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.50390625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.15625, "sft_loss": 0.8046875, "step": 5862 }, { "dpo_loss": 0.357421875, "epoch": 0.94, "final_loss": 0.357421875, "grad_norm": 0.0, "learning_rate": 9.622844352265847e-09, "loss": 0.1883, "projector_lr": 2.886853305679754e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.125, "sft_loss": 0.96484375, "step": 5863 }, { "dpo_loss": 0.50390625, "epoch": 0.94, "final_loss": 0.50390625, "grad_norm": 0.0, "learning_rate": 9.573337244041745e-09, "loss": 0.258, "projector_lr": 2.872001173212524e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 2.796875, "rewards_train/rejected": -3.765625, "sft_loss": 0.76171875, "step": 5864 }, { "dpo_loss": 0.16796875, "epoch": 0.94, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 9.523956584287174e-09, "loss": 0.2214, "projector_lr": 2.8571869752861522e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 2.984375, "rewards_train/rejected": -4.375, "sft_loss": 0.98046875, "step": 5865 }, { "dpo_loss": 0.051513671875, "epoch": 0.94, "final_loss": 0.051513671875, "grad_norm": 0.0, "learning_rate": 9.474702385734223e-09, "loss": 0.1096, "projector_lr": 2.8424107157202672e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65234375, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.65625, "sft_loss": 0.61328125, "step": 5866 }, { "dpo_loss": 0.25, "epoch": 0.94, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 9.42557466108207e-09, "loss": 0.4451, "projector_lr": 2.827672398324621e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.46875, "rewards_train/margins": 6.0, "rewards_train/rejected": -7.46875, "sft_loss": 0.81640625, "step": 5867 }, { "dpo_loss": 0.1240234375, "epoch": 0.94, "final_loss": 0.1240234375, "grad_norm": 0.0, "learning_rate": 9.37657342299769e-09, "loss": 0.3249, "projector_lr": 2.8129720268993075e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.78125, "sft_loss": 0.609375, "step": 5868 }, { "dpo_loss": 0.08740234375, "epoch": 0.94, "final_loss": 0.08740234375, "grad_norm": 0.0, "learning_rate": 9.327698684115092e-09, "loss": 0.413, "projector_lr": 2.798309605234528e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 7.375, "rewards_train/rejected": -8.8125, "sft_loss": 0.8515625, "step": 5869 }, { "dpo_loss": 0.08984375, "epoch": 0.94, "final_loss": 0.08984375, "grad_norm": 0.0, "learning_rate": 9.278950457035972e-09, "loss": 0.0965, "projector_lr": 2.783685137110792e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2060546875, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.21875, "sft_loss": 0.80078125, "step": 5870 }, { "dpo_loss": 0.1318359375, "epoch": 0.94, "final_loss": 0.1318359375, "grad_norm": 0.0, "learning_rate": 9.230328754329053e-09, "loss": 0.1087, "projector_lr": 2.7690986262987162e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0234375, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.875, "sft_loss": 2.1875, "step": 5871 }, { "dpo_loss": 0.1669921875, "epoch": 0.94, "final_loss": 0.1669921875, "grad_norm": 0.0, "learning_rate": 9.181833588530864e-09, "loss": 0.225, "projector_lr": 2.754550076559259e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.078125, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.1875, "sft_loss": 0.625, "step": 5872 }, { "dpo_loss": 0.1396484375, "epoch": 0.94, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 9.133464972144955e-09, "loss": 0.2994, "projector_lr": 2.7400394916434867e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.890625, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.8125, "sft_loss": 0.828125, "step": 5873 }, { "dpo_loss": 0.46875, "epoch": 0.94, "final_loss": 0.46875, "grad_norm": 0.0, "learning_rate": 9.085222917642465e-09, "loss": 0.2543, "projector_lr": 2.7255668752927398e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.9375, "sft_loss": 0.84375, "step": 5874 }, { "dpo_loss": 0.1279296875, "epoch": 0.94, "final_loss": 0.1279296875, "grad_norm": 0.0, "learning_rate": 9.037107437461777e-09, "loss": 0.1967, "projector_lr": 2.7111322312385334e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.1875, "sft_loss": 0.6015625, "step": 5875 }, { "dpo_loss": 0.259765625, "epoch": 0.94, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 8.989118544008689e-09, "loss": 0.355, "projector_lr": 2.696735563202607e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 5.0625, "rewards_train/rejected": -6.3125, "sft_loss": 0.88671875, "step": 5876 }, { "dpo_loss": 0.123046875, "epoch": 0.94, "final_loss": 0.123046875, "grad_norm": 0.0, "learning_rate": 8.941256249656249e-09, "loss": 0.2531, "projector_lr": 2.6823768748968748e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 5.40625, "rewards_train/rejected": -7.125, "sft_loss": 1.09375, "step": 5877 }, { "dpo_loss": 0.2265625, "epoch": 0.94, "final_loss": 0.2265625, "grad_norm": 0.0, "learning_rate": 8.893520566745139e-09, "loss": 0.1727, "projector_lr": 2.668056170023542e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.98046875, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.25, "sft_loss": 0.6640625, "step": 5878 }, { "dpo_loss": 0.0081787109375, "epoch": 0.94, "final_loss": 0.0081787109375, "grad_norm": 0.0, "learning_rate": 8.845911507583126e-09, "loss": 0.1424, "projector_lr": 2.6537734522749378e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 6.84375, "rewards_train/rejected": -8.1875, "sft_loss": 0.95703125, "step": 5879 }, { "dpo_loss": 0.08740234375, "epoch": 0.94, "final_loss": 0.08740234375, "grad_norm": 0.0, "learning_rate": 8.798429084445391e-09, "loss": 0.2201, "projector_lr": 2.6395287253336177e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.51171875, "rewards_train/margins": 4.125, "rewards_train/rejected": -4.625, "sft_loss": 0.63671875, "step": 5880 }, { "dpo_loss": 0.21875, "epoch": 0.94, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 8.75107330957453e-09, "loss": 0.1188, "projector_lr": 2.6253219928723594e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.068359375, "rewards_train/margins": 5.46875, "rewards_train/rejected": -5.40625, "sft_loss": 0.6015625, "step": 5881 }, { "dpo_loss": 0.2890625, "epoch": 0.94, "final_loss": 0.2890625, "grad_norm": 0.0, "learning_rate": 8.703844195180554e-09, "loss": 0.2725, "projector_lr": 2.6111532585541663e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.421875, "rewards_train/margins": 3.484375, "rewards_train/rejected": -4.90625, "sft_loss": 0.7734375, "step": 5882 }, { "dpo_loss": 0.125, "epoch": 0.94, "final_loss": 0.125, "grad_norm": 0.0, "learning_rate": 8.6567417534405e-09, "loss": 0.1408, "projector_lr": 2.59702252603215e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.84375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -6.125, "sft_loss": 1.1328125, "step": 5883 }, { "dpo_loss": 0.0186767578125, "epoch": 0.94, "final_loss": 0.0186767578125, "grad_norm": 0.0, "learning_rate": 8.609765996499207e-09, "loss": 0.091, "projector_lr": 2.5829297989497624e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.478515625, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.09375, "sft_loss": 0.64453125, "step": 5884 }, { "dpo_loss": 0.1787109375, "epoch": 0.94, "final_loss": 0.1787109375, "grad_norm": 0.0, "learning_rate": 8.562916936468378e-09, "loss": 0.0917, "projector_lr": 2.5688750809405135e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 3.109375, "rewards_train/rejected": -4.1875, "sft_loss": 0.9765625, "step": 5885 }, { "dpo_loss": 0.208984375, "epoch": 0.94, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 8.51619458542746e-09, "loss": 0.2116, "projector_lr": 2.5548583756282385e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 3.78125, "rewards_train/rejected": -5.25, "sft_loss": 0.9453125, "step": 5886 }, { "dpo_loss": 0.06689453125, "epoch": 0.94, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 8.469598955422875e-09, "loss": 0.1458, "projector_lr": 2.5408796866268623e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7109375, "rewards_train/margins": 6.1875, "rewards_train/rejected": -6.90625, "sft_loss": 0.9921875, "step": 5887 }, { "dpo_loss": 0.1494140625, "epoch": 0.94, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 8.423130058468675e-09, "loss": 0.3128, "projector_lr": 2.5269390175406026e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.5, "sft_loss": 0.87109375, "step": 5888 }, { "dpo_loss": 0.146484375, "epoch": 0.94, "final_loss": 0.146484375, "grad_norm": 0.0, "learning_rate": 8.376787906546112e-09, "loss": 0.1752, "projector_lr": 2.513036371963834e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.90625, "sft_loss": 0.83984375, "step": 5889 }, { "dpo_loss": 0.10888671875, "epoch": 0.94, "final_loss": 0.10888671875, "grad_norm": 0.0, "learning_rate": 8.330572511603628e-09, "loss": 0.0734, "projector_lr": 2.4991717534810888e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.90625, "rewards_train/margins": 6.34375, "rewards_train/rejected": -8.25, "sft_loss": 0.6796875, "step": 5890 }, { "dpo_loss": 0.263671875, "epoch": 0.94, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 8.284483885557136e-09, "loss": 0.3124, "projector_lr": 2.485345165667141e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91015625, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.65625, "sft_loss": 0.79296875, "step": 5891 }, { "dpo_loss": 0.08251953125, "epoch": 0.94, "final_loss": 0.08251953125, "grad_norm": 0.0, "learning_rate": 8.238522040289964e-09, "loss": 0.0457, "projector_lr": 2.4715566120869893e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.44921875, "rewards_train/margins": 7.15625, "rewards_train/rejected": -7.625, "sft_loss": 0.71875, "step": 5892 }, { "dpo_loss": 0.00830078125, "epoch": 0.94, "final_loss": 0.00830078125, "grad_norm": 0.0, "learning_rate": 8.192686987652464e-09, "loss": 0.3319, "projector_lr": 2.4578060962957393e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.208984375, "rewards_train/margins": 8.875, "rewards_train/rejected": -8.6875, "sft_loss": 0.55078125, "step": 5893 }, { "dpo_loss": 0.06494140625, "epoch": 0.94, "final_loss": 0.06494140625, "grad_norm": 0.0, "learning_rate": 8.146978739462463e-09, "loss": 0.0355, "projector_lr": 2.4440936218387388e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53125, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.40625, "sft_loss": 0.6796875, "step": 5894 }, { "dpo_loss": 0.05908203125, "epoch": 0.94, "final_loss": 0.05908203125, "grad_norm": 0.0, "learning_rate": 8.101397307505198e-09, "loss": 0.0538, "projector_lr": 2.4304191922515595e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1650390625, "rewards_train/margins": 5.8125, "rewards_train/rejected": -5.96875, "sft_loss": 0.859375, "step": 5895 }, { "dpo_loss": 0.1728515625, "epoch": 0.94, "final_loss": 0.1728515625, "grad_norm": 0.0, "learning_rate": 8.055942703533047e-09, "loss": 0.111, "projector_lr": 2.4167828110599143e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5078125, "rewards_train/margins": 3.1875, "rewards_train/rejected": -4.6875, "sft_loss": 0.9921875, "step": 5896 }, { "dpo_loss": 0.12060546875, "epoch": 0.94, "final_loss": 0.12060546875, "grad_norm": 0.0, "learning_rate": 8.010614939265692e-09, "loss": 0.1337, "projector_lr": 2.403184481779708e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.09521484375, "rewards_train/margins": 7.0625, "rewards_train/rejected": -6.96875, "sft_loss": 0.66796875, "step": 5897 }, { "dpo_loss": 0.236328125, "epoch": 0.94, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 7.965414026390116e-09, "loss": 0.2743, "projector_lr": 2.389624207917035e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.9375, "sft_loss": 0.87890625, "step": 5898 }, { "dpo_loss": 0.2021484375, "epoch": 0.94, "final_loss": 0.2021484375, "grad_norm": 0.0, "learning_rate": 7.920339976560775e-09, "loss": 0.1402, "projector_lr": 2.3761019929682326e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9765625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.3125, "sft_loss": 0.81640625, "step": 5899 }, { "dpo_loss": 0.369140625, "epoch": 0.94, "final_loss": 0.369140625, "grad_norm": 0.0, "learning_rate": 7.875392801399094e-09, "loss": 0.358, "projector_lr": 2.3626178404197286e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 1.953125, "rewards_train/rejected": -3.28125, "sft_loss": 1.1015625, "step": 5900 }, { "dpo_loss": 0.0264892578125, "epoch": 0.94, "final_loss": 0.0264892578125, "grad_norm": 0.0, "learning_rate": 7.830572512494138e-09, "loss": 0.3407, "projector_lr": 2.3491717537482416e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.22265625, "rewards_train/margins": 6.375, "rewards_train/rejected": -6.59375, "sft_loss": 0.69921875, "step": 5901 }, { "dpo_loss": 0.203125, "epoch": 0.94, "final_loss": 0.203125, "grad_norm": 0.0, "learning_rate": 7.785879121401995e-09, "loss": 0.3198, "projector_lr": 2.3357637364205986e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.8125, "sft_loss": 0.78125, "step": 5902 }, { "dpo_loss": 0.09912109375, "epoch": 0.94, "final_loss": 0.09912109375, "grad_norm": 0.0, "learning_rate": 7.74131263964617e-09, "loss": 0.1163, "projector_lr": 2.322393791893851e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.4375, "sft_loss": 0.55859375, "step": 5903 }, { "dpo_loss": 0.1083984375, "epoch": 0.94, "final_loss": 0.1083984375, "grad_norm": 0.0, "learning_rate": 7.696873078717247e-09, "loss": 0.1297, "projector_lr": 2.3090619236151746e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 3.96875, "rewards_train/rejected": -4.65625, "sft_loss": 0.74609375, "step": 5904 }, { "dpo_loss": 0.34765625, "epoch": 0.94, "final_loss": 0.34765625, "grad_norm": 0.0, "learning_rate": 7.652560450073452e-09, "loss": 0.254, "projector_lr": 2.295768135022036e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.71875, "sft_loss": 0.7578125, "step": 5905 }, { "dpo_loss": 0.06494140625, "epoch": 0.94, "final_loss": 0.06494140625, "grad_norm": 0.0, "learning_rate": 7.608374765139924e-09, "loss": 0.0467, "projector_lr": 2.2825124295419774e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.90234375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.5625, "sft_loss": 0.74609375, "step": 5906 }, { "dpo_loss": 0.294921875, "epoch": 0.95, "final_loss": 0.294921875, "grad_norm": 0.0, "learning_rate": 7.564316035309327e-09, "loss": 0.2057, "projector_lr": 2.269294810592798e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.4375, "sft_loss": 0.62109375, "step": 5907 }, { "dpo_loss": 0.0084228515625, "epoch": 0.95, "final_loss": 0.0084228515625, "grad_norm": 0.0, "learning_rate": 7.52038427194135e-09, "loss": 0.0141, "projector_lr": 2.256115281582405e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.02197265625, "rewards_train/margins": 5.40625, "rewards_train/rejected": -5.375, "sft_loss": 0.5703125, "step": 5908 }, { "dpo_loss": 0.1875, "epoch": 0.95, "final_loss": 0.1875, "grad_norm": 0.0, "learning_rate": 7.476579486363155e-09, "loss": 0.1405, "projector_lr": 2.2429738459089467e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.484375, "rewards_train/margins": 3.6875, "rewards_train/rejected": -5.15625, "sft_loss": 0.78125, "step": 5909 }, { "dpo_loss": 0.4609375, "epoch": 0.95, "final_loss": 0.4609375, "grad_norm": 0.0, "learning_rate": 7.4329016898690976e-09, "loss": 0.2778, "projector_lr": 2.2298705069607294e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87890625, "rewards_train/margins": 4.875, "rewards_train/rejected": -5.78125, "sft_loss": 0.59765625, "step": 5910 }, { "dpo_loss": 0.033203125, "epoch": 0.95, "final_loss": 0.033203125, "grad_norm": 0.0, "learning_rate": 7.38935089372078e-09, "loss": 0.0488, "projector_lr": 2.2168052681162344e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8359375, "rewards_train/margins": 6.53125, "rewards_train/rejected": -7.34375, "sft_loss": 0.52734375, "step": 5911 }, { "dpo_loss": 0.21484375, "epoch": 0.95, "final_loss": 0.21484375, "grad_norm": 0.0, "learning_rate": 7.345927109146999e-09, "loss": 0.1558, "projector_lr": 2.2037781327441e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 6.53125, "rewards_train/rejected": -7.75, "sft_loss": 0.8203125, "step": 5912 }, { "dpo_loss": 0.033447265625, "epoch": 0.95, "final_loss": 0.033447265625, "grad_norm": 0.0, "learning_rate": 7.302630347343908e-09, "loss": 0.0352, "projector_lr": 2.1907891042031725e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.099609375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -4.46875, "sft_loss": 0.8046875, "step": 5913 }, { "dpo_loss": 0.2734375, "epoch": 0.95, "final_loss": 0.2734375, "grad_norm": 0.0, "learning_rate": 7.259460619474855e-09, "loss": 0.1882, "projector_lr": 2.1778381858424568e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.875, "rewards_train/margins": 6.15625, "rewards_train/rejected": -7.03125, "sft_loss": 0.796875, "step": 5914 }, { "dpo_loss": 0.020263671875, "epoch": 0.95, "final_loss": 0.020263671875, "grad_norm": 0.0, "learning_rate": 7.216417936670549e-09, "loss": 0.0237, "projector_lr": 2.1649253810011647e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1875, "rewards_train/margins": 6.78125, "rewards_train/rejected": -6.96875, "sft_loss": 0.65234375, "step": 5915 }, { "dpo_loss": 0.259765625, "epoch": 0.95, "final_loss": 0.259765625, "grad_norm": 0.0, "learning_rate": 7.173502310028667e-09, "loss": 0.2021, "projector_lr": 2.1520506930086003e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4765625, "rewards_train/margins": 4.75, "rewards_train/rejected": -6.21875, "sft_loss": 0.765625, "step": 5916 }, { "dpo_loss": 0.03271484375, "epoch": 0.95, "final_loss": 0.03271484375, "grad_norm": 0.0, "learning_rate": 7.130713750614414e-09, "loss": 0.0706, "projector_lr": 2.1392141251843243e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 6.03125, "rewards_train/rejected": -6.875, "sft_loss": 0.77734375, "step": 5917 }, { "dpo_loss": 0.8203125, "epoch": 0.95, "final_loss": 0.8203125, "grad_norm": 0.0, "learning_rate": 7.088052269460076e-09, "loss": 0.4299, "projector_lr": 2.126415680838023e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4609375, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.9375, "sft_loss": 0.65625, "step": 5918 }, { "dpo_loss": 0.2060546875, "epoch": 0.95, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 7.045517877565188e-09, "loss": 0.2076, "projector_lr": 2.1136553632695567e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.40625, "rewards_train/margins": 2.90625, "rewards_train/rejected": -4.3125, "sft_loss": 0.89453125, "step": 5919 }, { "dpo_loss": 0.2099609375, "epoch": 0.95, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 7.003110585896643e-09, "loss": 0.1146, "projector_lr": 2.100933175768993e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.9375, "sft_loss": 0.671875, "step": 5920 }, { "dpo_loss": 0.10498046875, "epoch": 0.95, "final_loss": 0.10498046875, "grad_norm": 0.0, "learning_rate": 6.960830405388418e-09, "loss": 0.227, "projector_lr": 2.0882491216165256e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 5.0, "rewards_train/rejected": -5.53125, "sft_loss": 0.6015625, "step": 5921 }, { "dpo_loss": 0.09130859375, "epoch": 0.95, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 6.918677346941681e-09, "loss": 0.0803, "projector_lr": 2.0756032040825047e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.796875, "rewards_train/margins": 6.28125, "rewards_train/rejected": -8.0625, "sft_loss": 0.7890625, "step": 5922 }, { "dpo_loss": 0.11962890625, "epoch": 0.95, "final_loss": 0.11962890625, "grad_norm": 0.0, "learning_rate": 6.8766514214250175e-09, "loss": 0.1504, "projector_lr": 2.0629954264275052e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.78515625, "rewards_train/margins": 3.90625, "rewards_train/rejected": -4.6875, "sft_loss": 0.74609375, "step": 5923 }, { "dpo_loss": 0.494140625, "epoch": 0.95, "final_loss": 0.494140625, "grad_norm": 0.0, "learning_rate": 6.83475263967409e-09, "loss": 0.2878, "projector_lr": 2.0504257919022273e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.9375, "rewards_train/margins": 2.40625, "rewards_train/rejected": -4.34375, "sft_loss": 1.640625, "step": 5924 }, { "dpo_loss": 0.058349609375, "epoch": 0.95, "final_loss": 0.058349609375, "grad_norm": 0.0, "learning_rate": 6.79298101249176e-09, "loss": 0.037, "projector_lr": 2.0378943037475284e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8203125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -6.5625, "sft_loss": 0.95703125, "step": 5925 }, { "dpo_loss": 0.482421875, "epoch": 0.95, "final_loss": 0.482421875, "grad_norm": 0.0, "learning_rate": 6.7513365506481904e-09, "loss": 0.3689, "projector_lr": 2.025400965194457e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 3.109375, "rewards_train/rejected": -4.03125, "sft_loss": 0.890625, "step": 5926 }, { "dpo_loss": 0.12890625, "epoch": 0.95, "final_loss": 0.12890625, "grad_norm": 0.0, "learning_rate": 6.7098192648807894e-09, "loss": 0.1456, "projector_lr": 2.012945779464237e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.515625, "rewards_train/margins": 4.46875, "rewards_train/rejected": -6.0, "sft_loss": 0.89453125, "step": 5927 }, { "dpo_loss": 0.30859375, "epoch": 0.95, "final_loss": 0.30859375, "grad_norm": 0.0, "learning_rate": 6.668429165893996e-09, "loss": 0.3631, "projector_lr": 2.000528749768199e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.53125, "sft_loss": 0.57421875, "step": 5928 }, { "dpo_loss": 0.150390625, "epoch": 0.95, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 6.627166264359662e-09, "loss": 0.1479, "projector_lr": 1.9881498793078988e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.4453125, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.40625, "sft_loss": 0.65234375, "step": 5929 }, { "dpo_loss": 0.10791015625, "epoch": 0.95, "final_loss": 0.10791015625, "grad_norm": 0.0, "learning_rate": 6.586030570916723e-09, "loss": 0.0637, "projector_lr": 1.975809171275017e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.7265625, "rewards_train/margins": 7.78125, "rewards_train/rejected": -7.03125, "sft_loss": 0.59375, "step": 5930 }, { "dpo_loss": 0.057861328125, "epoch": 0.95, "final_loss": 0.057861328125, "grad_norm": 0.0, "learning_rate": 6.5450220961713594e-09, "loss": 0.2524, "projector_lr": 1.963506628851408e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66015625, "rewards_train/margins": 6.1875, "rewards_train/rejected": -6.84375, "sft_loss": 0.578125, "step": 5931 }, { "dpo_loss": 0.138671875, "epoch": 0.95, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 6.504140850696782e-09, "loss": 0.0836, "projector_lr": 1.9512422552090346e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.484375, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.53125, "sft_loss": 0.8203125, "step": 5932 }, { "dpo_loss": 0.0245361328125, "epoch": 0.95, "final_loss": 0.0245361328125, "grad_norm": 0.0, "learning_rate": 6.46338684503378e-09, "loss": 0.1934, "projector_lr": 1.9390160535101343e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 4.875, "rewards_train/rejected": -6.375, "sft_loss": 0.7578125, "step": 5933 }, { "dpo_loss": 0.20703125, "epoch": 0.95, "final_loss": 0.20703125, "grad_norm": 0.0, "learning_rate": 6.422760089690005e-09, "loss": 0.1819, "projector_lr": 1.9268280269070015e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8671875, "rewards_train/margins": 6.125, "rewards_train/rejected": -8.0, "sft_loss": 0.82421875, "step": 5934 }, { "dpo_loss": 0.181640625, "epoch": 0.95, "final_loss": 0.181640625, "grad_norm": 0.0, "learning_rate": 6.382260595140298e-09, "loss": 0.2793, "projector_lr": 1.9146781785420898e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6015625, "rewards_train/margins": 5.28125, "rewards_train/rejected": -5.875, "sft_loss": 0.53125, "step": 5935 }, { "dpo_loss": 0.0576171875, "epoch": 0.95, "final_loss": 0.0576171875, "grad_norm": 0.0, "learning_rate": 6.341888371826976e-09, "loss": 0.2359, "projector_lr": 1.902566511548093e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.1357421875, "rewards_train/margins": 6.46875, "rewards_train/rejected": -6.34375, "sft_loss": 0.66015625, "step": 5936 }, { "dpo_loss": 0.55078125, "epoch": 0.95, "final_loss": 0.55078125, "grad_norm": 0.0, "learning_rate": 6.301643430159209e-09, "loss": 0.3004, "projector_lr": 1.8904930290477628e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.625, "rewards_train/margins": 3.578125, "rewards_train/rejected": -4.1875, "sft_loss": 0.87109375, "step": 5937 }, { "dpo_loss": 0.33984375, "epoch": 0.95, "final_loss": 0.33984375, "grad_norm": 0.0, "learning_rate": 6.261525780513588e-09, "loss": 0.2461, "projector_lr": 1.8784577341540764e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.65625, "rewards_train/margins": 2.78125, "rewards_train/rejected": -3.4375, "sft_loss": 0.8359375, "step": 5938 }, { "dpo_loss": 0.01385498046875, "epoch": 0.95, "final_loss": 0.01385498046875, "grad_norm": 0.0, "learning_rate": 6.221535433233671e-09, "loss": 0.0286, "projector_lr": 1.8664606299701015e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.8125, "sft_loss": 0.7734375, "step": 5939 }, { "dpo_loss": 0.23828125, "epoch": 0.95, "final_loss": 0.23828125, "grad_norm": 0.0, "learning_rate": 6.181672398630433e-09, "loss": 0.1616, "projector_lr": 1.8545017195891302e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.640625, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.53125, "sft_loss": 0.6953125, "step": 5940 }, { "dpo_loss": 0.03515625, "epoch": 0.95, "final_loss": 0.03515625, "grad_norm": 0.0, "learning_rate": 6.141936686981875e-09, "loss": 0.0636, "projector_lr": 1.8425810060945623e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.359375, "rewards_train/margins": 6.125, "rewards_train/rejected": -6.46875, "sft_loss": 0.81640625, "step": 5941 }, { "dpo_loss": 0.09375, "epoch": 0.95, "final_loss": 0.09375, "grad_norm": 0.0, "learning_rate": 6.102328308533244e-09, "loss": 0.0949, "projector_lr": 1.8306984925599734e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 4.96875, "rewards_train/rejected": -6.1875, "sft_loss": 1.0390625, "step": 5942 }, { "dpo_loss": 0.01708984375, "epoch": 0.95, "final_loss": 0.01708984375, "grad_norm": 0.0, "learning_rate": 6.062847273496763e-09, "loss": 0.0495, "projector_lr": 1.818854182049029e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.25, "sft_loss": 0.73828125, "step": 5943 }, { "dpo_loss": 0.2109375, "epoch": 0.95, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 6.023493592052065e-09, "loss": 0.1785, "projector_lr": 1.8070480776156195e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7578125, "rewards_train/margins": 3.0, "rewards_train/rejected": -4.75, "sft_loss": 1.125, "step": 5944 }, { "dpo_loss": 0.0625, "epoch": 0.95, "final_loss": 0.0625, "grad_norm": 0.0, "learning_rate": 5.984267274345867e-09, "loss": 0.1027, "projector_lr": 1.79528018230376e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.109375, "rewards_train/margins": 4.90625, "rewards_train/rejected": -5.03125, "sft_loss": 0.7578125, "step": 5945 }, { "dpo_loss": 0.1494140625, "epoch": 0.95, "final_loss": 0.1494140625, "grad_norm": 0.0, "learning_rate": 5.9451683304920234e-09, "loss": 0.1084, "projector_lr": 1.7835504991476072e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.455078125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.03125, "sft_loss": 0.58984375, "step": 5946 }, { "dpo_loss": 0.44140625, "epoch": 0.95, "final_loss": 0.44140625, "grad_norm": 0.0, "learning_rate": 5.906196770571525e-09, "loss": 0.3477, "projector_lr": 1.7718590311714576e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 3.796875, "rewards_train/rejected": -5.625, "sft_loss": 0.94921875, "step": 5947 }, { "dpo_loss": 0.322265625, "epoch": 0.95, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 5.867352604632614e-09, "loss": 0.2079, "projector_lr": 1.760205781389784e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.5625, "sft_loss": 0.85546875, "step": 5948 }, { "dpo_loss": 0.4921875, "epoch": 0.95, "final_loss": 0.4921875, "grad_norm": 0.0, "learning_rate": 5.828635842690499e-09, "loss": 0.3524, "projector_lr": 1.74859075280715e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.265625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.09375, "sft_loss": 0.63671875, "step": 5949 }, { "dpo_loss": 0.2470703125, "epoch": 0.95, "final_loss": 0.2470703125, "grad_norm": 0.0, "learning_rate": 5.790046494727752e-09, "loss": 0.2488, "projector_lr": 1.7370139484183257e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.890625, "rewards_train/margins": 4.8125, "rewards_train/rejected": -6.6875, "sft_loss": 1.09375, "step": 5950 }, { "dpo_loss": 0.1337890625, "epoch": 0.95, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 5.751584570694024e-09, "loss": 0.2689, "projector_lr": 1.7254753712082072e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.36328125, "rewards_train/margins": 5.25, "rewards_train/rejected": -5.625, "sft_loss": 0.734375, "step": 5951 }, { "dpo_loss": 0.49609375, "epoch": 0.95, "final_loss": 0.49609375, "grad_norm": 0.0, "learning_rate": 5.713250080506049e-09, "loss": 0.2857, "projector_lr": 1.7139750241518147e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.59375, "sft_loss": 0.7890625, "step": 5952 }, { "dpo_loss": 0.001617431640625, "epoch": 0.95, "final_loss": 0.001617431640625, "grad_norm": 0.0, "learning_rate": 5.6750430340476995e-09, "loss": 0.0996, "projector_lr": 1.70251291021431e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3125, "rewards_train/margins": 7.375, "rewards_train/rejected": -7.6875, "sft_loss": 0.69921875, "step": 5953 }, { "dpo_loss": 0.419921875, "epoch": 0.95, "final_loss": 0.419921875, "grad_norm": 0.0, "learning_rate": 5.636963441170151e-09, "loss": 0.4054, "projector_lr": 1.6910890323510453e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.6640625, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.71875, "sft_loss": 0.66796875, "step": 5954 }, { "dpo_loss": 0.026123046875, "epoch": 0.95, "final_loss": 0.026123046875, "grad_norm": 0.0, "learning_rate": 5.599011311691493e-09, "loss": 0.2215, "projector_lr": 1.679703393507448e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2119140625, "rewards_train/margins": 7.75, "rewards_train/rejected": -7.96875, "sft_loss": 0.8515625, "step": 5955 }, { "dpo_loss": 0.1103515625, "epoch": 0.95, "final_loss": 0.1103515625, "grad_norm": 0.0, "learning_rate": 5.5611866553971766e-09, "loss": 0.073, "projector_lr": 1.668355996619153e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.39453125, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.0, "sft_loss": 0.546875, "step": 5956 }, { "dpo_loss": 0.0308837890625, "epoch": 0.95, "final_loss": 0.0308837890625, "grad_norm": 0.0, "learning_rate": 5.5234894820395115e-09, "loss": 0.0587, "projector_lr": 1.6570468446118538e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.265625, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.5, "sft_loss": 1.125, "step": 5957 }, { "dpo_loss": 0.2265625, "epoch": 0.95, "final_loss": 0.2265625, "grad_norm": 0.0, "learning_rate": 5.485919801338224e-09, "loss": 0.1272, "projector_lr": 1.6457759404014672e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.046875, "rewards_train/margins": 6.53125, "rewards_train/rejected": -7.5625, "sft_loss": 0.64453125, "step": 5958 }, { "dpo_loss": 0.45703125, "epoch": 0.95, "final_loss": 0.45703125, "grad_norm": 0.0, "learning_rate": 5.448477622979952e-09, "loss": 0.5996, "projector_lr": 1.6345432868939857e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.09619140625, "rewards_train/margins": 8.0, "rewards_train/rejected": -8.0625, "sft_loss": 0.66015625, "step": 5959 }, { "dpo_loss": 0.076171875, "epoch": 0.95, "final_loss": 0.076171875, "grad_norm": 0.0, "learning_rate": 5.4111629566186975e-09, "loss": 0.097, "projector_lr": 1.6233488869856095e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59375, "rewards_train/margins": 7.03125, "rewards_train/rejected": -7.625, "sft_loss": 0.61328125, "step": 5960 }, { "dpo_loss": 0.111328125, "epoch": 0.95, "final_loss": 0.111328125, "grad_norm": 0.0, "learning_rate": 5.373975811875209e-09, "loss": 0.0989, "projector_lr": 1.6121927435625627e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.62109375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.0, "sft_loss": 0.8125, "step": 5961 }, { "dpo_loss": 0.232421875, "epoch": 0.95, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 5.33691619833776e-09, "loss": 0.1959, "projector_lr": 1.601074859501328e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3671875, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.6875, "sft_loss": 0.90625, "step": 5962 }, { "dpo_loss": 0.19140625, "epoch": 0.95, "final_loss": 0.19140625, "grad_norm": 0.0, "learning_rate": 5.299984125561485e-09, "loss": 0.173, "projector_lr": 1.5899952376684457e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8046875, "rewards_train/margins": 3.78125, "rewards_train/rejected": -4.59375, "sft_loss": 0.76953125, "step": 5963 }, { "dpo_loss": 0.1396484375, "epoch": 0.95, "final_loss": 0.1396484375, "grad_norm": 0.0, "learning_rate": 5.263179603068768e-09, "loss": 0.2492, "projector_lr": 1.5789538809206305e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.61328125, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.375, "sft_loss": 0.890625, "step": 5964 }, { "dpo_loss": 0.0159912109375, "epoch": 0.95, "final_loss": 0.0159912109375, "grad_norm": 0.0, "learning_rate": 5.226502640349017e-09, "loss": 0.0099, "projector_lr": 1.5679507921047054e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 7.84375, "rewards_train/rejected": -8.4375, "sft_loss": 0.7109375, "step": 5965 }, { "dpo_loss": 0.05078125, "epoch": 0.95, "final_loss": 0.05078125, "grad_norm": 0.0, "learning_rate": 5.1899532468587804e-09, "loss": 0.0271, "projector_lr": 1.5569859740576342e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.875, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.71875, "sft_loss": 0.66796875, "step": 5966 }, { "dpo_loss": 0.25390625, "epoch": 0.95, "final_loss": 0.25390625, "grad_norm": 0.0, "learning_rate": 5.153531432021685e-09, "loss": 0.2503, "projector_lr": 1.5460594296065055e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.1875, "sft_loss": 0.765625, "step": 5967 }, { "dpo_loss": 0.1787109375, "epoch": 0.95, "final_loss": 0.1787109375, "grad_norm": 0.0, "learning_rate": 5.117237205228609e-09, "loss": 0.1055, "projector_lr": 1.5351711615685827e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.421875, "rewards_train/margins": 3.96875, "rewards_train/rejected": -5.375, "sft_loss": 1.140625, "step": 5968 }, { "dpo_loss": 0.10302734375, "epoch": 0.96, "final_loss": 0.10302734375, "grad_norm": 0.0, "learning_rate": 5.081070575837343e-09, "loss": 0.1159, "projector_lr": 1.524321172751203e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.296875, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.375, "sft_loss": 0.7265625, "step": 5969 }, { "dpo_loss": 0.42578125, "epoch": 0.96, "final_loss": 0.42578125, "grad_norm": 0.0, "learning_rate": 5.045031553172818e-09, "loss": 0.2556, "projector_lr": 1.5135094659518457e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4453125, "rewards_train/margins": 5.8125, "rewards_train/rejected": -7.28125, "sft_loss": 0.80078125, "step": 5970 }, { "dpo_loss": 0.302734375, "epoch": 0.96, "final_loss": 0.302734375, "grad_norm": 0.0, "learning_rate": 5.009120146527157e-09, "loss": 0.1564, "projector_lr": 1.502736043958147e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.5, "rewards_train/rejected": -5.84375, "sft_loss": 0.90625, "step": 5971 }, { "dpo_loss": 0.1953125, "epoch": 0.96, "final_loss": 0.1953125, "grad_norm": 0.0, "learning_rate": 4.9733363651595635e-09, "loss": 0.128, "projector_lr": 1.4920009095478692e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.703125, "rewards_train/margins": 6.03125, "rewards_train/rejected": -7.71875, "sft_loss": 0.875, "step": 5972 }, { "dpo_loss": 0.03662109375, "epoch": 0.96, "final_loss": 0.03662109375, "grad_norm": 0.0, "learning_rate": 4.937680218296214e-09, "loss": 0.0502, "projector_lr": 1.4813040654888643e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 6.0625, "rewards_train/rejected": -7.21875, "sft_loss": 0.671875, "step": 5973 }, { "dpo_loss": 0.2119140625, "epoch": 0.96, "final_loss": 0.2119140625, "grad_norm": 0.0, "learning_rate": 4.902151715130587e-09, "loss": 0.1389, "projector_lr": 1.4706455145391762e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.71875, "sft_loss": 0.8515625, "step": 5974 }, { "dpo_loss": 0.26171875, "epoch": 0.96, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 4.866750864822966e-09, "loss": 0.1601, "projector_lr": 1.4600252594468899e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.7265625, "rewards_train/margins": 2.84375, "rewards_train/rejected": -4.5625, "sft_loss": 0.8203125, "step": 5975 }, { "dpo_loss": 0.19140625, "epoch": 0.96, "final_loss": 0.19140625, "grad_norm": 0.0, "learning_rate": 4.8314776765009945e-09, "loss": 0.1592, "projector_lr": 1.4494433029502984e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.8515625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -7.0, "sft_loss": 1.1796875, "step": 5976 }, { "dpo_loss": 0.392578125, "epoch": 0.96, "final_loss": 0.392578125, "grad_norm": 0.0, "learning_rate": 4.796332159259231e-09, "loss": 0.432, "projector_lr": 1.4388996477777693e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.65625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -6.0625, "sft_loss": 0.75390625, "step": 5977 }, { "dpo_loss": 0.546875, "epoch": 0.96, "final_loss": 0.546875, "grad_norm": 0.0, "learning_rate": 4.761314322159426e-09, "loss": 0.2826, "projector_lr": 1.428394296647828e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 2.265625, "rewards_train/rejected": -3.21875, "sft_loss": 0.90234375, "step": 5978 }, { "dpo_loss": 0.27734375, "epoch": 0.96, "final_loss": 0.27734375, "grad_norm": 0.0, "learning_rate": 4.726424174230304e-09, "loss": 0.2775, "projector_lr": 1.417927252269091e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.03125, "sft_loss": 0.63671875, "step": 5979 }, { "dpo_loss": 0.0380859375, "epoch": 0.96, "final_loss": 0.0380859375, "grad_norm": 0.0, "learning_rate": 4.691661724467722e-09, "loss": 0.0927, "projector_lr": 1.4074985173403166e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.21875, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.21875, "sft_loss": 1.0546875, "step": 5980 }, { "dpo_loss": 0.177734375, "epoch": 0.96, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 4.657026981834622e-09, "loss": 0.3729, "projector_lr": 1.3971080945503866e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1689453125, "rewards_train/margins": 4.375, "rewards_train/rejected": -4.5625, "sft_loss": 0.8125, "step": 5981 }, { "dpo_loss": 0.154296875, "epoch": 0.96, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 4.622519955260973e-09, "loss": 0.086, "projector_lr": 1.3867559865782919e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.59375, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.40625, "sft_loss": 0.6796875, "step": 5982 }, { "dpo_loss": 0.1689453125, "epoch": 0.96, "final_loss": 0.1689453125, "grad_norm": 0.0, "learning_rate": 4.5881406536439345e-09, "loss": 0.1139, "projector_lr": 1.3764421960931805e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.21875, "sft_loss": 0.5703125, "step": 5983 }, { "dpo_loss": 0.228515625, "epoch": 0.96, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 4.553889085847529e-09, "loss": 0.1438, "projector_lr": 1.3661667257542588e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.09375, "rewards_train/margins": 5.40625, "rewards_train/rejected": -6.5, "sft_loss": 0.84765625, "step": 5984 }, { "dpo_loss": 0.1259765625, "epoch": 0.96, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 4.519765260703024e-09, "loss": 0.369, "projector_lr": 1.3559295782109072e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.30078125, "rewards_train/margins": 3.375, "rewards_train/rejected": -3.671875, "sft_loss": 1.1015625, "step": 5985 }, { "dpo_loss": 0.19140625, "epoch": 0.96, "final_loss": 0.19140625, "grad_norm": 0.0, "learning_rate": 4.485769187008659e-09, "loss": 0.1001, "projector_lr": 1.3457307561025977e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.458984375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.03125, "sft_loss": 0.83203125, "step": 5986 }, { "dpo_loss": 0.2255859375, "epoch": 0.96, "final_loss": 0.2255859375, "grad_norm": 0.0, "learning_rate": 4.451900873529868e-09, "loss": 0.1176, "projector_lr": 1.3355702620589604e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.392578125, "rewards_train/margins": 4.40625, "rewards_train/rejected": -4.78125, "sft_loss": 0.6171875, "step": 5987 }, { "dpo_loss": 0.291015625, "epoch": 0.96, "final_loss": 0.291015625, "grad_norm": 0.0, "learning_rate": 4.418160328998888e-09, "loss": 0.3441, "projector_lr": 1.3254480986996665e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.984375, "rewards_train/margins": 5.59375, "rewards_train/rejected": -7.5625, "sft_loss": 0.8671875, "step": 5988 }, { "dpo_loss": 0.38671875, "epoch": 0.96, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 4.384547562115204e-09, "loss": 0.2037, "projector_lr": 1.3153642686345612e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.71484375, "rewards_train/margins": 3.75, "rewards_train/rejected": -4.46875, "sft_loss": 0.8046875, "step": 5989 }, { "dpo_loss": 0.0089111328125, "epoch": 0.96, "final_loss": 0.0089111328125, "grad_norm": 0.0, "learning_rate": 4.3510625815453835e-09, "loss": 0.0394, "projector_lr": 1.3053187744636153e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.01806640625, "rewards_train/margins": 6.40625, "rewards_train/rejected": -6.375, "sft_loss": 0.61328125, "step": 5990 }, { "dpo_loss": 0.006683349609375, "epoch": 0.96, "final_loss": 0.006683349609375, "grad_norm": 0.0, "learning_rate": 4.317705395922966e-09, "loss": 0.1483, "projector_lr": 1.2953116187768898e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 6.4375, "rewards_train/rejected": -7.34375, "sft_loss": 0.62109375, "step": 5991 }, { "dpo_loss": 0.11865234375, "epoch": 0.96, "final_loss": 0.11865234375, "grad_norm": 0.0, "learning_rate": 4.284476013848459e-09, "loss": 0.0867, "projector_lr": 1.2853428041545378e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.8125, "sft_loss": 0.74609375, "step": 5992 }, { "dpo_loss": 0.09765625, "epoch": 0.96, "final_loss": 0.09765625, "grad_norm": 0.0, "learning_rate": 4.251374443889677e-09, "loss": 0.071, "projector_lr": 1.2754123331669032e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.21875, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.96875, "sft_loss": 0.58984375, "step": 5993 }, { "dpo_loss": 0.35546875, "epoch": 0.96, "final_loss": 0.35546875, "grad_norm": 0.0, "learning_rate": 4.218400694581125e-09, "loss": 0.3566, "projector_lr": 1.2655202083743378e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.28125, "sft_loss": 0.76171875, "step": 5994 }, { "dpo_loss": 0.322265625, "epoch": 0.96, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 4.185554774424727e-09, "loss": 0.2551, "projector_lr": 1.255666432327418e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.59375, "rewards_train/margins": 3.46875, "rewards_train/rejected": -6.0625, "sft_loss": 1.734375, "step": 5995 }, { "dpo_loss": 0.04052734375, "epoch": 0.96, "final_loss": 0.04052734375, "grad_norm": 0.0, "learning_rate": 4.15283669188915e-09, "loss": 0.1918, "projector_lr": 1.245851007566745e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.640625, "rewards_train/margins": 4.625, "rewards_train/rejected": -6.28125, "sft_loss": 0.74609375, "step": 5996 }, { "dpo_loss": 0.115234375, "epoch": 0.96, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 4.1202464554102036e-09, "loss": 0.4539, "projector_lr": 1.2360739366230612e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.34375, "sft_loss": 0.60546875, "step": 5997 }, { "dpo_loss": 0.408203125, "epoch": 0.96, "final_loss": 0.408203125, "grad_norm": 0.0, "learning_rate": 4.087784073390832e-09, "loss": 0.303, "projector_lr": 1.2263352220172497e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 2.84375, "rewards_train/rejected": -3.453125, "sft_loss": 0.6796875, "step": 5998 }, { "dpo_loss": 0.455078125, "epoch": 0.96, "final_loss": 0.455078125, "grad_norm": 0.0, "learning_rate": 4.055449554200896e-09, "loss": 0.2503, "projector_lr": 1.2166348662602688e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -2.21875, "rewards_train/margins": 4.84375, "rewards_train/rejected": -7.0625, "sft_loss": 0.82421875, "step": 5999 }, { "dpo_loss": 0.59375, "epoch": 0.96, "final_loss": 0.59375, "grad_norm": 0.0, "learning_rate": 4.023242906177226e-09, "loss": 0.5625, "projector_lr": 1.206972871853168e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.71875, "rewards_train/margins": 3.125, "rewards_train/rejected": -3.84375, "sft_loss": 0.8359375, "step": 6000 }, { "dpo_loss": 0.2099609375, "epoch": 0.96, "final_loss": 0.2099609375, "grad_norm": 0.0, "learning_rate": 3.991164137623959e-09, "loss": 0.2388, "projector_lr": 1.1973492412871879e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 6.4375, "rewards_train/rejected": -7.625, "sft_loss": 0.78125, "step": 6001 }, { "dpo_loss": 0.045166015625, "epoch": 0.96, "final_loss": 0.045166015625, "grad_norm": 0.0, "learning_rate": 3.959213256811922e-09, "loss": 0.0928, "projector_lr": 1.1877639770435766e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.66796875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.875, "sft_loss": 0.48828125, "step": 6002 }, { "dpo_loss": 0.412109375, "epoch": 0.96, "final_loss": 0.412109375, "grad_norm": 0.0, "learning_rate": 3.927390271979192e-09, "loss": 0.2441, "projector_lr": 1.1782170815937576e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.84375, "sft_loss": 0.671875, "step": 6003 }, { "dpo_loss": 0.1484375, "epoch": 0.96, "final_loss": 0.1484375, "grad_norm": 0.0, "learning_rate": 3.895695191330761e-09, "loss": 0.2998, "projector_lr": 1.1687085573992285e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": 0.2001953125, "rewards_train/margins": 5.65625, "rewards_train/rejected": -5.4375, "sft_loss": 0.70703125, "step": 6004 }, { "dpo_loss": 0.006011962890625, "epoch": 0.96, "final_loss": 0.006011962890625, "grad_norm": 0.0, "learning_rate": 3.864128023038815e-09, "loss": 0.0966, "projector_lr": 1.1592384069116446e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0654296875, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.03125, "sft_loss": 0.5234375, "step": 6005 }, { "dpo_loss": 0.365234375, "epoch": 0.96, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 3.832688775242288e-09, "loss": 0.2308, "projector_lr": 1.1498066325726863e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.578125, "rewards_train/margins": 3.390625, "rewards_train/rejected": -4.96875, "sft_loss": 0.8984375, "step": 6006 }, { "dpo_loss": 0.09130859375, "epoch": 0.96, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 3.801377456047306e-09, "loss": 0.0758, "projector_lr": 1.1404132368141918e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.46484375, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.3125, "sft_loss": 1.3125, "step": 6007 }, { "dpo_loss": 0.6875, "epoch": 0.96, "final_loss": 0.6875, "grad_norm": 0.0, "learning_rate": 3.770194073526967e-09, "loss": 0.4781, "projector_lr": 1.13105822205809e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -2.09375, "rewards_train/margins": 2.109375, "rewards_train/rejected": -4.21875, "sft_loss": 0.81640625, "step": 6008 }, { "dpo_loss": 0.1875, "epoch": 0.96, "final_loss": 0.1875, "grad_norm": 0.0, "learning_rate": 3.739138635721451e-09, "loss": 0.1069, "projector_lr": 1.1217415907164353e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.8046875, "rewards_train/margins": 2.28125, "rewards_train/rejected": -4.09375, "sft_loss": 1.3203125, "step": 6009 }, { "dpo_loss": 0.1015625, "epoch": 0.96, "final_loss": 0.1015625, "grad_norm": 0.0, "learning_rate": 3.708211150637852e-09, "loss": 0.2394, "projector_lr": 1.1124633451913557e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.109375, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.4375, "sft_loss": 0.81640625, "step": 6010 }, { "dpo_loss": 0.0037689208984375, "epoch": 0.96, "final_loss": 0.0037689208984375, "grad_norm": 0.0, "learning_rate": 3.6774116262503486e-09, "loss": 0.2091, "projector_lr": 1.1032234878751046e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1875, "rewards_train/margins": 8.5, "rewards_train/rejected": -8.6875, "sft_loss": 0.61328125, "step": 6011 }, { "dpo_loss": 0.07421875, "epoch": 0.96, "final_loss": 0.07421875, "grad_norm": 0.0, "learning_rate": 3.646740070500087e-09, "loss": 0.0752, "projector_lr": 1.0940220211500262e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.21875, "sft_loss": 1.3203125, "step": 6012 }, { "dpo_loss": 0.8359375, "epoch": 0.96, "final_loss": 0.8359375, "grad_norm": 0.0, "learning_rate": 3.616196491295187e-09, "loss": 0.4397, "projector_lr": 1.0848589473885562e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.890625, "rewards_train/margins": 1.9609375, "rewards_train/rejected": -3.84375, "sft_loss": 0.72265625, "step": 6013 }, { "dpo_loss": 0.2109375, "epoch": 0.96, "final_loss": 0.2109375, "grad_norm": 0.0, "learning_rate": 3.585780896510793e-09, "loss": 0.2175, "projector_lr": 1.0757342689532378e-08, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.40625, "sft_loss": 0.625, "step": 6014 }, { "dpo_loss": 0.232421875, "epoch": 0.96, "final_loss": 0.232421875, "grad_norm": 0.0, "learning_rate": 3.5554932939891315e-09, "loss": 0.1871, "projector_lr": 1.0666479881967395e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.68359375, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.03125, "sft_loss": 0.71484375, "step": 6015 }, { "dpo_loss": 0.05908203125, "epoch": 0.96, "final_loss": 0.05908203125, "grad_norm": 0.0, "learning_rate": 3.525333691539345e-09, "loss": 0.0321, "projector_lr": 1.0576001074618036e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.09375, "sft_loss": 0.62890625, "step": 6016 }, { "dpo_loss": 0.00799560546875, "epoch": 0.96, "final_loss": 0.00799560546875, "grad_norm": 0.0, "learning_rate": 3.4953020969376024e-09, "loss": 0.084, "projector_lr": 1.0485906290812808e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.259765625, "rewards_train/margins": 6.3125, "rewards_train/rejected": -6.5625, "sft_loss": 0.640625, "step": 6017 }, { "dpo_loss": 0.031494140625, "epoch": 0.96, "final_loss": 0.031494140625, "grad_norm": 0.0, "learning_rate": 3.4653985179269874e-09, "loss": 0.1001, "projector_lr": 1.0396195553780963e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.11181640625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.3125, "sft_loss": 0.70703125, "step": 6018 }, { "dpo_loss": 0.103515625, "epoch": 0.96, "final_loss": 0.103515625, "grad_norm": 0.0, "learning_rate": 3.435622962217777e-09, "loss": 0.1827, "projector_lr": 1.030686888665333e-08, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -6.25, "sft_loss": 0.859375, "step": 6019 }, { "dpo_loss": 0.87109375, "epoch": 0.96, "final_loss": 0.87109375, "grad_norm": 0.0, "learning_rate": 3.4059754374869966e-09, "loss": 0.5457, "projector_lr": 1.021792631246099e-08, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.4140625, "rewards_train/margins": 3.875, "rewards_train/rejected": -5.28125, "sft_loss": 1.0390625, "step": 6020 }, { "dpo_loss": 0.11669921875, "epoch": 0.96, "final_loss": 0.11669921875, "grad_norm": 0.0, "learning_rate": 3.376455951378754e-09, "loss": 0.2007, "projector_lr": 1.0129367854136262e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3984375, "rewards_train/margins": 3.53125, "rewards_train/rejected": -4.9375, "sft_loss": 0.96875, "step": 6021 }, { "dpo_loss": 0.00909423828125, "epoch": 0.96, "final_loss": 0.00909423828125, "grad_norm": 0.0, "learning_rate": 3.347064511504294e-09, "loss": 0.1244, "projector_lr": 1.0041193534512883e-08, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.06591796875, "rewards_train/margins": 5.875, "rewards_train/rejected": -5.9375, "sft_loss": 0.478515625, "step": 6022 }, { "dpo_loss": 0.1455078125, "epoch": 0.96, "final_loss": 0.1455078125, "grad_norm": 0.0, "learning_rate": 3.3178011254416104e-09, "loss": 0.1716, "projector_lr": 9.953403376324833e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.94921875, "rewards_train/margins": 5.875, "rewards_train/rejected": -6.8125, "sft_loss": 0.66796875, "step": 6023 }, { "dpo_loss": 0.01141357421875, "epoch": 0.96, "final_loss": 0.01141357421875, "grad_norm": 0.0, "learning_rate": 3.288665800735835e-09, "loss": 0.048, "projector_lr": 9.865997402207505e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.1123046875, "rewards_train/margins": 6.1875, "rewards_train/rejected": -6.3125, "sft_loss": 0.74609375, "step": 6024 }, { "dpo_loss": 0.0712890625, "epoch": 0.96, "final_loss": 0.0712890625, "grad_norm": 0.0, "learning_rate": 3.259658544899013e-09, "loss": 0.1252, "projector_lr": 9.77897563469704e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.15625, "rewards_train/margins": 6.21875, "rewards_train/rejected": -7.375, "sft_loss": 0.64453125, "step": 6025 }, { "dpo_loss": 0.6484375, "epoch": 0.96, "final_loss": 0.6484375, "grad_norm": 0.0, "learning_rate": 3.2307793654101636e-09, "loss": 0.4517, "projector_lr": 9.69233809623049e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.46875, "rewards_train/margins": 2.6875, "rewards_train/rejected": -4.15625, "sft_loss": 0.89453125, "step": 6026 }, { "dpo_loss": 0.00836181640625, "epoch": 0.96, "final_loss": 0.00836181640625, "grad_norm": 0.0, "learning_rate": 3.2020282697153844e-09, "loss": 0.0768, "projector_lr": 9.606084809146153e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.443359375, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.0625, "sft_loss": 0.80859375, "step": 6027 }, { "dpo_loss": 0.047607421875, "epoch": 0.96, "final_loss": 0.047607421875, "grad_norm": 0.0, "learning_rate": 3.1734052652276356e-09, "loss": 0.067, "projector_lr": 9.520215795682908e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2734375, "rewards_train/margins": 6.34375, "rewards_train/rejected": -6.625, "sft_loss": 0.58984375, "step": 6028 }, { "dpo_loss": 0.02490234375, "epoch": 0.96, "final_loss": 0.02490234375, "grad_norm": 0.0, "learning_rate": 3.1449103593268466e-09, "loss": 0.0188, "projector_lr": 9.43473107798054e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.828125, "rewards_train/margins": 6.0625, "rewards_train/rejected": -6.875, "sft_loss": 0.66796875, "step": 6029 }, { "dpo_loss": 0.0869140625, "epoch": 0.96, "final_loss": 0.0869140625, "grad_norm": 0.0, "learning_rate": 3.1165435593600296e-09, "loss": 0.0658, "projector_lr": 9.34963067808009e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.625, "sft_loss": 0.74609375, "step": 6030 }, { "dpo_loss": 0.06689453125, "epoch": 0.96, "final_loss": 0.06689453125, "grad_norm": 0.0, "learning_rate": 3.0883048726410568e-09, "loss": 0.1222, "projector_lr": 9.26491461792317e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.640625, "rewards_train/margins": 6.65625, "rewards_train/rejected": -8.3125, "sft_loss": 0.54296875, "step": 6031 }, { "dpo_loss": 0.70703125, "epoch": 0.97, "final_loss": 0.70703125, "grad_norm": 0.0, "learning_rate": 3.060194306450825e-09, "loss": 0.3705, "projector_lr": 9.180582919352477e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 5.34375, "rewards_train/rejected": -6.375, "sft_loss": 0.86328125, "step": 6032 }, { "dpo_loss": 0.12353515625, "epoch": 0.97, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 3.032211868037149e-09, "loss": 0.2523, "projector_lr": 9.096635604111447e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7265625, "rewards_train/margins": 6.375, "rewards_train/rejected": -7.09375, "sft_loss": 0.875, "step": 6033 }, { "dpo_loss": 0.380859375, "epoch": 0.97, "final_loss": 0.380859375, "grad_norm": 0.0, "learning_rate": 3.0043575646149234e-09, "loss": 0.2866, "projector_lr": 9.01307269384477e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.71875, "sft_loss": 0.8046875, "step": 6034 }, { "dpo_loss": 0.01226806640625, "epoch": 0.97, "final_loss": 0.01226806640625, "grad_norm": 0.0, "learning_rate": 2.976631403365848e-09, "loss": 0.0337, "projector_lr": 8.929894210097545e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.322265625, "rewards_train/margins": 5.625, "rewards_train/rejected": -5.9375, "sft_loss": 0.8515625, "step": 6035 }, { "dpo_loss": 0.015869140625, "epoch": 0.97, "final_loss": 0.015869140625, "grad_norm": 0.0, "learning_rate": 2.9490333914386488e-09, "loss": 0.3443, "projector_lr": 8.847100174315948e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.341796875, "rewards_train/margins": 6.59375, "rewards_train/rejected": -6.9375, "sft_loss": 0.69140625, "step": 6036 }, { "dpo_loss": 0.154296875, "epoch": 0.97, "final_loss": 0.154296875, "grad_norm": 0.0, "learning_rate": 2.921563535949134e-09, "loss": 0.3846, "projector_lr": 8.764690607847403e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75390625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.9375, "sft_loss": 0.8359375, "step": 6037 }, { "dpo_loss": 0.058349609375, "epoch": 0.97, "final_loss": 0.058349609375, "grad_norm": 0.0, "learning_rate": 2.894221843979805e-09, "loss": 0.0336, "projector_lr": 8.682665531939415e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.412109375, "rewards_train/margins": 6.25, "rewards_train/rejected": -6.65625, "sft_loss": 0.5859375, "step": 6038 }, { "dpo_loss": 0.0634765625, "epoch": 0.97, "final_loss": 0.0634765625, "grad_norm": 0.0, "learning_rate": 2.8670083225803556e-09, "loss": 0.0961, "projector_lr": 8.601024967741066e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.140625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -6.09375, "sft_loss": 0.9375, "step": 6039 }, { "dpo_loss": 0.236328125, "epoch": 0.97, "final_loss": 0.236328125, "grad_norm": 0.0, "learning_rate": 2.8399229787673395e-09, "loss": 0.1633, "projector_lr": 8.51976893630202e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.9921875, "rewards_train/margins": 4.0625, "rewards_train/rejected": -6.0625, "sft_loss": 0.89453125, "step": 6040 }, { "dpo_loss": 0.1865234375, "epoch": 0.97, "final_loss": 0.1865234375, "grad_norm": 0.0, "learning_rate": 2.812965819524227e-09, "loss": 0.3564, "projector_lr": 8.438897458572681e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.80859375, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.0, "sft_loss": 0.6875, "step": 6041 }, { "dpo_loss": 0.0556640625, "epoch": 0.97, "final_loss": 0.0556640625, "grad_norm": 0.0, "learning_rate": 2.7861368518015682e-09, "loss": 0.0717, "projector_lr": 8.358410555404706e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.16796875, "rewards_train/margins": 4.15625, "rewards_train/rejected": -4.3125, "sft_loss": 0.86328125, "step": 6042 }, { "dpo_loss": 0.1923828125, "epoch": 0.97, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 2.759436082516664e-09, "loss": 0.2584, "projector_lr": 8.278308247549993e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9140625, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.96875, "sft_loss": 0.8984375, "step": 6043 }, { "dpo_loss": 0.060791015625, "epoch": 0.97, "final_loss": 0.060791015625, "grad_norm": 0.0, "learning_rate": 2.7328635185539518e-09, "loss": 0.2537, "projector_lr": 8.198590555661855e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9375, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.625, "sft_loss": 0.67578125, "step": 6044 }, { "dpo_loss": 0.0216064453125, "epoch": 0.97, "final_loss": 0.0216064453125, "grad_norm": 0.0, "learning_rate": 2.706419166764673e-09, "loss": 0.2194, "projector_lr": 8.119257500294019e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.38671875, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.0625, "sft_loss": 0.69921875, "step": 6045 }, { "dpo_loss": 0.06787109375, "epoch": 0.97, "final_loss": 0.06787109375, "grad_norm": 0.0, "learning_rate": 2.680103033967096e-09, "loss": 0.0602, "projector_lr": 8.040309101901288e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6953125, "rewards_train/margins": 5.09375, "rewards_train/rejected": -5.8125, "sft_loss": 0.81640625, "step": 6046 }, { "dpo_loss": 0.0771484375, "epoch": 0.97, "final_loss": 0.0771484375, "grad_norm": 0.0, "learning_rate": 2.6539151269464043e-09, "loss": 0.0418, "projector_lr": 7.961745380839213e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32421875, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.0, "sft_loss": 0.890625, "step": 6047 }, { "dpo_loss": 0.06005859375, "epoch": 0.97, "final_loss": 0.06005859375, "grad_norm": 0.0, "learning_rate": 2.6278554524547526e-09, "loss": 0.1156, "projector_lr": 7.883566357364258e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.71875, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.3125, "sft_loss": 0.6328125, "step": 6048 }, { "dpo_loss": 0.251953125, "epoch": 0.97, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 2.601924017211099e-09, "loss": 0.1759, "projector_lr": 7.805772051633298e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.63671875, "rewards_train/margins": 3.296875, "rewards_train/rejected": -3.9375, "sft_loss": 0.80078125, "step": 6049 }, { "dpo_loss": 0.263671875, "epoch": 0.97, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 2.5761208279015956e-09, "loss": 0.1807, "projector_lr": 7.728362483704788e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9375, "rewards_train/margins": 3.875, "rewards_train/rejected": -4.8125, "sft_loss": 0.7734375, "step": 6050 }, { "dpo_loss": 0.1904296875, "epoch": 0.97, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 2.5504458911790315e-09, "loss": 0.1016, "projector_lr": 7.651337673537096e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.65625, "sft_loss": 0.7890625, "step": 6051 }, { "dpo_loss": 0.10546875, "epoch": 0.97, "final_loss": 0.10546875, "grad_norm": 0.0, "learning_rate": 2.524899213663334e-09, "loss": 0.2442, "projector_lr": 7.574697640990003e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.3125, "rewards_train/margins": 6.15625, "rewards_train/rejected": -8.4375, "sft_loss": 1.0390625, "step": 6052 }, { "dpo_loss": 0.09619140625, "epoch": 0.97, "final_loss": 0.09619140625, "grad_norm": 0.0, "learning_rate": 2.499480801941234e-09, "loss": 0.1261, "projector_lr": 7.498442405823703e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.0, "rewards_train/margins": 6.59375, "rewards_train/rejected": -8.5625, "sft_loss": 0.85546875, "step": 6053 }, { "dpo_loss": 0.703125, "epoch": 0.97, "final_loss": 0.703125, "grad_norm": 0.0, "learning_rate": 2.4741906625665443e-09, "loss": 0.4163, "projector_lr": 7.422571987699634e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.21875, "rewards_train/margins": 2.671875, "rewards_train/rejected": -3.890625, "sft_loss": 0.96484375, "step": 6054 }, { "dpo_loss": 0.322265625, "epoch": 0.97, "final_loss": 0.322265625, "grad_norm": 0.0, "learning_rate": 2.4490288020598826e-09, "loss": 0.3004, "projector_lr": 7.347086406179649e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 4.0625, "rewards_train/rejected": -4.65625, "sft_loss": 0.76171875, "step": 6055 }, { "dpo_loss": 0.326171875, "epoch": 0.97, "final_loss": 0.326171875, "grad_norm": 0.0, "learning_rate": 2.4239952269087817e-09, "loss": 0.2076, "projector_lr": 7.271985680726345e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.09375, "rewards_train/margins": 3.40625, "rewards_train/rejected": -4.5, "sft_loss": 0.65625, "step": 6056 }, { "dpo_loss": 0.0272216796875, "epoch": 0.97, "final_loss": 0.0272216796875, "grad_norm": 0.0, "learning_rate": 2.3990899435678004e-09, "loss": 0.2471, "projector_lr": 7.1972698307034015e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.345703125, "rewards_train/margins": 5.8125, "rewards_train/rejected": -6.15625, "sft_loss": 0.6484375, "step": 6057 }, { "dpo_loss": 0.041748046875, "epoch": 0.97, "final_loss": 0.041748046875, "grad_norm": 0.0, "learning_rate": 2.374312958458247e-09, "loss": 0.0612, "projector_lr": 7.12293887537474e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.74609375, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.9375, "sft_loss": 0.7109375, "step": 6058 }, { "dpo_loss": 0.1767578125, "epoch": 0.97, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 2.3496642779686214e-09, "loss": 0.1006, "projector_lr": 7.0489928339058654e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.298828125, "rewards_train/margins": 2.78125, "rewards_train/rejected": -3.078125, "sft_loss": 0.89453125, "step": 6059 }, { "dpo_loss": 0.07177734375, "epoch": 0.97, "final_loss": 0.07177734375, "grad_norm": 0.0, "learning_rate": 2.325143908454008e-09, "loss": 0.1, "projector_lr": 6.975431725362025e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.2255859375, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.1875, "sft_loss": 0.77734375, "step": 6060 }, { "dpo_loss": 0.265625, "epoch": 0.97, "final_loss": 0.265625, "grad_norm": 0.0, "learning_rate": 2.300751856236682e-09, "loss": 0.2806, "projector_lr": 6.902255568710048e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.71875, "rewards_train/margins": 2.984375, "rewards_train/rejected": -3.703125, "sft_loss": 0.78125, "step": 6061 }, { "dpo_loss": 0.19921875, "epoch": 0.97, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 2.2764881276057245e-09, "loss": 0.1226, "projector_lr": 6.829464382817174e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.93359375, "rewards_train/margins": 3.765625, "rewards_train/rejected": -4.6875, "sft_loss": 0.7890625, "step": 6062 }, { "dpo_loss": 0.062255859375, "epoch": 0.97, "final_loss": 0.062255859375, "grad_norm": 0.0, "learning_rate": 2.2523527288171863e-09, "loss": 0.1928, "projector_lr": 6.757058186451559e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.3125, "sft_loss": 0.7421875, "step": 6063 }, { "dpo_loss": 0.0732421875, "epoch": 0.97, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 2.228345666093867e-09, "loss": 0.2035, "projector_lr": 6.685036998281602e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2421875, "rewards_train/margins": 6.6875, "rewards_train/rejected": -7.9375, "sft_loss": 0.98828125, "step": 6064 }, { "dpo_loss": 0.45703125, "epoch": 0.97, "final_loss": 0.45703125, "grad_norm": 0.0, "learning_rate": 2.204466945625705e-09, "loss": 0.2943, "projector_lr": 6.613400836877115e-09, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 3.109375, "rewards_train/rejected": -4.125, "sft_loss": 1.1484375, "step": 6065 }, { "dpo_loss": 0.115234375, "epoch": 0.97, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 2.1807165735693854e-09, "loss": 0.105, "projector_lr": 6.542149720708157e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.326171875, "rewards_train/margins": 7.25, "rewards_train/rejected": -7.5625, "sft_loss": 0.44140625, "step": 6066 }, { "dpo_loss": 0.365234375, "epoch": 0.97, "final_loss": 0.365234375, "grad_norm": 0.0, "learning_rate": 2.157094556048511e-09, "loss": 0.3518, "projector_lr": 6.4712836681455335e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.90625, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.90625, "sft_loss": 0.82421875, "step": 6067 }, { "dpo_loss": 0.03076171875, "epoch": 0.97, "final_loss": 0.03076171875, "grad_norm": 0.0, "learning_rate": 2.133600899153709e-09, "loss": 0.1203, "projector_lr": 6.400802697461128e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.09375, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.46875, "sft_loss": 0.953125, "step": 6068 }, { "dpo_loss": 0.69921875, "epoch": 0.97, "final_loss": 0.69921875, "grad_norm": 0.0, "learning_rate": 2.1102356089424124e-09, "loss": 0.6834, "projector_lr": 6.330706826827237e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4375, "rewards_train/margins": 5.84375, "rewards_train/rejected": -7.28125, "sft_loss": 1.0390625, "step": 6069 }, { "dpo_loss": 0.2158203125, "epoch": 0.97, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 2.086998691438968e-09, "loss": 0.1175, "projector_lr": 6.260996074316905e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.390625, "rewards_train/margins": 3.15625, "rewards_train/rejected": -4.53125, "sft_loss": 0.7890625, "step": 6070 }, { "dpo_loss": 0.435546875, "epoch": 0.97, "final_loss": 0.435546875, "grad_norm": 0.0, "learning_rate": 2.0638901526346398e-09, "loss": 0.253, "projector_lr": 6.191670457903919e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4921875, "rewards_train/margins": 2.34375, "rewards_train/rejected": -3.828125, "sft_loss": 0.890625, "step": 6071 }, { "dpo_loss": 0.263671875, "epoch": 0.97, "final_loss": 0.263671875, "grad_norm": 0.0, "learning_rate": 2.04090999848755e-09, "loss": 0.376, "projector_lr": 6.12272999546265e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1376953125, "rewards_train/margins": 3.0, "rewards_train/rejected": -3.140625, "sft_loss": 0.8359375, "step": 6072 }, { "dpo_loss": 0.04248046875, "epoch": 0.97, "final_loss": 0.04248046875, "grad_norm": 0.0, "learning_rate": 2.0180582349227927e-09, "loss": 0.1801, "projector_lr": 6.054174704768378e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.57421875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.125, "sft_loss": 0.828125, "step": 6073 }, { "dpo_loss": 0.04345703125, "epoch": 0.97, "final_loss": 0.04345703125, "grad_norm": 0.0, "learning_rate": 1.995334867832321e-09, "loss": 0.0236, "projector_lr": 5.986004603496964e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.078125, "rewards_train/margins": 5.6875, "rewards_train/rejected": -6.75, "sft_loss": 0.7890625, "step": 6074 }, { "dpo_loss": 0.26953125, "epoch": 0.97, "final_loss": 0.26953125, "grad_norm": 0.0, "learning_rate": 1.972739903074949e-09, "loss": 0.1817, "projector_lr": 5.918219709224848e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.21875, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.46875, "sft_loss": 0.78515625, "step": 6075 }, { "dpo_loss": 0.05712890625, "epoch": 0.97, "final_loss": 0.05712890625, "grad_norm": 0.0, "learning_rate": 1.9502733464764608e-09, "loss": 0.1413, "projector_lr": 5.850820039429383e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.359375, "rewards_train/margins": 5.625, "rewards_train/rejected": -7.0, "sft_loss": 0.94921875, "step": 6076 }, { "dpo_loss": 0.32421875, "epoch": 0.97, "final_loss": 0.32421875, "grad_norm": 0.0, "learning_rate": 1.9279352038295005e-09, "loss": 0.1726, "projector_lr": 5.783805611488502e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 3.34375, "rewards_train/rejected": -4.25, "sft_loss": 0.76171875, "step": 6077 }, { "dpo_loss": 0.11767578125, "epoch": 0.97, "final_loss": 0.11767578125, "grad_norm": 0.0, "learning_rate": 1.9057254808935163e-09, "loss": 0.2158, "projector_lr": 5.717176442680549e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.9375, "sft_loss": 0.58203125, "step": 6078 }, { "dpo_loss": 0.052978515625, "epoch": 0.97, "final_loss": 0.052978515625, "grad_norm": 0.0, "learning_rate": 1.8836441833949833e-09, "loss": 0.0402, "projector_lr": 5.650932550184951e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 5.90625, "rewards_train/rejected": -7.25, "sft_loss": 0.87109375, "step": 6079 }, { "dpo_loss": 0.07177734375, "epoch": 0.97, "final_loss": 0.07177734375, "grad_norm": 0.0, "learning_rate": 1.8616913170271808e-09, "loss": 0.0885, "projector_lr": 5.585073951081543e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7265625, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.5, "sft_loss": 0.6484375, "step": 6080 }, { "dpo_loss": 0.1357421875, "epoch": 0.97, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 1.8398668874503032e-09, "loss": 0.3764, "projector_lr": 5.51960066235091e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.234375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.9375, "sft_loss": 0.625, "step": 6081 }, { "dpo_loss": 0.451171875, "epoch": 0.97, "final_loss": 0.451171875, "grad_norm": 0.0, "learning_rate": 1.8181709002914048e-09, "loss": 0.3078, "projector_lr": 5.454512700874215e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.921875, "rewards_train/margins": 3.03125, "rewards_train/rejected": -3.953125, "sft_loss": 1.15625, "step": 6082 }, { "dpo_loss": 0.11474609375, "epoch": 0.97, "final_loss": 0.11474609375, "grad_norm": 0.0, "learning_rate": 1.7966033611444553e-09, "loss": 0.1359, "projector_lr": 5.389810083433366e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5703125, "rewards_train/margins": 4.3125, "rewards_train/rejected": -4.875, "sft_loss": 0.8671875, "step": 6083 }, { "dpo_loss": 0.011962890625, "epoch": 0.97, "final_loss": 0.011962890625, "grad_norm": 0.0, "learning_rate": 1.7751642755702844e-09, "loss": 0.1316, "projector_lr": 5.3254928267108536e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.671875, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.5625, "sft_loss": 0.69921875, "step": 6084 }, { "dpo_loss": 0.0703125, "epoch": 0.97, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 1.7538536490966371e-09, "loss": 0.0562, "projector_lr": 5.2615609472899115e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10595703125, "rewards_train/margins": 5.71875, "rewards_train/rejected": -5.84375, "sft_loss": 0.91015625, "step": 6085 }, { "dpo_loss": 0.07275390625, "epoch": 0.97, "final_loss": 0.07275390625, "grad_norm": 0.0, "learning_rate": 1.7326714872180626e-09, "loss": 0.0596, "projector_lr": 5.198014461654188e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.15625, "rewards_train/margins": 5.15625, "rewards_train/rejected": -4.96875, "sft_loss": 0.73046875, "step": 6086 }, { "dpo_loss": 0.07568359375, "epoch": 0.97, "final_loss": 0.07568359375, "grad_norm": 0.0, "learning_rate": 1.7116177953960253e-09, "loss": 0.1843, "projector_lr": 5.134853386188076e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.828125, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.8125, "sft_loss": 0.83203125, "step": 6087 }, { "dpo_loss": 0.216796875, "epoch": 0.97, "final_loss": 0.216796875, "grad_norm": 0.0, "learning_rate": 1.690692579058961e-09, "loss": 0.1651, "projector_lr": 5.072077737176883e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.62109375, "rewards_train/margins": 4.03125, "rewards_train/rejected": -4.65625, "sft_loss": 0.72265625, "step": 6088 }, { "dpo_loss": 0.21875, "epoch": 0.97, "final_loss": 0.21875, "grad_norm": 0.0, "learning_rate": 1.6698958436019984e-09, "loss": 0.2822, "projector_lr": 5.0096875308059955e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5859375, "rewards_train/margins": 4.78125, "rewards_train/rejected": -6.375, "sft_loss": 0.703125, "step": 6089 }, { "dpo_loss": 0.1376953125, "epoch": 0.97, "final_loss": 0.1376953125, "grad_norm": 0.0, "learning_rate": 1.6492275943872925e-09, "loss": 0.0875, "projector_lr": 4.947682783161877e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.53515625, "rewards_train/margins": 6.09375, "rewards_train/rejected": -6.625, "sft_loss": 0.77734375, "step": 6090 }, { "dpo_loss": 0.380859375, "epoch": 0.97, "final_loss": 0.380859375, "grad_norm": 0.0, "learning_rate": 1.6286878367437473e-09, "loss": 0.2556, "projector_lr": 4.8860635102312426e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 1.8203125, "rewards_train/rejected": -2.34375, "sft_loss": 0.77734375, "step": 6091 }, { "dpo_loss": 0.490234375, "epoch": 0.97, "final_loss": 0.490234375, "grad_norm": 0.0, "learning_rate": 1.6082765759672933e-09, "loss": 0.3399, "projector_lr": 4.82482972790188e-09, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.3125, "rewards_train/margins": 4.375, "rewards_train/rejected": -5.6875, "sft_loss": 0.84765625, "step": 6092 }, { "dpo_loss": 0.00958251953125, "epoch": 0.97, "final_loss": 0.00958251953125, "grad_norm": 0.0, "learning_rate": 1.5879938173205542e-09, "loss": 0.1734, "projector_lr": 4.763981451961663e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.35546875, "rewards_train/margins": 6.0, "rewards_train/rejected": -5.65625, "sft_loss": 0.53125, "step": 6093 }, { "dpo_loss": 0.39453125, "epoch": 0.98, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 1.56783956603318e-09, "loss": 0.3664, "projector_lr": 4.70351869809954e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 4.6875, "rewards_train/rejected": -6.03125, "sft_loss": 0.79296875, "step": 6094 }, { "dpo_loss": 0.017333984375, "epoch": 0.98, "final_loss": 0.017333984375, "grad_norm": 0.0, "learning_rate": 1.547813827301625e-09, "loss": 0.2901, "projector_lr": 4.6434414819048756e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.0625, "sft_loss": 0.8359375, "step": 6095 }, { "dpo_loss": 0.083984375, "epoch": 0.98, "final_loss": 0.083984375, "grad_norm": 0.0, "learning_rate": 1.5279166062890924e-09, "loss": 0.2279, "projector_lr": 4.583749818867278e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5859375, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.4375, "sft_loss": 0.73828125, "step": 6096 }, { "dpo_loss": 0.0498046875, "epoch": 0.98, "final_loss": 0.0498046875, "grad_norm": 0.0, "learning_rate": 1.508147908125812e-09, "loss": 0.0817, "projector_lr": 4.524443724377436e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1796875, "rewards_train/margins": 5.09375, "rewards_train/rejected": -6.3125, "sft_loss": 0.91015625, "step": 6097 }, { "dpo_loss": 0.38671875, "epoch": 0.98, "final_loss": 0.38671875, "grad_norm": 0.0, "learning_rate": 1.4885077379088174e-09, "loss": 0.2702, "projector_lr": 4.465523213726452e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.796875, "rewards_train/margins": 6.125, "rewards_train/rejected": -6.9375, "sft_loss": 0.79296875, "step": 6098 }, { "dpo_loss": 0.201171875, "epoch": 0.98, "final_loss": 0.201171875, "grad_norm": 0.0, "learning_rate": 1.4689961007020023e-09, "loss": 0.1871, "projector_lr": 4.406988302106007e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.91796875, "rewards_train/margins": 5.21875, "rewards_train/rejected": -6.15625, "sft_loss": 0.77734375, "step": 6099 }, { "dpo_loss": 0.010986328125, "epoch": 0.98, "final_loss": 0.010986328125, "grad_norm": 0.0, "learning_rate": 1.4496130015361762e-09, "loss": 0.0592, "projector_lr": 4.348839004608529e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 7.0625, "rewards_train/rejected": -8.125, "sft_loss": 0.6328125, "step": 6100 }, { "dpo_loss": 0.177734375, "epoch": 0.98, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 1.4303584454088413e-09, "loss": 0.2181, "projector_lr": 4.291075336226524e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 6.0, "rewards_train/rejected": -6.875, "sft_loss": 1.03125, "step": 6101 }, { "dpo_loss": 0.23046875, "epoch": 0.98, "final_loss": 0.23046875, "grad_norm": 0.0, "learning_rate": 1.411232437284582e-09, "loss": 0.1893, "projector_lr": 4.233697311853746e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6640625, "rewards_train/margins": 4.0, "rewards_train/rejected": -4.6875, "sft_loss": 0.85546875, "step": 6102 }, { "dpo_loss": 0.16796875, "epoch": 0.98, "final_loss": 0.16796875, "grad_norm": 0.0, "learning_rate": 1.39223498209462e-09, "loss": 0.4234, "projector_lr": 4.17670494628386e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2890625, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.5625, "sft_loss": 0.82421875, "step": 6103 }, { "dpo_loss": 0.1357421875, "epoch": 0.98, "final_loss": 0.1357421875, "grad_norm": 0.0, "learning_rate": 1.3733660847372597e-09, "loss": 0.0924, "projector_lr": 4.120098254211779e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2734375, "rewards_train/margins": 3.890625, "rewards_train/rejected": -5.15625, "sft_loss": 0.7265625, "step": 6104 }, { "dpo_loss": 0.12158203125, "epoch": 0.98, "final_loss": 0.12158203125, "grad_norm": 0.0, "learning_rate": 1.3546257500774427e-09, "loss": 0.095, "projector_lr": 4.063877250232328e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.474609375, "rewards_train/margins": 6.71875, "rewards_train/rejected": -7.1875, "sft_loss": 0.74609375, "step": 6105 }, { "dpo_loss": 0.05029296875, "epoch": 0.98, "final_loss": 0.05029296875, "grad_norm": 0.0, "learning_rate": 1.3360139829470817e-09, "loss": 0.064, "projector_lr": 4.008041948841246e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3046875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -4.84375, "sft_loss": 0.66796875, "step": 6106 }, { "dpo_loss": 0.078125, "epoch": 0.98, "final_loss": 0.078125, "grad_norm": 0.0, "learning_rate": 1.3175307881448937e-09, "loss": 0.1785, "projector_lr": 3.952592364434682e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.671875, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.28125, "sft_loss": 1.0234375, "step": 6107 }, { "dpo_loss": 0.126953125, "epoch": 0.98, "final_loss": 0.126953125, "grad_norm": 0.0, "learning_rate": 1.2991761704365112e-09, "loss": 0.083, "projector_lr": 3.897528511309534e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80859375, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.25, "sft_loss": 0.8828125, "step": 6108 }, { "dpo_loss": 0.1611328125, "epoch": 0.98, "final_loss": 0.1611328125, "grad_norm": 0.0, "learning_rate": 1.2809501345543706e-09, "loss": 0.3918, "projector_lr": 3.842850403663112e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.390625, "rewards_train/margins": 3.59375, "rewards_train/rejected": -4.96875, "sft_loss": 0.72265625, "step": 6109 }, { "dpo_loss": 0.09521484375, "epoch": 0.98, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 1.2628526851977128e-09, "loss": 0.0858, "projector_lr": 3.788558055593139e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.3515625, "rewards_train/margins": 5.75, "rewards_train/rejected": -7.09375, "sft_loss": 1.40625, "step": 6110 }, { "dpo_loss": 0.050048828125, "epoch": 0.98, "final_loss": 0.050048828125, "grad_norm": 0.0, "learning_rate": 1.2448838270326945e-09, "loss": 0.0392, "projector_lr": 3.734651481098084e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.1875, "sft_loss": 0.69921875, "step": 6111 }, { "dpo_loss": 0.234375, "epoch": 0.98, "final_loss": 0.234375, "grad_norm": 0.0, "learning_rate": 1.2270435646922762e-09, "loss": 0.3378, "projector_lr": 3.6811306940768284e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.875, "rewards_train/margins": 4.84375, "rewards_train/rejected": -6.71875, "sft_loss": 0.90234375, "step": 6112 }, { "dpo_loss": 0.11865234375, "epoch": 0.98, "final_loss": 0.11865234375, "grad_norm": 0.0, "learning_rate": 1.2093319027763337e-09, "loss": 0.1293, "projector_lr": 3.6279957083290017e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.390625, "rewards_train/margins": 6.1875, "rewards_train/rejected": -7.5625, "sft_loss": 0.8125, "step": 6113 }, { "dpo_loss": 0.1259765625, "epoch": 0.98, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 1.191748845851437e-09, "loss": 0.064, "projector_lr": 3.575246537554311e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.15625, "sft_loss": 0.77734375, "step": 6114 }, { "dpo_loss": 0.103515625, "epoch": 0.98, "final_loss": 0.103515625, "grad_norm": 0.0, "learning_rate": 1.174294398451181e-09, "loss": 0.1216, "projector_lr": 3.5228831953535436e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.60546875, "rewards_train/margins": 5.28125, "rewards_train/rejected": -5.875, "sft_loss": 0.8125, "step": 6115 }, { "dpo_loss": 0.1259765625, "epoch": 0.98, "final_loss": 0.1259765625, "grad_norm": 0.0, "learning_rate": 1.1569685650758555e-09, "loss": 0.1874, "projector_lr": 3.4709056952275664e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.400390625, "rewards_train/margins": 5.21875, "rewards_train/rejected": -5.625, "sft_loss": 0.6796875, "step": 6116 }, { "dpo_loss": 0.140625, "epoch": 0.98, "final_loss": 0.140625, "grad_norm": 0.0, "learning_rate": 1.1397713501926087e-09, "loss": 0.0895, "projector_lr": 3.4193140505778265e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.921875, "rewards_train/margins": 3.296875, "rewards_train/rejected": -4.21875, "sft_loss": 1.03125, "step": 6117 }, { "dpo_loss": 0.04052734375, "epoch": 0.98, "final_loss": 0.04052734375, "grad_norm": 0.0, "learning_rate": 1.1227027582355608e-09, "loss": 0.0427, "projector_lr": 3.3681082747066827e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 5.03125, "rewards_train/rejected": -6.25, "sft_loss": 0.75, "step": 6118 }, { "dpo_loss": 0.00848388671875, "epoch": 0.98, "final_loss": 0.00848388671875, "grad_norm": 0.0, "learning_rate": 1.1057627936054137e-09, "loss": 0.063, "projector_lr": 3.317288380816241e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.9609375, "rewards_train/margins": 7.5625, "rewards_train/rejected": -8.5, "sft_loss": 0.62890625, "step": 6119 }, { "dpo_loss": 0.056884765625, "epoch": 0.98, "final_loss": 0.056884765625, "grad_norm": 0.0, "learning_rate": 1.0889514606700623e-09, "loss": 0.1036, "projector_lr": 3.2668543820101872e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 6.125, "rewards_train/rejected": -6.90625, "sft_loss": 0.77734375, "step": 6120 }, { "dpo_loss": 0.306640625, "epoch": 0.98, "final_loss": 0.306640625, "grad_norm": 0.0, "learning_rate": 1.0722687637638727e-09, "loss": 0.2645, "projector_lr": 3.2168062912916185e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.34375, "sft_loss": 0.828125, "step": 6121 }, { "dpo_loss": 0.62890625, "epoch": 0.98, "final_loss": 0.62890625, "grad_norm": 0.0, "learning_rate": 1.0557147071882933e-09, "loss": 0.659, "projector_lr": 3.16714412156488e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.34375, "rewards_train/margins": 3.78125, "rewards_train/rejected": -5.125, "sft_loss": 0.58984375, "step": 6122 }, { "dpo_loss": 0.171875, "epoch": 0.98, "final_loss": 0.171875, "grad_norm": 0.0, "learning_rate": 1.0392892952114097e-09, "loss": 0.1193, "projector_lr": 3.1178678856342293e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 3.359375, "rewards_train/rejected": -4.53125, "sft_loss": 0.640625, "step": 6123 }, { "dpo_loss": 0.09765625, "epoch": 0.98, "final_loss": 0.09765625, "grad_norm": 0.0, "learning_rate": 1.0229925320683896e-09, "loss": 0.1441, "projector_lr": 3.068977596205169e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.46875, "rewards_train/margins": 3.421875, "rewards_train/rejected": -3.890625, "sft_loss": 0.69921875, "step": 6124 }, { "dpo_loss": 0.181640625, "epoch": 0.98, "final_loss": 0.181640625, "grad_norm": 0.0, "learning_rate": 1.0068244219609833e-09, "loss": 0.1476, "projector_lr": 3.0204732658829503e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2109375, "rewards_train/margins": 4.625, "rewards_train/rejected": -5.84375, "sft_loss": 0.890625, "step": 6125 }, { "dpo_loss": 0.54296875, "epoch": 0.98, "final_loss": 0.54296875, "grad_norm": 0.0, "learning_rate": 9.907849690579118e-10, "loss": 0.2788, "projector_lr": 2.9723549071737353e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 2.765625, "rewards_train/rejected": -3.53125, "sft_loss": 0.9609375, "step": 6126 }, { "dpo_loss": 0.2578125, "epoch": 0.98, "final_loss": 0.2578125, "grad_norm": 0.0, "learning_rate": 9.748741774946444e-10, "loss": 0.2865, "projector_lr": 2.9246225324839337e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.99609375, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.75, "sft_loss": 0.671875, "step": 6127 }, { "dpo_loss": 0.00921630859375, "epoch": 0.98, "final_loss": 0.00921630859375, "grad_norm": 0.0, "learning_rate": 9.590920513735668e-10, "loss": 0.0375, "projector_lr": 2.8772761541207005e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.46875, "rewards_train/margins": 7.65625, "rewards_train/rejected": -9.125, "sft_loss": 0.77734375, "step": 6128 }, { "dpo_loss": 0.12353515625, "epoch": 0.98, "final_loss": 0.12353515625, "grad_norm": 0.0, "learning_rate": 9.434385947638125e-10, "loss": 0.2653, "projector_lr": 2.8303157842914374e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59375, "rewards_train/margins": 4.34375, "rewards_train/rejected": -4.9375, "sft_loss": 0.70703125, "step": 6129 }, { "dpo_loss": 0.130859375, "epoch": 0.98, "final_loss": 0.130859375, "grad_norm": 0.0, "learning_rate": 9.279138117014307e-10, "loss": 0.2356, "projector_lr": 2.783741435104292e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.96875, "sft_loss": 0.98046875, "step": 6130 }, { "dpo_loss": 0.1171875, "epoch": 0.98, "final_loss": 0.1171875, "grad_norm": 0.0, "learning_rate": 9.125177061891087e-10, "loss": 0.1783, "projector_lr": 2.7375531185673262e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5234375, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.09375, "sft_loss": 0.81640625, "step": 6131 }, { "dpo_loss": 0.271484375, "epoch": 0.98, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 8.972502821966154e-10, "loss": 0.317, "projector_lr": 2.691750846589847e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3125, "rewards_train/margins": 4.71875, "rewards_train/rejected": -6.03125, "sft_loss": 0.8359375, "step": 6132 }, { "dpo_loss": 0.2421875, "epoch": 0.98, "final_loss": 0.2421875, "grad_norm": 0.0, "learning_rate": 8.821115436603021e-10, "loss": 0.1487, "projector_lr": 2.646334630980907e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.25, "rewards_train/margins": 2.265625, "rewards_train/rejected": -3.515625, "sft_loss": 0.79296875, "step": 6133 }, { "dpo_loss": 0.0439453125, "epoch": 0.98, "final_loss": 0.0439453125, "grad_norm": 0.0, "learning_rate": 8.671014944834909e-10, "loss": 0.2027, "projector_lr": 2.601304483450473e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.203125, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.6875, "sft_loss": 0.8515625, "step": 6134 }, { "dpo_loss": 0.09423828125, "epoch": 0.98, "final_loss": 0.09423828125, "grad_norm": 0.0, "learning_rate": 8.522201385362526e-10, "loss": 0.057, "projector_lr": 2.5566604156087582e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.53125, "rewards_train/margins": 5.59375, "rewards_train/rejected": -6.125, "sft_loss": 0.703125, "step": 6135 }, { "dpo_loss": 0.0986328125, "epoch": 0.98, "final_loss": 0.0986328125, "grad_norm": 0.0, "learning_rate": 8.374674796555736e-10, "loss": 0.1023, "projector_lr": 2.512402438966721e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.9296875, "rewards_train/margins": 3.90625, "rewards_train/rejected": -5.84375, "sft_loss": 0.890625, "step": 6136 }, { "dpo_loss": 0.01348876953125, "epoch": 0.98, "final_loss": 0.01348876953125, "grad_norm": 0.0, "learning_rate": 8.22843521645078e-10, "loss": 0.4909, "projector_lr": 2.468530564935234e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.8984375, "rewards_train/margins": 5.9375, "rewards_train/rejected": -6.84375, "sft_loss": 0.75390625, "step": 6137 }, { "dpo_loss": 0.0286865234375, "epoch": 0.98, "final_loss": 0.0286865234375, "grad_norm": 0.0, "learning_rate": 8.083482682753606e-10, "loss": 0.1475, "projector_lr": 2.425044804826082e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 8.375, "rewards_train/rejected": -9.0625, "sft_loss": 0.7421875, "step": 6138 }, { "dpo_loss": 0.026611328125, "epoch": 0.98, "final_loss": 0.026611328125, "grad_norm": 0.0, "learning_rate": 7.939817232838209e-10, "loss": 0.3812, "projector_lr": 2.3819451698514626e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.25, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.90625, "sft_loss": 0.671875, "step": 6139 }, { "dpo_loss": 0.2373046875, "epoch": 0.98, "final_loss": 0.2373046875, "grad_norm": 0.0, "learning_rate": 7.797438903746067e-10, "loss": 0.2556, "projector_lr": 2.3392316711238206e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1015625, "rewards_train/margins": 4.3125, "rewards_train/rejected": -5.40625, "sft_loss": 0.97265625, "step": 6140 }, { "dpo_loss": 0.1337890625, "epoch": 0.98, "final_loss": 0.1337890625, "grad_norm": 0.0, "learning_rate": 7.656347732186707e-10, "loss": 0.172, "projector_lr": 2.2969043196560124e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.140625, "rewards_train/margins": 6.8125, "rewards_train/rejected": -8.9375, "sft_loss": 0.90625, "step": 6141 }, { "dpo_loss": 0.4765625, "epoch": 0.98, "final_loss": 0.4765625, "grad_norm": 0.0, "learning_rate": 7.516543754538807e-10, "loss": 0.338, "projector_lr": 2.2549631263616423e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.140625, "rewards_train/margins": 7.375, "rewards_train/rejected": -8.5, "sft_loss": 0.8046875, "step": 6142 }, { "dpo_loss": 0.1064453125, "epoch": 0.98, "final_loss": 0.1064453125, "grad_norm": 0.0, "learning_rate": 7.378027006847975e-10, "loss": 0.1284, "projector_lr": 2.2134081020543927e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0078125, "rewards_train/margins": 5.96875, "rewards_train/rejected": -6.96875, "sft_loss": 0.7421875, "step": 6143 }, { "dpo_loss": 0.08984375, "epoch": 0.98, "final_loss": 0.08984375, "grad_norm": 0.0, "learning_rate": 7.240797524828979e-10, "loss": 0.1167, "projector_lr": 2.1722392574486937e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.73046875, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.65625, "sft_loss": 0.67578125, "step": 6144 }, { "dpo_loss": 0.0712890625, "epoch": 0.98, "final_loss": 0.0712890625, "grad_norm": 0.0, "learning_rate": 7.104855343864069e-10, "loss": 0.0765, "projector_lr": 2.131456603159221e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.462890625, "rewards_train/margins": 4.21875, "rewards_train/rejected": -4.65625, "sft_loss": 0.67578125, "step": 6145 }, { "dpo_loss": 0.220703125, "epoch": 0.98, "final_loss": 0.220703125, "grad_norm": 0.0, "learning_rate": 6.970200499003543e-10, "loss": 0.1312, "projector_lr": 2.0910601497010628e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.1630859375, "rewards_train/margins": 3.234375, "rewards_train/rejected": -3.390625, "sft_loss": 0.81640625, "step": 6146 }, { "dpo_loss": 0.1533203125, "epoch": 0.98, "final_loss": 0.1533203125, "grad_norm": 0.0, "learning_rate": 6.836833024965738e-10, "loss": 0.1423, "projector_lr": 2.0510499074897216e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.03125, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.28125, "sft_loss": 0.8984375, "step": 6147 }, { "dpo_loss": 0.30078125, "epoch": 0.98, "final_loss": 0.30078125, "grad_norm": 0.0, "learning_rate": 6.704752956138149e-10, "loss": 0.1644, "projector_lr": 2.0114258868414448e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0546875, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.5, "sft_loss": 0.80859375, "step": 6148 }, { "dpo_loss": 0.330078125, "epoch": 0.98, "final_loss": 0.330078125, "grad_norm": 0.0, "learning_rate": 6.573960326574646e-10, "loss": 0.2776, "projector_lr": 1.972188097972394e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.84375, "rewards_train/margins": 4.21875, "rewards_train/rejected": -5.0625, "sft_loss": 0.65625, "step": 6149 }, { "dpo_loss": 0.26171875, "epoch": 0.98, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 6.444455169997698e-10, "loss": 0.1818, "projector_lr": 1.9333365509993095e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.09375, "rewards_train/margins": 5.1875, "rewards_train/rejected": -6.28125, "sft_loss": 0.69921875, "step": 6150 }, { "dpo_loss": 0.08544921875, "epoch": 0.98, "final_loss": 0.08544921875, "grad_norm": 0.0, "learning_rate": 6.316237519798928e-10, "loss": 0.0778, "projector_lr": 1.8948712559396784e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0390625, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.5, "sft_loss": 0.73046875, "step": 6151 }, { "dpo_loss": 0.0264892578125, "epoch": 0.98, "final_loss": 0.0264892578125, "grad_norm": 0.0, "learning_rate": 6.189307409036893e-10, "loss": 0.1108, "projector_lr": 1.8567922227110678e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.431640625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.375, "sft_loss": 0.54296875, "step": 6152 }, { "dpo_loss": 0.177734375, "epoch": 0.98, "final_loss": 0.177734375, "grad_norm": 0.0, "learning_rate": 6.06366487043819e-10, "loss": 0.0983, "projector_lr": 1.819099461131457e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5390625, "rewards_train/margins": 3.609375, "rewards_train/rejected": -4.15625, "sft_loss": 0.8046875, "step": 6153 }, { "dpo_loss": 0.06591796875, "epoch": 0.98, "final_loss": 0.06591796875, "grad_norm": 0.0, "learning_rate": 5.939309936397463e-10, "loss": 0.2063, "projector_lr": 1.781792980919239e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.6796875, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.21875, "sft_loss": 0.609375, "step": 6154 }, { "dpo_loss": 0.2158203125, "epoch": 0.98, "final_loss": 0.2158203125, "grad_norm": 0.0, "learning_rate": 5.816242638978508e-10, "loss": 0.1438, "projector_lr": 1.7448727916935524e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 6.15625, "rewards_train/rejected": -7.03125, "sft_loss": 0.76953125, "step": 6155 }, { "dpo_loss": 0.6640625, "epoch": 0.98, "final_loss": 0.6640625, "grad_norm": 0.0, "learning_rate": 5.694463009911498e-10, "loss": 0.3476, "projector_lr": 1.7083389029734498e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.984375, "rewards_train/margins": 2.109375, "rewards_train/rejected": -3.09375, "sft_loss": 0.72265625, "step": 6156 }, { "dpo_loss": 0.031494140625, "epoch": 0.99, "final_loss": 0.031494140625, "grad_norm": 0.0, "learning_rate": 5.573971080595207e-10, "loss": 0.0185, "projector_lr": 1.6721913241785625e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.26953125, "rewards_train/margins": 6.53125, "rewards_train/rejected": -6.28125, "sft_loss": 0.734375, "step": 6157 }, { "dpo_loss": 0.310546875, "epoch": 0.99, "final_loss": 0.310546875, "grad_norm": 0.0, "learning_rate": 5.454766882097006e-10, "loss": 0.2166, "projector_lr": 1.636430064629102e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.90625, "rewards_train/margins": 3.28125, "rewards_train/rejected": -5.1875, "sft_loss": 1.1640625, "step": 6158 }, { "dpo_loss": 0.26171875, "epoch": 0.99, "final_loss": 0.26171875, "grad_norm": 0.0, "learning_rate": 5.336850445151753e-10, "loss": 0.2555, "projector_lr": 1.601055133545526e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.6328125, "rewards_train/margins": 4.1875, "rewards_train/rejected": -4.84375, "sft_loss": 0.609375, "step": 6159 }, { "dpo_loss": 0.09814453125, "epoch": 0.99, "final_loss": 0.09814453125, "grad_norm": 0.0, "learning_rate": 5.220221800161795e-10, "loss": 0.081, "projector_lr": 1.5660665400485386e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.46875, "rewards_train/rejected": -5.78125, "sft_loss": 0.56640625, "step": 6160 }, { "dpo_loss": 0.0390625, "epoch": 0.99, "final_loss": 0.0390625, "grad_norm": 0.0, "learning_rate": 5.104880977198079e-10, "loss": 0.2811, "projector_lr": 1.5314642931594237e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.78125, "rewards_train/margins": 4.28125, "rewards_train/rejected": -5.0625, "sft_loss": 1.0078125, "step": 6161 }, { "dpo_loss": 0.039794921875, "epoch": 0.99, "final_loss": 0.039794921875, "grad_norm": 0.0, "learning_rate": 4.990828005999592e-10, "loss": 0.0945, "projector_lr": 1.4972484017998777e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.32421875, "rewards_train/margins": 4.75, "rewards_train/rejected": -5.0625, "sft_loss": 0.59765625, "step": 6162 }, { "dpo_loss": 0.0162353515625, "epoch": 0.99, "final_loss": 0.0162353515625, "grad_norm": 0.0, "learning_rate": 4.878062915972259e-10, "loss": 0.1687, "projector_lr": 1.4634188747916776e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.515625, "rewards_train/margins": 6.375, "rewards_train/rejected": -6.90625, "sft_loss": 0.6171875, "step": 6163 }, { "dpo_loss": 0.115234375, "epoch": 0.99, "final_loss": 0.115234375, "grad_norm": 0.0, "learning_rate": 4.766585736191708e-10, "loss": 0.124, "projector_lr": 1.4299757208575125e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.98828125, "rewards_train/margins": 5.75, "rewards_train/rejected": -6.71875, "sft_loss": 0.5625, "step": 6164 }, { "dpo_loss": 0.6171875, "epoch": 0.99, "final_loss": 0.6171875, "grad_norm": 0.0, "learning_rate": 4.6563964953999504e-10, "loss": 0.4298, "projector_lr": 1.396918948619985e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.78125, "rewards_train/margins": 3.71875, "rewards_train/rejected": -5.5, "sft_loss": 0.87890625, "step": 6165 }, { "dpo_loss": 0.255859375, "epoch": 0.99, "final_loss": 0.255859375, "grad_norm": 0.0, "learning_rate": 4.547495222008146e-10, "loss": 0.3074, "projector_lr": 1.364248566602444e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.859375, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.3125, "sft_loss": 0.8515625, "step": 6166 }, { "dpo_loss": 0.166015625, "epoch": 0.99, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 4.439881944093282e-10, "loss": 0.1377, "projector_lr": 1.3319645832279848e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.15625, "rewards_train/margins": 5.0, "rewards_train/rejected": -6.15625, "sft_loss": 0.80859375, "step": 6167 }, { "dpo_loss": 0.2421875, "epoch": 0.99, "final_loss": 0.2421875, "grad_norm": 0.0, "learning_rate": 4.3335566894031617e-10, "loss": 0.1694, "projector_lr": 1.3000670068209486e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.15625, "sft_loss": 0.7734375, "step": 6168 }, { "dpo_loss": 0.03662109375, "epoch": 0.99, "final_loss": 0.03662109375, "grad_norm": 0.0, "learning_rate": 4.2285194853514114e-10, "loss": 0.0694, "projector_lr": 1.2685558456054236e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.10302734375, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.03125, "sft_loss": 0.48046875, "step": 6169 }, { "dpo_loss": 0.1767578125, "epoch": 0.99, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 4.1247703590197023e-10, "loss": 0.1833, "projector_lr": 1.2374311077059109e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.75390625, "rewards_train/margins": 4.40625, "rewards_train/rejected": -5.15625, "sft_loss": 0.796875, "step": 6170 }, { "dpo_loss": 0.341796875, "epoch": 0.99, "final_loss": 0.341796875, "grad_norm": 0.0, "learning_rate": 4.022309337158858e-10, "loss": 0.2998, "projector_lr": 1.2066928011476574e-09, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.3203125, "rewards_train/margins": 3.453125, "rewards_train/rejected": -4.75, "sft_loss": 1.359375, "step": 6171 }, { "dpo_loss": 0.0732421875, "epoch": 0.99, "final_loss": 0.0732421875, "grad_norm": 0.0, "learning_rate": 3.9211364461860796e-10, "loss": 0.2242, "projector_lr": 1.176340933855824e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.09375, "sft_loss": 0.60546875, "step": 6172 }, { "dpo_loss": 0.2060546875, "epoch": 0.99, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 3.821251712187723e-10, "loss": 0.2285, "projector_lr": 1.146375513656317e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.828125, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.65625, "sft_loss": 0.70703125, "step": 6173 }, { "dpo_loss": 0.0654296875, "epoch": 0.99, "final_loss": 0.0654296875, "grad_norm": 0.0, "learning_rate": 3.7226551609165215e-10, "loss": 0.1493, "projector_lr": 1.1167965482749564e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 6.21875, "rewards_train/rejected": -7.4375, "sft_loss": 0.84765625, "step": 6174 }, { "dpo_loss": 0.10986328125, "epoch": 0.99, "final_loss": 0.10986328125, "grad_norm": 0.0, "learning_rate": 3.6253468177954715e-10, "loss": 0.0586, "projector_lr": 1.0876040453386415e-09, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.4375, "sft_loss": 0.80859375, "step": 6175 }, { "dpo_loss": 0.0537109375, "epoch": 0.99, "final_loss": 0.0537109375, "grad_norm": 0.0, "learning_rate": 3.5293267079128386e-10, "loss": 0.0328, "projector_lr": 1.0587980123738517e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.88671875, "rewards_train/margins": 7.875, "rewards_train/rejected": -8.75, "sft_loss": 0.57421875, "step": 6176 }, { "dpo_loss": 0.13671875, "epoch": 0.99, "final_loss": 0.13671875, "grad_norm": 0.0, "learning_rate": 3.4345948560254855e-10, "loss": 0.0941, "projector_lr": 1.0303784568076458e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.453125, "rewards_train/margins": 5.25, "rewards_train/rejected": -6.6875, "sft_loss": 0.71875, "step": 6177 }, { "dpo_loss": 0.01239013671875, "epoch": 0.99, "final_loss": 0.01239013671875, "grad_norm": 0.0, "learning_rate": 3.341151286559429e-10, "loss": 0.1071, "projector_lr": 1.0023453859678288e-09, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.255859375, "rewards_train/margins": 6.875, "rewards_train/rejected": -7.125, "sft_loss": 0.68359375, "step": 6178 }, { "dpo_loss": 0.216796875, "epoch": 0.99, "final_loss": 0.216796875, "grad_norm": 0.0, "learning_rate": 3.2489960236076195e-10, "loss": 0.2255, "projector_lr": 9.74698807082286e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.6796875, "rewards_train/margins": 4.40625, "rewards_train/rejected": -6.0625, "sft_loss": 1.0859375, "step": 6179 }, { "dpo_loss": 0.16015625, "epoch": 0.99, "final_loss": 0.16015625, "grad_norm": 0.0, "learning_rate": 3.15812909092994e-10, "loss": 0.203, "projector_lr": 9.47438727278982e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.203125, "rewards_train/margins": 3.4375, "rewards_train/rejected": -5.625, "sft_loss": 1.21875, "step": 6180 }, { "dpo_loss": 0.1904296875, "epoch": 0.99, "final_loss": 0.1904296875, "grad_norm": 0.0, "learning_rate": 3.068550511955426e-10, "loss": 0.1014, "projector_lr": 9.205651535866277e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.546875, "rewards_train/margins": 6.03125, "rewards_train/rejected": -7.59375, "sft_loss": 0.76171875, "step": 6181 }, { "dpo_loss": 0.023193359375, "epoch": 0.99, "final_loss": 0.023193359375, "grad_norm": 0.0, "learning_rate": 2.980260309780047e-10, "loss": 0.2016, "projector_lr": 8.940780929340142e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.89453125, "rewards_train/margins": 5.84375, "rewards_train/rejected": -6.71875, "sft_loss": 0.62890625, "step": 6182 }, { "dpo_loss": 0.216796875, "epoch": 0.99, "final_loss": 0.216796875, "grad_norm": 0.0, "learning_rate": 2.8932585071683725e-10, "loss": 0.1923, "projector_lr": 8.679775521505118e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 3.84375, "rewards_train/rejected": -4.59375, "sft_loss": 0.875, "step": 6183 }, { "dpo_loss": 0.51953125, "epoch": 0.99, "final_loss": 0.51953125, "grad_norm": 0.0, "learning_rate": 2.8075451265524575e-10, "loss": 0.3187, "projector_lr": 8.422635379657373e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3828125, "rewards_train/margins": 3.390625, "rewards_train/rejected": -4.78125, "sft_loss": 0.76953125, "step": 6184 }, { "dpo_loss": 0.25, "epoch": 0.99, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 2.7231201900318467e-10, "loss": 0.1272, "projector_lr": 8.169360570095541e-10, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.03125, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.5, "sft_loss": 0.7265625, "step": 6185 }, { "dpo_loss": 0.09521484375, "epoch": 0.99, "final_loss": 0.09521484375, "grad_norm": 0.0, "learning_rate": 2.6399837193746833e-10, "loss": 0.0679, "projector_lr": 7.91995115812405e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.609375, "rewards_train/margins": 5.125, "rewards_train/rejected": -5.75, "sft_loss": 0.90234375, "step": 6186 }, { "dpo_loss": 0.0771484375, "epoch": 0.99, "final_loss": 0.0771484375, "grad_norm": 0.0, "learning_rate": 2.558135736015488e-10, "loss": 0.2068, "projector_lr": 7.674407208046463e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.443359375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.125, "sft_loss": 0.7265625, "step": 6187 }, { "dpo_loss": 0.0693359375, "epoch": 0.99, "final_loss": 0.0693359375, "grad_norm": 0.0, "learning_rate": 2.4775762610579345e-10, "loss": 0.1321, "projector_lr": 7.432728783173803e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.484375, "rewards_train/margins": 6.375, "rewards_train/rejected": -7.875, "sft_loss": 0.9140625, "step": 6188 }, { "dpo_loss": 0.228515625, "epoch": 0.99, "final_loss": 0.228515625, "grad_norm": 0.0, "learning_rate": 2.398305315272631e-10, "loss": 0.1743, "projector_lr": 7.194915945817893e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.125, "rewards_train/margins": 4.0625, "rewards_train/rejected": -5.1875, "sft_loss": 0.79296875, "step": 6189 }, { "dpo_loss": 0.1025390625, "epoch": 0.99, "final_loss": 0.1025390625, "grad_norm": 0.0, "learning_rate": 2.3203229190982276e-10, "loss": 0.0777, "projector_lr": 6.960968757294683e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.275390625, "rewards_train/margins": 6.75, "rewards_train/rejected": -7.0, "sft_loss": 0.62109375, "step": 6190 }, { "dpo_loss": 0.0673828125, "epoch": 0.99, "final_loss": 0.0673828125, "grad_norm": 0.0, "learning_rate": 2.2436290926419744e-10, "loss": 0.0934, "projector_lr": 6.730887277925924e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.046875, "rewards_train/margins": 5.46875, "rewards_train/rejected": -6.53125, "sft_loss": 0.94921875, "step": 6191 }, { "dpo_loss": 0.02978515625, "epoch": 0.99, "final_loss": 0.02978515625, "grad_norm": 0.0, "learning_rate": 2.168223855677498e-10, "loss": 0.0384, "projector_lr": 6.504671567032494e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": 0.00689697265625, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.1875, "sft_loss": 0.69140625, "step": 6192 }, { "dpo_loss": 0.150390625, "epoch": 0.99, "final_loss": 0.150390625, "grad_norm": 0.0, "learning_rate": 2.094107227647024e-10, "loss": 0.0819, "projector_lr": 6.282321682941072e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.4375, "rewards_train/margins": 5.90625, "rewards_train/rejected": -7.34375, "sft_loss": 1.234375, "step": 6193 }, { "dpo_loss": 0.283203125, "epoch": 0.99, "final_loss": 0.283203125, "grad_norm": 0.0, "learning_rate": 2.0212792276602664e-10, "loss": 0.2461, "projector_lr": 6.0638376829808e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.1484375, "rewards_train/margins": 4.1875, "rewards_train/rejected": -5.34375, "sft_loss": 0.71484375, "step": 6194 }, { "dpo_loss": 0.07080078125, "epoch": 0.99, "final_loss": 0.07080078125, "grad_norm": 0.0, "learning_rate": 1.949739874494427e-10, "loss": 0.0847, "projector_lr": 5.849219623483281e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.439453125, "rewards_train/margins": 5.625, "rewards_train/rejected": -6.0625, "sft_loss": 0.640625, "step": 6195 }, { "dpo_loss": 0.4375, "epoch": 0.99, "final_loss": 0.4375, "grad_norm": 0.0, "learning_rate": 1.8794891865947516e-10, "loss": 0.3088, "projector_lr": 5.638467559784255e-10, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.8359375, "rewards_train/margins": 4.53125, "rewards_train/rejected": -6.375, "sft_loss": 0.90234375, "step": 6196 }, { "dpo_loss": 0.0157470703125, "epoch": 0.99, "final_loss": 0.0157470703125, "grad_norm": 0.0, "learning_rate": 1.8105271820750833e-10, "loss": 0.1283, "projector_lr": 5.431581546225251e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.3125, "rewards_train/margins": 5.90625, "rewards_train/rejected": -6.21875, "sft_loss": 0.8125, "step": 6197 }, { "dpo_loss": 0.15234375, "epoch": 0.99, "final_loss": 0.15234375, "grad_norm": 0.0, "learning_rate": 1.7428538787150892e-10, "loss": 0.2469, "projector_lr": 5.228561636145268e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.3046875, "rewards_train/margins": 4.25, "rewards_train/rejected": -5.5625, "sft_loss": 0.7734375, "step": 6198 }, { "dpo_loss": 0.474609375, "epoch": 0.99, "final_loss": 0.474609375, "grad_norm": 0.0, "learning_rate": 1.6764692939641446e-10, "loss": 0.4609, "projector_lr": 5.029407881892433e-10, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.94140625, "rewards_train/margins": 3.859375, "rewards_train/rejected": -4.8125, "sft_loss": 0.7734375, "step": 6199 }, { "dpo_loss": 0.138671875, "epoch": 0.99, "final_loss": 0.138671875, "grad_norm": 0.0, "learning_rate": 1.6113734449374472e-10, "loss": 0.2293, "projector_lr": 4.834120334812342e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.5703125, "rewards_train/margins": 5.15625, "rewards_train/rejected": -6.71875, "sft_loss": 0.9765625, "step": 6200 }, { "dpo_loss": 0.11865234375, "epoch": 0.99, "final_loss": 0.11865234375, "grad_norm": 0.0, "learning_rate": 1.547566348419349e-10, "loss": 0.1108, "projector_lr": 4.6426990452580474e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.015625, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.6875, "sft_loss": 0.765625, "step": 6201 }, { "dpo_loss": 0.0830078125, "epoch": 0.99, "final_loss": 0.0830078125, "grad_norm": 0.0, "learning_rate": 1.48504802086169e-10, "loss": 0.0505, "projector_lr": 4.4551440625850704e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0625, "rewards_train/margins": 4.34375, "rewards_train/rejected": -5.40625, "sft_loss": 0.8203125, "step": 6202 }, { "dpo_loss": 0.34375, "epoch": 0.99, "final_loss": 0.34375, "grad_norm": 0.0, "learning_rate": 1.423818478383798e-10, "loss": 0.2972, "projector_lr": 4.271455435151395e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8828125, "rewards_train/margins": 4.5625, "rewards_train/rejected": -5.46875, "sft_loss": 0.6328125, "step": 6203 }, { "dpo_loss": 0.1708984375, "epoch": 0.99, "final_loss": 0.1708984375, "grad_norm": 0.0, "learning_rate": 1.3638777367724896e-10, "loss": 0.1314, "projector_lr": 4.0916332103174694e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.67578125, "rewards_train/margins": 3.203125, "rewards_train/rejected": -3.875, "sft_loss": 0.95703125, "step": 6204 }, { "dpo_loss": 0.2060546875, "epoch": 0.99, "final_loss": 0.2060546875, "grad_norm": 0.0, "learning_rate": 1.3052258114820692e-10, "loss": 0.1233, "projector_lr": 3.9156774344462077e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.890625, "rewards_train/margins": 3.375, "rewards_train/rejected": -4.28125, "sft_loss": 0.82421875, "step": 6205 }, { "dpo_loss": 0.1767578125, "epoch": 0.99, "final_loss": 0.1767578125, "grad_norm": 0.0, "learning_rate": 1.2478627176359947e-10, "loss": 0.1425, "projector_lr": 3.7435881529079843e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.015625, "rewards_train/margins": 4.4375, "rewards_train/rejected": -5.46875, "sft_loss": 0.83203125, "step": 6206 }, { "dpo_loss": 0.3125, "epoch": 0.99, "final_loss": 0.3125, "grad_norm": 0.0, "learning_rate": 1.191788470023547e-10, "loss": 0.2093, "projector_lr": 3.575365410070641e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.78515625, "rewards_train/margins": 6.8125, "rewards_train/rejected": -7.59375, "sft_loss": 0.7109375, "step": 6207 }, { "dpo_loss": 0.1015625, "epoch": 0.99, "final_loss": 0.1015625, "grad_norm": 0.0, "learning_rate": 1.1370030831031607e-10, "loss": 0.0929, "projector_lr": 3.4110092493094823e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.7734375, "rewards_train/margins": 5.1875, "rewards_train/rejected": -5.96875, "sft_loss": 0.796875, "step": 6208 }, { "dpo_loss": 0.03857421875, "epoch": 0.99, "final_loss": 0.03857421875, "grad_norm": 0.0, "learning_rate": 1.0835065709996483e-10, "loss": 0.058, "projector_lr": 3.250519712998945e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.1953125, "rewards_train/margins": 4.90625, "rewards_train/rejected": -6.09375, "sft_loss": 0.703125, "step": 6209 }, { "dpo_loss": 0.05419921875, "epoch": 0.99, "final_loss": 0.05419921875, "grad_norm": 0.0, "learning_rate": 1.0312989475069755e-10, "loss": 0.0919, "projector_lr": 3.093896842520927e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.96875, "rewards_train/margins": 6.125, "rewards_train/rejected": -7.09375, "sft_loss": 0.91796875, "step": 6210 }, { "dpo_loss": 0.208984375, "epoch": 0.99, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 9.803802260854866e-11, "loss": 0.4724, "projector_lr": 2.9411406782564597e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.234375, "rewards_train/margins": 4.0, "rewards_train/rejected": -5.25, "sft_loss": 0.93359375, "step": 6211 }, { "dpo_loss": 0.69140625, "epoch": 0.99, "final_loss": 0.69140625, "grad_norm": 0.0, "learning_rate": 9.307504198635686e-11, "loss": 0.3861, "projector_lr": 2.7922512595907055e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.2578125, "rewards_train/margins": 3.390625, "rewards_train/rejected": -4.65625, "sft_loss": 0.65234375, "step": 6212 }, { "dpo_loss": 0.0927734375, "epoch": 0.99, "final_loss": 0.0927734375, "grad_norm": 0.0, "learning_rate": 8.824095416382071e-11, "loss": 0.1127, "projector_lr": 2.647228624914622e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.65625, "rewards_train/margins": 4.78125, "rewards_train/rejected": -5.4375, "sft_loss": 0.80859375, "step": 6213 }, { "dpo_loss": 0.1513671875, "epoch": 0.99, "final_loss": 0.1513671875, "grad_norm": 0.0, "learning_rate": 8.353576038727662e-11, "loss": 0.1298, "projector_lr": 2.506072811618298e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.7578125, "rewards_train/margins": 5.3125, "rewards_train/rejected": -6.0625, "sft_loss": 0.69921875, "step": 6214 }, { "dpo_loss": 0.388671875, "epoch": 0.99, "final_loss": 0.388671875, "grad_norm": 0.0, "learning_rate": 7.895946186986524e-11, "loss": 0.2917, "projector_lr": 2.368783856095957e-10, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.328125, "rewards_train/margins": 4.53125, "rewards_train/rejected": -5.875, "sft_loss": 0.8046875, "step": 6215 }, { "dpo_loss": 0.0703125, "epoch": 0.99, "final_loss": 0.0703125, "grad_norm": 0.0, "learning_rate": 7.451205979153164e-11, "loss": 0.0644, "projector_lr": 2.2353617937459493e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 4.65625, "rewards_train/rejected": -5.75, "sft_loss": 1.015625, "step": 6216 }, { "dpo_loss": 0.1005859375, "epoch": 0.99, "final_loss": 0.1005859375, "grad_norm": 0.0, "learning_rate": 7.019355529902515e-11, "loss": 0.2492, "projector_lr": 2.1058066589707546e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -2.390625, "rewards_train/margins": 5.125, "rewards_train/rejected": -7.5, "sft_loss": 1.15625, "step": 6217 }, { "dpo_loss": 0.6015625, "epoch": 0.99, "final_loss": 0.6015625, "grad_norm": 0.0, "learning_rate": 6.600394950573296e-11, "loss": 0.4229, "projector_lr": 1.980118485171989e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.87109375, "rewards_train/margins": 6.1875, "rewards_train/rejected": -7.0625, "sft_loss": 0.7265625, "step": 6218 }, { "dpo_loss": 0.52734375, "epoch": 1.0, "final_loss": 0.52734375, "grad_norm": 0.0, "learning_rate": 6.194324349184654e-11, "loss": 0.2857, "projector_lr": 1.8582973047553965e-10, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.62890625, "rewards_train/margins": 3.984375, "rewards_train/rejected": -4.625, "sft_loss": 0.67578125, "step": 6219 }, { "dpo_loss": 0.2177734375, "epoch": 1.0, "final_loss": 0.2177734375, "grad_norm": 0.0, "learning_rate": 5.8011438304417237e-11, "loss": 0.144, "projector_lr": 1.7403431491325172e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.8515625, "rewards_train/margins": 4.96875, "rewards_train/rejected": -5.78125, "sft_loss": 0.70703125, "step": 6220 }, { "dpo_loss": 0.11083984375, "epoch": 1.0, "final_loss": 0.11083984375, "grad_norm": 0.0, "learning_rate": 5.420853495718969e-11, "loss": 0.0647, "projector_lr": 1.6262560487156907e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.359375, "rewards_train/margins": 3.921875, "rewards_train/rejected": -5.28125, "sft_loss": 0.82421875, "step": 6221 }, { "dpo_loss": 0.111328125, "epoch": 1.0, "final_loss": 0.111328125, "grad_norm": 0.0, "learning_rate": 5.053453443065736e-11, "loss": 0.0631, "projector_lr": 1.516036032919721e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0703125, "rewards_train/margins": 4.09375, "rewards_train/rejected": -5.15625, "sft_loss": 0.8671875, "step": 6222 }, { "dpo_loss": 0.271484375, "epoch": 1.0, "final_loss": 0.271484375, "grad_norm": 0.0, "learning_rate": 4.698943767211805e-11, "loss": 0.1945, "projector_lr": 1.4096831301635414e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.92578125, "rewards_train/margins": 2.171875, "rewards_train/rejected": -3.09375, "sft_loss": 0.84765625, "step": 6223 }, { "dpo_loss": 0.25, "epoch": 1.0, "final_loss": 0.25, "grad_norm": 0.0, "learning_rate": 4.357324559561837e-11, "loss": 0.1899, "projector_lr": 1.307197367868551e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.21875, "rewards_train/margins": 4.46875, "rewards_train/rejected": -4.6875, "sft_loss": 0.671875, "step": 6224 }, { "dpo_loss": 0.251953125, "epoch": 1.0, "final_loss": 0.251953125, "grad_norm": 0.0, "learning_rate": 4.028595908195376e-11, "loss": 0.1903, "projector_lr": 1.2085787724586128e-10, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.765625, "rewards_train/margins": 1.5625, "rewards_train/rejected": -3.328125, "sft_loss": 0.80078125, "step": 6225 }, { "dpo_loss": 0.1806640625, "epoch": 1.0, "final_loss": 0.1806640625, "grad_norm": 0.0, "learning_rate": 3.712757897866847e-11, "loss": 0.1419, "projector_lr": 1.1138273693600542e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.9921875, "rewards_train/margins": 4.125, "rewards_train/rejected": -5.125, "sft_loss": 0.64453125, "step": 6226 }, { "dpo_loss": 0.166015625, "epoch": 1.0, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 3.409810610016661e-11, "loss": 0.2682, "projector_lr": 1.0229431830049984e-10, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.875, "rewards_train/margins": 6.34375, "rewards_train/rejected": -7.21875, "sft_loss": 0.76171875, "step": 6227 }, { "dpo_loss": 0.18359375, "epoch": 1.0, "final_loss": 0.18359375, "grad_norm": 0.0, "learning_rate": 3.1197541227490076e-11, "loss": 0.1149, "projector_lr": 9.359262368247023e-11, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.6875, "rewards_train/margins": 2.875, "rewards_train/rejected": -3.5625, "sft_loss": 0.75, "step": 6228 }, { "dpo_loss": 0.08154296875, "epoch": 1.0, "final_loss": 0.08154296875, "grad_norm": 0.0, "learning_rate": 2.8425885108540603e-11, "loss": 0.0648, "projector_lr": 8.527765532562182e-11, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.59765625, "rewards_train/margins": 4.84375, "rewards_train/rejected": -5.4375, "sft_loss": 0.65234375, "step": 6229 }, { "dpo_loss": 0.39453125, "epoch": 1.0, "final_loss": 0.39453125, "grad_norm": 0.0, "learning_rate": 2.5783138457913244e-11, "loss": 0.2767, "projector_lr": 7.734941537373974e-11, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.546875, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.8125, "sft_loss": 0.8515625, "step": 6230 }, { "dpo_loss": 0.166015625, "epoch": 1.0, "final_loss": 0.166015625, "grad_norm": 0.0, "learning_rate": 2.326930195700738e-11, "loss": 0.1356, "projector_lr": 6.980790587102214e-11, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0625, "rewards_train/margins": 5.65625, "rewards_train/rejected": -6.71875, "sft_loss": 0.515625, "step": 6231 }, { "dpo_loss": 0.318359375, "epoch": 1.0, "final_loss": 0.318359375, "grad_norm": 0.0, "learning_rate": 2.0884376253971214e-11, "loss": 0.2181, "projector_lr": 6.265312876191365e-11, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -1.4296875, "rewards_train/margins": 3.765625, "rewards_train/rejected": -5.1875, "sft_loss": 0.77734375, "step": 6232 }, { "dpo_loss": 0.0673828125, "epoch": 1.0, "final_loss": 0.0673828125, "grad_norm": 0.0, "learning_rate": 1.8628361963757278e-11, "loss": 0.2382, "projector_lr": 5.588508589127184e-11, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 4.25, "rewards_train/rejected": -4.75, "sft_loss": 0.73828125, "step": 6233 }, { "dpo_loss": 0.019775390625, "epoch": 1.0, "final_loss": 0.019775390625, "grad_norm": 0.0, "learning_rate": 1.6501259668011415e-11, "loss": 0.0205, "projector_lr": 4.9503779004034245e-11, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.0037841796875, "rewards_train/margins": 6.21875, "rewards_train/rejected": -6.21875, "sft_loss": 0.765625, "step": 6234 }, { "dpo_loss": 0.052490234375, "epoch": 1.0, "final_loss": 0.052490234375, "grad_norm": 0.0, "learning_rate": 1.4503069915128285e-11, "loss": 0.1108, "projector_lr": 4.350920974538486e-11, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.82421875, "rewards_train/margins": 5.5625, "rewards_train/rejected": -6.375, "sft_loss": 0.83984375, "step": 6235 }, { "dpo_loss": 0.091796875, "epoch": 1.0, "final_loss": 0.091796875, "grad_norm": 0.0, "learning_rate": 1.263379322041791e-11, "loss": 0.0669, "projector_lr": 3.7901379661253734e-11, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.68359375, "rewards_train/margins": 5.5, "rewards_train/rejected": -6.1875, "sft_loss": 0.85546875, "step": 6236 }, { "dpo_loss": 0.014892578125, "epoch": 1.0, "final_loss": 0.014892578125, "grad_norm": 0.0, "learning_rate": 1.089343006571708e-11, "loss": 0.0868, "projector_lr": 3.268029019715124e-11, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.19140625, "rewards_train/margins": 7.28125, "rewards_train/rejected": -7.46875, "sft_loss": 0.71484375, "step": 6237 }, { "dpo_loss": 0.19921875, "epoch": 1.0, "final_loss": 0.19921875, "grad_norm": 0.0, "learning_rate": 9.281980899833453e-12, "loss": 0.3316, "projector_lr": 2.784594269950036e-11, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.353515625, "rewards_train/margins": 5.4375, "rewards_train/rejected": -5.78125, "sft_loss": 0.73046875, "step": 6238 }, { "dpo_loss": 0.09130859375, "epoch": 1.0, "final_loss": 0.09130859375, "grad_norm": 0.0, "learning_rate": 7.799446138212484e-12, "loss": 0.0821, "projector_lr": 2.3398338414637454e-11, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.671875, "rewards_train/margins": 4.59375, "rewards_train/rejected": -5.25, "sft_loss": 0.765625, "step": 6239 }, { "dpo_loss": 0.3203125, "epoch": 1.0, "final_loss": 0.3203125, "grad_norm": 0.0, "learning_rate": 6.445826163159474e-12, "loss": 0.1743, "projector_lr": 1.933747848947842e-11, "rewards_train/accuracies": 0.75, "rewards_train/chosen": -0.84375, "rewards_train/margins": 3.25, "rewards_train/rejected": -4.09375, "sft_loss": 0.48046875, "step": 6240 }, { "dpo_loss": 0.06640625, "epoch": 1.0, "final_loss": 0.06640625, "grad_norm": 0.0, "learning_rate": 5.2211213236175165e-12, "loss": 0.3435, "projector_lr": 1.566336397085255e-11, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.17578125, "rewards_train/margins": 5.6875, "rewards_train/rejected": -5.84375, "sft_loss": 0.55078125, "step": 6241 }, { "dpo_loss": 0.1025390625, "epoch": 1.0, "final_loss": 0.1025390625, "grad_norm": 0.0, "learning_rate": 4.125331935389553e-12, "loss": 0.093, "projector_lr": 1.2375995806168661e-11, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 5.125, "rewards_train/rejected": -6.1875, "sft_loss": 1.1953125, "step": 6242 }, { "dpo_loss": 0.1923828125, "epoch": 1.0, "final_loss": 0.1923828125, "grad_norm": 0.0, "learning_rate": 3.1584582809718317e-12, "loss": 0.1628, "projector_lr": 9.475374842915497e-12, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -2.015625, "rewards_train/margins": 3.953125, "rewards_train/rejected": -5.96875, "sft_loss": 1.15625, "step": 6243 }, { "dpo_loss": 0.52734375, "epoch": 1.0, "final_loss": 0.52734375, "grad_norm": 0.0, "learning_rate": 2.3205006096649326e-12, "loss": 0.3078, "projector_lr": 6.961501828994798e-12, "rewards_train/accuracies": 0.625, "rewards_train/chosen": -1.3359375, "rewards_train/margins": 1.453125, "rewards_train/rejected": -2.796875, "sft_loss": 0.9375, "step": 6244 }, { "dpo_loss": 0.00823974609375, "epoch": 1.0, "final_loss": 0.00823974609375, "grad_norm": 0.0, "learning_rate": 1.6114591375737673e-12, "loss": 0.072, "projector_lr": 4.834377412721302e-12, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.76953125, "rewards_train/margins": 7.25, "rewards_train/rejected": -8.0625, "sft_loss": 0.703125, "step": 6245 }, { "dpo_loss": 0.0458984375, "epoch": 1.0, "final_loss": 0.0458984375, "grad_norm": 0.0, "learning_rate": 1.031334047496557e-12, "loss": 0.0636, "projector_lr": 3.0940021424896714e-12, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -1.25, "rewards_train/margins": 6.25, "rewards_train/rejected": -7.5, "sft_loss": 0.60546875, "step": 6246 }, { "dpo_loss": 0.07666015625, "epoch": 1.0, "final_loss": 0.07666015625, "grad_norm": 0.0, "learning_rate": 5.801254889803431e-13, "loss": 0.0413, "projector_lr": 1.7403764669410294e-12, "rewards_train/accuracies": 1.0, "rewards_train/chosen": -0.80078125, "rewards_train/margins": 5.71875, "rewards_train/rejected": -6.53125, "sft_loss": 0.859375, "step": 6247 }, { "dpo_loss": 0.287109375, "epoch": 1.0, "final_loss": 0.287109375, "grad_norm": 0.0, "learning_rate": 2.578335783764984e-13, "loss": 0.184, "projector_lr": 7.735007351294954e-13, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -1.0859375, "rewards_train/margins": 3.515625, "rewards_train/rejected": -4.59375, "sft_loss": 0.92578125, "step": 6248 }, { "dpo_loss": 0.1201171875, "epoch": 1.0, "final_loss": 0.1201171875, "grad_norm": 0.0, "learning_rate": 6.445839872970538e-14, "loss": 0.216, "projector_lr": 1.9337519618911615e-13, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.5078125, "rewards_train/margins": 5.3125, "rewards_train/rejected": -5.8125, "sft_loss": 0.71875, "step": 6249 }, { "dpo_loss": 0.208984375, "epoch": 1.0, "final_loss": 0.208984375, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.2415, "projector_lr": 0.0, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.90625, "sft_loss": 0.578125, "step": 6250 }, { "dpo_loss": 0.208984375, "epoch": 1.0, "final_loss": 0.208984375, "projector_lr": 0.0, "rewards_train/accuracies": 0.875, "rewards_train/chosen": -0.97265625, "rewards_train/margins": 4.9375, "rewards_train/rejected": -5.90625, "sft_loss": 0.578125, "step": 6250, "total_flos": 4.4275017142448947e+18, "train_loss": 0.19849793981045485, "train_runtime": 19567.5658, "train_samples_per_second": 5.11, "train_steps_per_second": 0.319 } ], "logging_steps": 1, "max_steps": 6250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.4275017142448947e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }