m-ric
/

OpenR1-SmolLM2-1.7B-Instruct-Agentic

@@ -1,11 +1,9 @@
 ---
 base_model: HuggingFaceTB/SmolLM2-1.7B-Instruct
-datasets: smolagents/training-traces
 library_name: transformers
 model_name: OpenR1-SmolLM2-1.7B-Instruct-Agentic
 tags:
 - generated_from_trainer
-- open-r1
 - trl
 - sft
 licence: license
@@ -13,7 +11,7 @@ licence: license
 # Model Card for OpenR1-SmolLM2-1.7B-Instruct-Agentic
-This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-1.7B-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct) on the [smolagents/training-traces](https://huggingface.co/datasets/smolagents/training-traces) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/huggingface/huggingface/runs/obusc7q3)
 This model was trained with SFT.

 ---
 base_model: HuggingFaceTB/SmolLM2-1.7B-Instruct
 library_name: transformers
 model_name: OpenR1-SmolLM2-1.7B-Instruct-Agentic
 tags:
 - generated_from_trainer
 - trl
 - sft
 licence: license
 # Model Card for OpenR1-SmolLM2-1.7B-Instruct-Agentic
+This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-1.7B-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/huggingface/huggingface/runs/d29a2xdi)
 This model was trained with SFT.

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "total_flos": 6155761876992.0,
-    "train_loss": 1.0376410086949666,
-    "train_runtime": 169.1619,
-    "train_samples": 1204,
-    "train_samples_per_second": 8.317,
-    "train_steps_per_second": 0.124
 }

 {
+    "total_flos": 18782965727232.0,
+    "train_loss": 0.8722121318181356,
+    "train_runtime": 466.0776,
+    "train_samples": 1928,
+    "train_samples_per_second": 7.853,
+    "train_steps_per_second": 0.129
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "total_flos": 6155761876992.0,
-    "train_loss": 1.0376410086949666,
-    "train_runtime": 169.1619,
-    "train_samples": 1204,
-    "train_samples_per_second": 8.317,
-    "train_steps_per_second": 0.124
 }

 {
+    "total_flos": 18782965727232.0,
+    "train_loss": 0.8722121318181356,
+    "train_runtime": 466.0776,
+    "train_samples": 1928,
+    "train_samples_per_second": 7.853,
+    "train_steps_per_second": 0.129
 }

trainer_state.json CHANGED Viewed

@@ -2,65 +2,135 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.6666666666666665,
   "eval_steps": 500,
-  "global_step": 21,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.6666666666666666,
-      "grad_norm": 0.3659521589282829,
-      "learning_rate": 4.4444444444444447e-05,
-      "loss": 1.1898,
-      "mean_token_accuracy": 0.749215167760849,
-      "num_tokens": 2476935.0,
       "step": 5
     },
     {
-      "epoch": 1.2666666666666666,
-      "grad_norm": 0.31543356085932234,
-      "learning_rate": 3.055555555555556e-05,
-      "loss": 1.0408,
-      "mean_token_accuracy": 0.773130026128557,
-      "num_tokens": 4779069.0,
       "step": 10
     },
     {
-      "epoch": 1.9333333333333333,
-      "grad_norm": 0.23778923448881445,
-      "learning_rate": 1.6666666666666667e-05,
-      "loss": 0.9778,
-      "mean_token_accuracy": 0.7825889229774475,
-      "num_tokens": 7274168.0,
       "step": 15
     },
     {
-      "epoch": 2.533333333333333,
-      "grad_norm": 0.19692051437026512,
-      "learning_rate": 2.777777777777778e-06,
-      "loss": 0.9744,
-      "mean_token_accuracy": 0.7865516278478835,
-      "num_tokens": 9521856.0,
       "step": 20
     },
     {
-      "epoch": 2.6666666666666665,
-      "mean_token_accuracy": 0.7911556959152222,
-      "num_tokens": 10020101.0,
-      "step": 21,
-      "total_flos": 6155761876992.0,
-      "train_loss": 1.0376410086949666,
-      "train_runtime": 169.1619,
-      "train_samples_per_second": 8.317,
-      "train_steps_per_second": 0.124
     }
   ],
   "logging_steps": 5,
-  "max_steps": 21,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -74,7 +144,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6155761876992.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 6.0,
   "eval_steps": 500,
+  "global_step": 60,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.5,
+      "grad_norm": 1.1611291291820123,
+      "learning_rate": 4.166666666666667e-05,
+      "loss": 1.175,
+      "mean_token_accuracy": 0.7521877646446228,
+      "num_tokens": 2465164.0,
       "step": 5
     },
     {
+      "epoch": 1.0,
+      "grad_norm": 0.6622829891502945,
+      "learning_rate": 4.62962962962963e-05,
+      "loss": 1.0551,
+      "mean_token_accuracy": 0.7775114715099335,
+      "num_tokens": 4604565.0,
       "step": 10
     },
     {
+      "epoch": 1.5,
+      "grad_norm": 0.3321063533662744,
+      "learning_rate": 4.166666666666667e-05,
+      "loss": 0.974,
+      "mean_token_accuracy": 0.7821209251880645,
+      "num_tokens": 7112937.0,
       "step": 15
     },
     {
+      "epoch": 2.0,
+      "grad_norm": 0.17372883404582676,
+      "learning_rate": 3.7037037037037037e-05,
+      "loss": 0.9079,
+      "mean_token_accuracy": 0.797761058807373,
+      "num_tokens": 9211540.0,
       "step": 20
     },
     {
+      "epoch": 2.5,
+      "grad_norm": 0.3250721867237713,
+      "learning_rate": 3.240740740740741e-05,
+      "loss": 0.8587,
+      "mean_token_accuracy": 0.80385662317276,
+      "num_tokens": 11676501.0,
+      "step": 25
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.12878896880550417,
+      "learning_rate": 2.777777777777778e-05,
+      "loss": 0.8491,
+      "mean_token_accuracy": 0.8127783119678498,
+      "num_tokens": 13823724.0,
+      "step": 30
+    },
+    {
+      "epoch": 3.5,
+      "grad_norm": 0.12156986655948128,
+      "learning_rate": 2.314814814814815e-05,
+      "loss": 0.8216,
+      "mean_token_accuracy": 0.8109423339366912,
+      "num_tokens": 16296257.0,
+      "step": 35
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.11634538375626934,
+      "learning_rate": 1.8518518518518518e-05,
+      "loss": 0.7842,
+      "mean_token_accuracy": 0.8188613414764404,
+      "num_tokens": 18424879.0,
+      "step": 40
+    },
+    {
+      "epoch": 4.5,
+      "grad_norm": 0.10458399531482464,
+      "learning_rate": 1.388888888888889e-05,
+      "loss": 0.7492,
+      "mean_token_accuracy": 0.8257159292697906,
+      "num_tokens": 20909688.0,
+      "step": 45
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.10973027739730677,
+      "learning_rate": 9.259259259259259e-06,
+      "loss": 0.7812,
+      "mean_token_accuracy": 0.8185243546962738,
+      "num_tokens": 23043160.0,
+      "step": 50
+    },
+    {
+      "epoch": 5.5,
+      "grad_norm": 0.11172428707492313,
+      "learning_rate": 4.6296296296296296e-06,
+      "loss": 0.7663,
+      "mean_token_accuracy": 0.8235744714736939,
+      "num_tokens": 25563345.0,
+      "step": 55
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.1207429261911167,
+      "learning_rate": 0.0,
+      "loss": 0.7442,
+      "mean_token_accuracy": 0.8240064084529877,
+      "num_tokens": 27655450.0,
+      "step": 60
+    },
+    {
+      "epoch": 6.0,
+      "step": 60,
+      "total_flos": 18782965727232.0,
+      "train_loss": 0.8722121318181356,
+      "train_runtime": 466.0776,
+      "train_samples_per_second": 7.853,
+      "train_steps_per_second": 0.129
     }
   ],
   "logging_steps": 5,
+  "max_steps": 60,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 18782965727232.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null