m-ric HF Staff committed on
Commit
35e8c4a
·
verified ·
1 Parent(s): 8b1bfe9

Model save

Browse files
Files changed (4) hide show
  1. README.md +2 -4
  2. all_results.json +6 -6
  3. train_results.json +6 -6
  4. trainer_state.json +108 -38
README.md CHANGED
@@ -1,11 +1,9 @@
1
  ---
2
  base_model: HuggingFaceTB/SmolLM2-1.7B-Instruct
3
- datasets: smolagents/training-traces
4
  library_name: transformers
5
  model_name: OpenR1-SmolLM2-1.7B-Instruct-Agentic
6
  tags:
7
  - generated_from_trainer
8
- - open-r1
9
  - trl
10
  - sft
11
  licence: license
@@ -13,7 +11,7 @@ licence: license
13
 
14
  # Model Card for OpenR1-SmolLM2-1.7B-Instruct-Agentic
15
 
16
- This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-1.7B-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct) on the [smolagents/training-traces](https://huggingface.co/datasets/smolagents/training-traces) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
29
 
30
  ## Training procedure
31
 
32
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/huggingface/huggingface/runs/obusc7q3)
33
 
34
 
35
  This model was trained with SFT.
 
1
  ---
2
  base_model: HuggingFaceTB/SmolLM2-1.7B-Instruct
 
3
  library_name: transformers
4
  model_name: OpenR1-SmolLM2-1.7B-Instruct-Agentic
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - sft
9
  licence: license
 
11
 
12
  # Model Card for OpenR1-SmolLM2-1.7B-Instruct-Agentic
13
 
14
+ This model is a fine-tuned version of [HuggingFaceTB/SmolLM2-1.7B-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/huggingface/huggingface/runs/d29a2xdi)
31
 
32
 
33
  This model was trained with SFT.
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "total_flos": 6155761876992.0,
3
- "train_loss": 1.0376410086949666,
4
- "train_runtime": 169.1619,
5
- "train_samples": 1204,
6
- "train_samples_per_second": 8.317,
7
- "train_steps_per_second": 0.124
8
  }
 
1
  {
2
+ "total_flos": 18782965727232.0,
3
+ "train_loss": 0.8722121318181356,
4
+ "train_runtime": 466.0776,
5
+ "train_samples": 1928,
6
+ "train_samples_per_second": 7.853,
7
+ "train_steps_per_second": 0.129
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "total_flos": 6155761876992.0,
3
- "train_loss": 1.0376410086949666,
4
- "train_runtime": 169.1619,
5
- "train_samples": 1204,
6
- "train_samples_per_second": 8.317,
7
- "train_steps_per_second": 0.124
8
  }
 
1
  {
2
+ "total_flos": 18782965727232.0,
3
+ "train_loss": 0.8722121318181356,
4
+ "train_runtime": 466.0776,
5
+ "train_samples": 1928,
6
+ "train_samples_per_second": 7.853,
7
+ "train_steps_per_second": 0.129
8
  }
trainer_state.json CHANGED
@@ -2,65 +2,135 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.6666666666666665,
6
  "eval_steps": 500,
7
- "global_step": 21,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "epoch": 0.6666666666666666,
14
- "grad_norm": 0.3659521589282829,
15
- "learning_rate": 4.4444444444444447e-05,
16
- "loss": 1.1898,
17
- "mean_token_accuracy": 0.749215167760849,
18
- "num_tokens": 2476935.0,
19
  "step": 5
20
  },
21
  {
22
- "epoch": 1.2666666666666666,
23
- "grad_norm": 0.31543356085932234,
24
- "learning_rate": 3.055555555555556e-05,
25
- "loss": 1.0408,
26
- "mean_token_accuracy": 0.773130026128557,
27
- "num_tokens": 4779069.0,
28
  "step": 10
29
  },
30
  {
31
- "epoch": 1.9333333333333333,
32
- "grad_norm": 0.23778923448881445,
33
- "learning_rate": 1.6666666666666667e-05,
34
- "loss": 0.9778,
35
- "mean_token_accuracy": 0.7825889229774475,
36
- "num_tokens": 7274168.0,
37
  "step": 15
38
  },
39
  {
40
- "epoch": 2.533333333333333,
41
- "grad_norm": 0.19692051437026512,
42
- "learning_rate": 2.777777777777778e-06,
43
- "loss": 0.9744,
44
- "mean_token_accuracy": 0.7865516278478835,
45
- "num_tokens": 9521856.0,
46
  "step": 20
47
  },
48
  {
49
- "epoch": 2.6666666666666665,
50
- "mean_token_accuracy": 0.7911556959152222,
51
- "num_tokens": 10020101.0,
52
- "step": 21,
53
- "total_flos": 6155761876992.0,
54
- "train_loss": 1.0376410086949666,
55
- "train_runtime": 169.1619,
56
- "train_samples_per_second": 8.317,
57
- "train_steps_per_second": 0.124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  }
59
  ],
60
  "logging_steps": 5,
61
- "max_steps": 21,
62
  "num_input_tokens_seen": 0,
63
- "num_train_epochs": 3,
64
  "save_steps": 500,
65
  "stateful_callbacks": {
66
  "TrainerControl": {
@@ -74,7 +144,7 @@
74
  "attributes": {}
75
  }
76
  },
77
- "total_flos": 6155761876992.0,
78
  "train_batch_size": 4,
79
  "trial_name": null,
80
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 6.0,
6
  "eval_steps": 500,
7
+ "global_step": 60,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "epoch": 0.5,
14
+ "grad_norm": 1.1611291291820123,
15
+ "learning_rate": 4.166666666666667e-05,
16
+ "loss": 1.175,
17
+ "mean_token_accuracy": 0.7521877646446228,
18
+ "num_tokens": 2465164.0,
19
  "step": 5
20
  },
21
  {
22
+ "epoch": 1.0,
23
+ "grad_norm": 0.6622829891502945,
24
+ "learning_rate": 4.62962962962963e-05,
25
+ "loss": 1.0551,
26
+ "mean_token_accuracy": 0.7775114715099335,
27
+ "num_tokens": 4604565.0,
28
  "step": 10
29
  },
30
  {
31
+ "epoch": 1.5,
32
+ "grad_norm": 0.3321063533662744,
33
+ "learning_rate": 4.166666666666667e-05,
34
+ "loss": 0.974,
35
+ "mean_token_accuracy": 0.7821209251880645,
36
+ "num_tokens": 7112937.0,
37
  "step": 15
38
  },
39
  {
40
+ "epoch": 2.0,
41
+ "grad_norm": 0.17372883404582676,
42
+ "learning_rate": 3.7037037037037037e-05,
43
+ "loss": 0.9079,
44
+ "mean_token_accuracy": 0.797761058807373,
45
+ "num_tokens": 9211540.0,
46
  "step": 20
47
  },
48
  {
49
+ "epoch": 2.5,
50
+ "grad_norm": 0.3250721867237713,
51
+ "learning_rate": 3.240740740740741e-05,
52
+ "loss": 0.8587,
53
+ "mean_token_accuracy": 0.80385662317276,
54
+ "num_tokens": 11676501.0,
55
+ "step": 25
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "grad_norm": 0.12878896880550417,
60
+ "learning_rate": 2.777777777777778e-05,
61
+ "loss": 0.8491,
62
+ "mean_token_accuracy": 0.8127783119678498,
63
+ "num_tokens": 13823724.0,
64
+ "step": 30
65
+ },
66
+ {
67
+ "epoch": 3.5,
68
+ "grad_norm": 0.12156986655948128,
69
+ "learning_rate": 2.314814814814815e-05,
70
+ "loss": 0.8216,
71
+ "mean_token_accuracy": 0.8109423339366912,
72
+ "num_tokens": 16296257.0,
73
+ "step": 35
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "grad_norm": 0.11634538375626934,
78
+ "learning_rate": 1.8518518518518518e-05,
79
+ "loss": 0.7842,
80
+ "mean_token_accuracy": 0.8188613414764404,
81
+ "num_tokens": 18424879.0,
82
+ "step": 40
83
+ },
84
+ {
85
+ "epoch": 4.5,
86
+ "grad_norm": 0.10458399531482464,
87
+ "learning_rate": 1.388888888888889e-05,
88
+ "loss": 0.7492,
89
+ "mean_token_accuracy": 0.8257159292697906,
90
+ "num_tokens": 20909688.0,
91
+ "step": 45
92
+ },
93
+ {
94
+ "epoch": 5.0,
95
+ "grad_norm": 0.10973027739730677,
96
+ "learning_rate": 9.259259259259259e-06,
97
+ "loss": 0.7812,
98
+ "mean_token_accuracy": 0.8185243546962738,
99
+ "num_tokens": 23043160.0,
100
+ "step": 50
101
+ },
102
+ {
103
+ "epoch": 5.5,
104
+ "grad_norm": 0.11172428707492313,
105
+ "learning_rate": 4.6296296296296296e-06,
106
+ "loss": 0.7663,
107
+ "mean_token_accuracy": 0.8235744714736939,
108
+ "num_tokens": 25563345.0,
109
+ "step": 55
110
+ },
111
+ {
112
+ "epoch": 6.0,
113
+ "grad_norm": 0.1207429261911167,
114
+ "learning_rate": 0.0,
115
+ "loss": 0.7442,
116
+ "mean_token_accuracy": 0.8240064084529877,
117
+ "num_tokens": 27655450.0,
118
+ "step": 60
119
+ },
120
+ {
121
+ "epoch": 6.0,
122
+ "step": 60,
123
+ "total_flos": 18782965727232.0,
124
+ "train_loss": 0.8722121318181356,
125
+ "train_runtime": 466.0776,
126
+ "train_samples_per_second": 7.853,
127
+ "train_steps_per_second": 0.129
128
  }
129
  ],
130
  "logging_steps": 5,
131
+ "max_steps": 60,
132
  "num_input_tokens_seen": 0,
133
+ "num_train_epochs": 6,
134
  "save_steps": 500,
135
  "stateful_callbacks": {
136
  "TrainerControl": {
 
144
  "attributes": {}
145
  }
146
  },
147
+ "total_flos": 18782965727232.0,
148
  "train_batch_size": 4,
149
  "trial_name": null,
150
  "trial_params": null