Juyoung Suk committed on
Commit
5be5d40
·
verified ·
1 Parent(s): bb32afe

Model save

Browse files
Files changed (4) hide show
  1. README.md +12 -16
  2. all_results.json +4 -9
  3. train_results.json +4 -4
  4. trainer_state.json +51 -43
README.md CHANGED
@@ -3,16 +3,11 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: alignment-handbook/zephyr-7b-sft-full
5
  tags:
6
- - alignment-handbook
7
  - trl
8
  - sft
9
  - generated_from_trainer
10
- - trl
11
- - sft
12
- - alignment-handbook
13
- - generated_from_trainer
14
  datasets:
15
- - scottsuk0306/DepthQA
16
  model-index:
17
  - name: zephyr-7b-stem-test
18
  results: []
@@ -23,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
23
 
24
  # zephyr-7b-stem-test
25
 
26
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the scottsuk0306/DepthQA dataset.
27
  It achieves the following results on the evaluation set:
28
- - Loss: 0.4115
29
 
30
  ## Model description
31
 
@@ -62,14 +57,15 @@ The following hyperparameters were used during training:
62
  | Training Loss | Epoch | Step | Validation Loss |
63
  |:-------------:|:-----:|:----:|:---------------:|
64
  | 1.1097 | 1.0 | 1 | 1.1095 |
65
- | 1.1097 | 2.0 | 2 | 1.2840 |
66
- | 1.1097 | 3.0 | 3 | 1.0815 |
67
- | 1.1097 | 4.0 | 4 | 0.8740 |
68
- | 1.0878 | 5.0 | 5 | 0.7459 |
69
- | 1.0878 | 6.0 | 6 | 0.6034 |
70
- | 1.0878 | 7.0 | 7 | 0.5202 |
71
- | 1.0878 | 8.0 | 8 | 0.4572 |
72
- | 1.0878 | 9.0 | 9 | 0.4221 |
 
73
 
74
 
75
  ### Framework versions
 
3
  license: apache-2.0
4
  base_model: alignment-handbook/zephyr-7b-sft-full
5
  tags:
 
6
  - trl
7
  - sft
8
  - generated_from_trainer
 
 
 
 
9
  datasets:
10
+ - generator
11
  model-index:
12
  - name: zephyr-7b-stem-test
13
  results: []
 
18
 
19
  # zephyr-7b-stem-test
20
 
21
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.4085
24
 
25
  ## Model description
26
 
 
57
  | Training Loss | Epoch | Step | Validation Loss |
58
  |:-------------:|:-----:|:----:|:---------------:|
59
  | 1.1097 | 1.0 | 1 | 1.1095 |
60
+ | 1.1097 | 2.0 | 2 | 1.2839 |
61
+ | 1.1097 | 3.0 | 3 | 1.0813 |
62
+ | 1.1097 | 4.0 | 4 | 0.8745 |
63
+ | 1.0879 | 5.0 | 5 | 0.7450 |
64
+ | 1.0879 | 6.0 | 6 | 0.6025 |
65
+ | 1.0879 | 7.0 | 7 | 0.5167 |
66
+ | 1.0879 | 8.0 | 8 | 0.4539 |
67
+ | 1.0879 | 9.0 | 9 | 0.4191 |
68
+ | 0.547 | 10.0 | 10 | 0.4085 |
69
 
70
 
71
  ### Framework versions
all_results.json CHANGED
@@ -1,14 +1,9 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_loss": 0.41154685616493225,
4
- "eval_runtime": 3.2821,
5
- "eval_samples": 848,
6
- "eval_samples_per_second": 15.234,
7
- "eval_steps_per_second": 0.305,
8
  "total_flos": 8375186227200.0,
9
- "train_loss": 0.0,
10
- "train_runtime": 1.4567,
11
  "train_samples": 848,
12
- "train_samples_per_second": 343.244,
13
- "train_steps_per_second": 6.865
14
  }
 
1
  {
2
  "epoch": 10.0,
 
 
 
 
 
3
  "total_flos": 8375186227200.0,
4
+ "train_loss": 0.8196125388145447,
5
+ "train_runtime": 303.2537,
6
  "train_samples": 848,
7
+ "train_samples_per_second": 1.649,
8
+ "train_steps_per_second": 0.033
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 10.0,
3
  "total_flos": 8375186227200.0,
4
- "train_loss": 0.0,
5
- "train_runtime": 1.4567,
6
  "train_samples": 848,
7
- "train_samples_per_second": 343.244,
8
- "train_steps_per_second": 6.865
9
  }
 
1
  {
2
  "epoch": 10.0,
3
  "total_flos": 8375186227200.0,
4
+ "train_loss": 0.8196125388145447,
5
+ "train_runtime": 303.2537,
6
  "train_samples": 848,
7
+ "train_samples_per_second": 1.649,
8
+ "train_steps_per_second": 0.033
9
  }
trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 17.810282459441467,
14
  "learning_rate": 1e-05,
15
  "loss": 1.1097,
16
  "step": 1
@@ -18,97 +18,105 @@
18
  {
19
  "epoch": 1.0,
20
  "eval_loss": 1.1095243692398071,
21
- "eval_runtime": 2.8023,
22
- "eval_samples_per_second": 17.843,
23
- "eval_steps_per_second": 0.357,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_loss": 1.2839607000350952,
29
- "eval_runtime": 2.6692,
30
- "eval_samples_per_second": 18.732,
31
- "eval_steps_per_second": 0.375,
32
  "step": 2
33
  },
34
  {
35
  "epoch": 3.0,
36
- "eval_loss": 1.0814800262451172,
37
- "eval_runtime": 2.6746,
38
- "eval_samples_per_second": 18.695,
39
- "eval_steps_per_second": 0.374,
40
  "step": 3
41
  },
42
  {
43
  "epoch": 4.0,
44
- "eval_loss": 0.874021053314209,
45
- "eval_runtime": 2.6724,
46
- "eval_samples_per_second": 18.71,
47
  "eval_steps_per_second": 0.374,
48
  "step": 4
49
  },
50
  {
51
  "epoch": 5.0,
52
- "grad_norm": 18.20399999295951,
53
  "learning_rate": 5.8682408883346535e-06,
54
- "loss": 1.0878,
55
  "step": 5
56
  },
57
  {
58
  "epoch": 5.0,
59
- "eval_loss": 0.7459021210670471,
60
- "eval_runtime": 2.6898,
61
- "eval_samples_per_second": 18.589,
62
- "eval_steps_per_second": 0.372,
63
  "step": 5
64
  },
65
  {
66
  "epoch": 6.0,
67
- "eval_loss": 0.6033510565757751,
68
- "eval_runtime": 2.6879,
69
- "eval_samples_per_second": 18.602,
70
- "eval_steps_per_second": 0.372,
71
  "step": 6
72
  },
73
  {
74
  "epoch": 7.0,
75
- "eval_loss": 0.5201848745346069,
76
- "eval_runtime": 2.6772,
77
- "eval_samples_per_second": 18.676,
78
- "eval_steps_per_second": 0.374,
79
  "step": 7
80
  },
81
  {
82
  "epoch": 8.0,
83
- "eval_loss": 0.4572071433067322,
84
- "eval_runtime": 2.6696,
85
- "eval_samples_per_second": 18.73,
86
- "eval_steps_per_second": 0.375,
87
  "step": 8
88
  },
89
  {
90
  "epoch": 9.0,
91
- "eval_loss": 0.4221082031726837,
92
- "eval_runtime": 2.677,
93
- "eval_samples_per_second": 18.677,
94
- "eval_steps_per_second": 0.374,
95
  "step": 9
96
  },
97
  {
98
  "epoch": 10.0,
99
- "grad_norm": 3.0214434738231213,
100
  "learning_rate": 0.0,
101
- "loss": 0.5493,
 
 
 
 
 
 
 
 
102
  "step": 10
103
  },
104
  {
105
  "epoch": 10.0,
106
  "step": 10,
107
  "total_flos": 8375186227200.0,
108
- "train_loss": 0.0,
109
- "train_runtime": 1.4567,
110
- "train_samples_per_second": 343.244,
111
- "train_steps_per_second": 6.865
112
  }
113
  ],
114
  "logging_steps": 5,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 17.81055096288918,
14
  "learning_rate": 1e-05,
15
  "loss": 1.1097,
16
  "step": 1
 
18
  {
19
  "epoch": 1.0,
20
  "eval_loss": 1.1095243692398071,
21
+ "eval_runtime": 2.808,
22
+ "eval_samples_per_second": 17.806,
23
+ "eval_steps_per_second": 0.356,
24
  "step": 1
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_loss": 1.2839224338531494,
29
+ "eval_runtime": 2.6721,
30
+ "eval_samples_per_second": 18.712,
31
+ "eval_steps_per_second": 0.374,
32
  "step": 2
33
  },
34
  {
35
  "epoch": 3.0,
36
+ "eval_loss": 1.081336259841919,
37
+ "eval_runtime": 2.6698,
38
+ "eval_samples_per_second": 18.728,
39
+ "eval_steps_per_second": 0.375,
40
  "step": 3
41
  },
42
  {
43
  "epoch": 4.0,
44
+ "eval_loss": 0.8745010495185852,
45
+ "eval_runtime": 2.6731,
46
+ "eval_samples_per_second": 18.705,
47
  "eval_steps_per_second": 0.374,
48
  "step": 4
49
  },
50
  {
51
  "epoch": 5.0,
52
+ "grad_norm": 18.406465212856943,
53
  "learning_rate": 5.8682408883346535e-06,
54
+ "loss": 1.0879,
55
  "step": 5
56
  },
57
  {
58
  "epoch": 5.0,
59
+ "eval_loss": 0.7450248599052429,
60
+ "eval_runtime": 2.6675,
61
+ "eval_samples_per_second": 18.744,
62
+ "eval_steps_per_second": 0.375,
63
  "step": 5
64
  },
65
  {
66
  "epoch": 6.0,
67
+ "eval_loss": 0.6025453209877014,
68
+ "eval_runtime": 2.6697,
69
+ "eval_samples_per_second": 18.729,
70
+ "eval_steps_per_second": 0.375,
71
  "step": 6
72
  },
73
  {
74
  "epoch": 7.0,
75
+ "eval_loss": 0.5167475938796997,
76
+ "eval_runtime": 2.6908,
77
+ "eval_samples_per_second": 18.582,
78
+ "eval_steps_per_second": 0.372,
79
  "step": 7
80
  },
81
  {
82
  "epoch": 8.0,
83
+ "eval_loss": 0.45393621921539307,
84
+ "eval_runtime": 2.6892,
85
+ "eval_samples_per_second": 18.593,
86
+ "eval_steps_per_second": 0.372,
87
  "step": 8
88
  },
89
  {
90
  "epoch": 9.0,
91
+ "eval_loss": 0.4190720021724701,
92
+ "eval_runtime": 2.6866,
93
+ "eval_samples_per_second": 18.611,
94
+ "eval_steps_per_second": 0.372,
95
  "step": 9
96
  },
97
  {
98
  "epoch": 10.0,
99
+ "grad_norm": 2.9824311781074426,
100
  "learning_rate": 0.0,
101
+ "loss": 0.547,
102
+ "step": 10
103
+ },
104
+ {
105
+ "epoch": 10.0,
106
+ "eval_loss": 0.408511757850647,
107
+ "eval_runtime": 2.6822,
108
+ "eval_samples_per_second": 18.641,
109
+ "eval_steps_per_second": 0.373,
110
  "step": 10
111
  },
112
  {
113
  "epoch": 10.0,
114
  "step": 10,
115
  "total_flos": 8375186227200.0,
116
+ "train_loss": 0.8196125388145447,
117
+ "train_runtime": 303.2537,
118
+ "train_samples_per_second": 1.649,
119
+ "train_steps_per_second": 0.033
120
  }
121
  ],
122
  "logging_steps": 5,