Commit
·
3a91c0a
1
Parent(s):
d2c0d5d
upload diff rewards
Browse files- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/added_tokens.json +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/config.json +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/generation_config.json +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/merges.txt +0 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00001-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00002-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00003-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00004-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00005-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00006-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model.safetensors.index.json +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/special_tokens_map.json +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/tokenizer.json +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/tokenizer_config.json +3 -0
- qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/vocab.json +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/added_tokens.json +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/config.json +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/generation_config.json +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/merges.txt +0 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00001-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00002-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00003-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00004-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00005-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00006-of-00006.safetensors +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model.safetensors.index.json +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/special_tokens_map.json +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/tokenizer.json +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/tokenizer_config.json +3 -0
- qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/vocab.json +3 -0
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
|
3 |
+
size 80
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
|
3 |
+
size 1008
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/generation_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
|
3 |
+
size 139
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00001-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88c48c72dea8126c224619337c04a95975298d71f1eacb427aeaddd4d335cc51
|
3 |
+
size 4996577736
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00002-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7482d1d045ad7718555fdad6d428c654f242a580a057ea313a249731bbd3dac
|
3 |
+
size 4996347752
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00003-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17cc1febd163c8e351cb941cae6e68f66e828e9ddcd45d5569a4353ef52dca29
|
3 |
+
size 4997127120
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00004-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8a1fc873a3e77184c2a6c450fefe44e77b83f83ed3c4e78918a0a0cfe516e4e
|
3 |
+
size 4985592520
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00005-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2fb06f398220d95d83c023a723df85ecd5fdd15a7c41241ed5a7c930c36f4cf
|
3 |
+
size 4996348976
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00006-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a618be11363c16e0468bb5332a7f590f293aa240aeaa501cdb168d098329f597
|
3 |
+
size 3660151400
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model.safetensors.index.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:406d3a66074ba34fa2be1c70f976607e4eab4c192dbba1d9f066db2cf6531a42
|
3 |
+
size 416452
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daf48284de8f4779b1dbf20963a68180002fba2a34a5da72292380c5d9fb6af2
|
3 |
+
size 370
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e49bebc2336dd0896ee055aa24e6bb65d80539b16751bb9798d6829db3ac4ea3
|
3 |
+
size 11418365
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6eb94faf6d8d2be1485bba6ceb83a7f0c6885357867f2f662e6f3a6783b927c4
|
3 |
+
size 1331
|
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/vocab.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
|
3 |
+
size 2776833
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
|
3 |
+
size 80
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
|
3 |
+
size 1008
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/generation_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
|
3 |
+
size 139
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00001-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68c35f9801c3129aac4f5901ffddc11b054115956f8f8d52f236404d6e227399
|
3 |
+
size 4996577736
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00002-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b1e393d74af4d34f4ace607d795f34646d5826239ed39a8cf170475aad9462b
|
3 |
+
size 4996347752
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00003-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37b4432c9934386cf1dfd044b77f34d228da6e1358407e91fac0ebe16976c963
|
3 |
+
size 4997127120
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00004-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95807d0ef156dc55e09707bbd5bbc5bbaa1630b3e299657848e29346769b1619
|
3 |
+
size 4985592520
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00005-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efb31a11ea6f207d7802bca9016bb1df2b8005fdf4c38b158c45dc16865340b4
|
3 |
+
size 4996348976
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00006-of-00006.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ffbb0f96bbe6fbe17a3f29c1e5954bc0cf920aa47fe7b2ec200fd77709cc21f
|
3 |
+
size 3660151400
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model.safetensors.index.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:406d3a66074ba34fa2be1c70f976607e4eab4c192dbba1d9f066db2cf6531a42
|
3 |
+
size 416452
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daf48284de8f4779b1dbf20963a68180002fba2a34a5da72292380c5d9fb6af2
|
3 |
+
size 370
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e49bebc2336dd0896ee055aa24e6bb65d80539b16751bb9798d6829db3ac4ea3
|
3 |
+
size 11418365
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6eb94faf6d8d2be1485bba6ceb83a7f0c6885357867f2f662e6f3a6783b927c4
|
3 |
+
size 1331
|
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/vocab.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
|
3 |
+
size 2776833
|