shengyi-qian commited on
Commit
3a91c0a
·
1 Parent(s): d2c0d5d

upload diff rewards

Browse files
Files changed (30) hide show
  1. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/added_tokens.json +3 -0
  2. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/config.json +3 -0
  3. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/generation_config.json +3 -0
  4. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/merges.txt +0 -0
  5. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00001-of-00006.safetensors +3 -0
  6. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00002-of-00006.safetensors +3 -0
  7. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00003-of-00006.safetensors +3 -0
  8. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00004-of-00006.safetensors +3 -0
  9. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00005-of-00006.safetensors +3 -0
  10. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00006-of-00006.safetensors +3 -0
  11. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model.safetensors.index.json +3 -0
  12. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/special_tokens_map.json +3 -0
  13. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/tokenizer.json +3 -0
  14. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/tokenizer_config.json +3 -0
  15. qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/vocab.json +3 -0
  16. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/added_tokens.json +3 -0
  17. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/config.json +3 -0
  18. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/generation_config.json +3 -0
  19. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/merges.txt +0 -0
  20. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00001-of-00006.safetensors +3 -0
  21. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00002-of-00006.safetensors +3 -0
  22. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00003-of-00006.safetensors +3 -0
  23. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00004-of-00006.safetensors +3 -0
  24. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00005-of-00006.safetensors +3 -0
  25. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00006-of-00006.safetensors +3 -0
  26. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model.safetensors.index.json +3 -0
  27. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/special_tokens_map.json +3 -0
  28. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/tokenizer.json +3 -0
  29. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/tokenizer_config.json +3 -0
  30. qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/vocab.json +3 -0
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
3
+ size 80
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
3
+ size 1008
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/generation_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
3
+ size 139
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88c48c72dea8126c224619337c04a95975298d71f1eacb427aeaddd4d335cc51
3
+ size 4996577736
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7482d1d045ad7718555fdad6d428c654f242a580a057ea313a249731bbd3dac
3
+ size 4996347752
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17cc1febd163c8e351cb941cae6e68f66e828e9ddcd45d5569a4353ef52dca29
3
+ size 4997127120
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8a1fc873a3e77184c2a6c450fefe44e77b83f83ed3c4e78918a0a0cfe516e4e
3
+ size 4985592520
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2fb06f398220d95d83c023a723df85ecd5fdd15a7c41241ed5a7c930c36f4cf
3
+ size 4996348976
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a618be11363c16e0468bb5332a7f590f293aa240aeaa501cdb168d098329f597
3
+ size 3660151400
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406d3a66074ba34fa2be1c70f976607e4eab4c192dbba1d9f066db2cf6531a42
3
+ size 416452
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daf48284de8f4779b1dbf20963a68180002fba2a34a5da72292380c5d9fb6af2
3
+ size 370
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49bebc2336dd0896ee055aa24e6bb65d80539b16751bb9798d6829db3ac4ea3
3
+ size 11418365
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb94faf6d8d2be1485bba6ceb83a7f0c6885357867f2f662e6f3a6783b927c4
3
+ size 1331
qwen1.5_base_rule_base_equal_dist_grpo_diff_reward_func/vocab.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
3
+ size 2776833
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a475432c61f8d6154d10d28c37671a36e5717daf3d15002a988968fee54a500
3
+ size 80
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528d3faa3d28fe193cae05b08cd0c15e6c173fe3ed86ca4382d22ad120232280
3
+ size 1008
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/generation_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64249e16a9efa9b4b2560bd2c0b3c44dd0eee067c5792b646bafe09f4676bce8
3
+ size 139
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00001-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c35f9801c3129aac4f5901ffddc11b054115956f8f8d52f236404d6e227399
3
+ size 4996577736
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00002-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b1e393d74af4d34f4ace607d795f34646d5826239ed39a8cf170475aad9462b
3
+ size 4996347752
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00003-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37b4432c9934386cf1dfd044b77f34d228da6e1358407e91fac0ebe16976c963
3
+ size 4997127120
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00004-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95807d0ef156dc55e09707bbd5bbc5bbaa1630b3e299657848e29346769b1619
3
+ size 4985592520
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00005-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efb31a11ea6f207d7802bca9016bb1df2b8005fdf4c38b158c45dc16865340b4
3
+ size 4996348976
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model-00006-of-00006.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ffbb0f96bbe6fbe17a3f29c1e5954bc0cf920aa47fe7b2ec200fd77709cc21f
3
+ size 3660151400
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:406d3a66074ba34fa2be1c70f976607e4eab4c192dbba1d9f066db2cf6531a42
3
+ size 416452
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daf48284de8f4779b1dbf20963a68180002fba2a34a5da72292380c5d9fb6af2
3
+ size 370
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49bebc2336dd0896ee055aa24e6bb65d80539b16751bb9798d6829db3ac4ea3
3
+ size 11418365
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb94faf6d8d2be1485bba6ceb83a7f0c6885357867f2f662e6f3a6783b927c4
3
+ size 1331
qwen1.5_base_rule_base_math_heavy_grpo_diff_reward_func/vocab.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
3
+ size 2776833