tangledgroup
/

tangled-alpha-0.9-core

@@ -124,15 +124,31 @@ eval:
 #       - 0.9
 #       - 0.999
 optimizer:
-  class_path: sophia_opt.SophiaG
   init_args:
     lr: 1e-4
     betas:
       - 0.9
-      - 0.95
-    rho: 0.05
-    weight_decay: 0.1
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto

 #       - 0.9
 #       - 0.999
+# optimizer:
+#   class_path: sophia_opt.SophiaG
+#   init_args:
+#     lr: 1e-4
+#     betas:
+#       - 0.9
+#       - 0.95
+#     rho: 0.05
+#     weight_decay: 0.1
 optimizer:
+  # class_path: torch.optim.AdamW
+  class_path: torchao.prototype.low_bit_optim.AdamW8bit
+  # class_path: torchao.prototype.low_bit_optim.AdamW4bit
+  # class_path: bitsandbytes.optim.AdamW8bit
+  # class_path: bitsandbytes.optim.PagedAdamW8bit
   init_args:
+    # Learning rate. (type: float, default: 0.001)
     lr: 1e-4
+    # Weight decay coefficient. (type: float, default: 0.01)
+    weight_decay: 0.01
+    # Running-average coefficients for the gradient and its square. (type: tuple, default: (0.9, 0.999))
     betas:
       - 0.9
+      - 0.999
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto