|
|
|
import torch
from functools import reduce
from torch.optim import AdamW


class MultiOptimizer:
    """Bundles several optimizers (and their schedulers) behind a single
    optimizer-like interface, keyed by name."""

    def __init__(self, optimizers=None, schedulers=None):
        # Avoid mutable default arguments.
        self.optimizers = optimizers if optimizers is not None else {}
        self.schedulers = schedulers if schedulers is not None else {}
        self.keys = list(self.optimizers.keys())
        # Flatten param_groups so the wrapper can be handed to utilities that
        # expect a regular torch optimizer (e.g. gradient clipping helpers).
        self.param_groups = reduce(
            lambda x, y: x + y,
            [v.param_groups for v in self.optimizers.values()], [])

    def state_dict(self):
        return [(key, self.optimizers[key].state_dict()) for key in self.keys]

    def load_state_dict(self, state_dict):
        for key, val in state_dict:
            try:
                self.optimizers[key].load_state_dict(val)
            except (KeyError, ValueError, RuntimeError) as e:
                print("Could not load optimizer state for %s: %s" % (key, e))

    def step(self, key=None):
        # Step a single named optimizer, or all of them when no key is given.
        if key is not None:
            self.optimizers[key].step()
        else:
            for k in self.keys:
                self.optimizers[k].step()

    def zero_grad(self, key=None):
        # Clear gradients for a single named optimizer, or for all of them.
        if key is not None:
            self.optimizers[key].zero_grad()
        else:
            for k in self.keys:
                self.optimizers[k].zero_grad()

    def scheduler(self, *args, key=None):
        # Advance a single named scheduler, or all of them.
        if key is not None:
            self.schedulers[key].step(*args)
        else:
            for k in self.keys:
                self.schedulers[k].step(*args)

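# Usage sketch (illustrative only; the module names and learning rates below
# are hypothetical, not taken from any config):
#
#     import torch
#     from torch import nn
#     from torch.optim import AdamW
#
#     encoder, decoder = nn.Linear(80, 256), nn.Linear(256, 80)
#     opt = MultiOptimizer({
#         'encoder': AdamW(encoder.parameters(), lr=1e-4),
#         'decoder': AdamW(decoder.parameters(), lr=2e-4),
#     })
#     loss = (decoder(encoder(torch.randn(8, 80))) ** 2).mean()
#     loss.backward()
#     opt.step('encoder')   # update only the encoder
#     opt.step()            # or update every wrapped optimizer
#     opt.zero_grad()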
|
|
|
|
|
def build_optimizer(parameters):
    """Build a single AdamW optimizer plus its OneCycleLR scheduler from a
    parameter/config dict."""
    optimizer, scheduler = _define_optimizer(parameters)
    return optimizer, scheduler


def _define_optimizer(params):
    optimizer_params = params['optimizer_params']
    sch_params = params['scheduler_params']
    optimizer = AdamW(
        params['params'],
        lr=optimizer_params.get('lr', 1e-4),
        weight_decay=optimizer_params.get('weight_decay', 5e-4),
        betas=(0.9, 0.98),
        eps=1e-9)
    scheduler = _define_scheduler(optimizer, sch_params)
    return optimizer, scheduler

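# Example call (illustrative; the numbers are placeholders, not values taken
# from any particular config):
#
#     from torch import nn
#     model = nn.Linear(80, 80)
#     optimizer, scheduler = build_optimizer({
#         'params': model.parameters(),
#         'optimizer_params': {'lr': 1e-4, 'weight_decay': 5e-4},
#         'scheduler_params': {'max_lr': 5e-4, 'epochs': 100,
#                              'steps_per_epoch': 500, 'pct_start': 0.1},
#     })
#     # OneCycleLR is stepped once per batch, after optimizer.step().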
|
|
|
def _define_scheduler(optimizer, params):
    # One-cycle LR schedule; its total length is epochs * steps_per_epoch,
    # so it is meant to be stepped once per training batch.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=params.get('max_lr', 5e-4),
        epochs=params.get('epochs', 200),
        steps_per_epoch=params.get('steps_per_epoch', 1000),
        pct_start=params.get('pct_start', 0.0),
        final_div_factor=5)
    return scheduler
|
|
|
def build_multi_optimizer(parameters_dict, scheduler_params):
    """Create one AdamW optimizer (and scheduler) per named parameter group
    and wrap them in a MultiOptimizer."""
    optim = {key: AdamW(params, lr=1e-4, weight_decay=1e-6,
                        betas=(0.9, 0.98), eps=1e-9)
             for key, params in parameters_dict.items()}
    schedulers = {key: _define_scheduler(opt, scheduler_params)
                  for key, opt in optim.items()}
    multi_optim = MultiOptimizer(optim, schedulers)
    return multi_optim

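# Usage sketch (illustrative; the group names are hypothetical):
#
#     from torch import nn
#     net = nn.ModuleDict({'mel_encoder': nn.Linear(80, 256),
#                          'text_encoder': nn.Embedding(100, 256)})
#     multi_opt = build_multi_optimizer(
#         {name: module.parameters() for name, module in net.items()},
#         scheduler_params={'max_lr': 5e-4, 'epochs': 100,
#                           'steps_per_epoch': 500})
#     multi_opt.zero_grad()
#     # ... forward / backward ...
#     multi_opt.step()
#     multi_opt.scheduler()   # advance every scheduler once per batch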
|
|