import os
from copy import deepcopy
from bayesmark.abstract_optimizer import AbstractOptimizer
from bayesmark.experiment import experiment_main
from bayesmark.space import JointSpace
import torch
import logging
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from sklearn.compose import ColumnTransformer
from scipy.special import logit, expit
# from scipy.stats import qmc
from torch.quasirandom import SobolEngine
from .scripts import acquisition_functions, tune_input_warping
class PFNOptimizer(AbstractOptimizer):
# Used for determining the version number of package used
# primary_import = ""
    def __init__(self, api_config, pfn_file, minimize=True, acqf_optimizer_name="lbfgs", sobol_sampler=False,
                 device="cpu:0", fit_encoder_from_step=None, verbose=False, rand_bool=False, sample_only_valid=False,
                 round_suggests_to=4, min_initial_design=0, max_initial_design=None,
                 rand_sugg_after_x_steps_of_stagnation=None, fixed_initial_guess=None, minmax_encode_y=False,
                 **acqf_kwargs):
"""Build wrapper class to use optimizer in benchmark.
Parameters
----------
api_config : dict-like of dict-like
Configuration of the optimization variables. See API description.
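        pfn_file : str
            Path to the serialized PFN model; absolute paths are loaded as-is,
            relative paths are resolved against this file's directory.
        minimize : bool
            If True, observed objective values are negated internally so the
            optimizer always maximizes.
        acqf_optimizer_name : str
            Acquisition-function optimizer to use, either "lbfgs" or "adam".
        sobol_sampler : bool
            If True, draw random suggestions from a scrambled Sobol sequence
            instead of sampling uniformly at random.
        rand_bool : bool
            If True, round fractional int/bool suggestions stochastically
            instead of deterministically.
        **acqf_kwargs
            Passed through to the acquisition-function optimizer.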
"""
        assert 'fit_encoder' not in acqf_kwargs
AbstractOptimizer.__init__(self, api_config)
# Do whatever other setup is needed
# ...
self.space_x = JointSpace(api_config)
self.bounds = self.space_x.get_bounds()
self.device = device
        # Load the pre-trained PFN surrogate: absolute paths as-is, relative paths resolved against this file.
        self.model = torch.load(pfn_file) if pfn_file.startswith('/') else torch.load(os.path.join(os.path.dirname(__file__), pfn_file))
self.X = []
self.y = []
self.api_config = {key: value for key, value in sorted(api_config.items())}
self.hp_names = list(self.api_config.keys())
# self.model.encoder.num_features = 18
self.epsilon = 1e-8
self.minimize = minimize
self.sobol_sampler = sobol_sampler
self.create_scaler()
self.sobol = SobolEngine(len(self.max_values), scramble=True)
self.acqf_optimizer_name = acqf_optimizer_name
self.acqf_kwargs = acqf_kwargs
self.fit_encoder_from_step = fit_encoder_from_step
assert not (rand_bool and sample_only_valid)
self.rand_bool = rand_bool
self.sample_only_valid = sample_only_valid
self.verbose = verbose
self.round_suggests_to = round_suggests_to
self.min_initial_design = min_initial_design
self.max_initial_design = max_initial_design
self.fixed_initial_guess = fixed_initial_guess
self.minmax_encode_y = minmax_encode_y
self.rand_sugg_after_x_steps_of_stagnation = rand_sugg_after_x_steps_of_stagnation
self.model.eval()
        if self.verbose:
            print(api_config)
def create_scaler(self):
list_of_scalers = []
self.min_values = []
self.max_values = []
self.spaces = []
self.types = []
        for i, feature in enumerate(self.api_config):
            # list_of_scalers.append((feature, MinMaxScaler(feature_range),i))
            self.spaces.append(self.api_config[feature].get("space", "bool"))
            self.types.append(self.api_config[feature]["type"])
            if self.types[-1] == "bool":
                feature_range = [0, 1]
            else:
                feature_range = list(self.api_config[feature]["range"])
            # Index -1 refers to the space/type just appended for the current feature,
            # so the range endpoints are mapped into the model's (log/logit) space.
            feature_range[0] = self.transform_feature(feature_range[0], -1)
            feature_range[1] = self.transform_feature(feature_range[1], -1)
            self.min_values.append(feature_range[0])
            self.max_values.append(feature_range[1])
        self.column_scaler = ColumnTransformer(list_of_scalers)  # unused; list_of_scalers stays empty
        self.max_values: np.ndarray = np.array(self.max_values)
        self.min_values: np.ndarray = np.array(self.min_values)
    def transform_feature_inverse(self, x, feature_index):
        """Map a value from the model's continuous space back to the original hyperparameter space."""
        if self.spaces[feature_index] == "log":
            x = np.exp(x)
        elif self.spaces[feature_index] == "logit":
            x = expit(x)
        if self.types[feature_index] == "int":
            if self.rand_bool:
                # stochastic rounding: round up with probability equal to the fractional part
                x = int(x) + int(np.random.rand() < (x - int(x)))
            else:
                x = int(np.round(x))
        elif self.types[feature_index] == "bool":
            if self.rand_bool:
                # interpret x as the probability of True
                x = np.random.rand() < x
            else:
                x = bool(np.round(x))
        return x
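    # Illustration (hypothetical feature with type "int" and space "log"):
    # transform_feature_inverse(np.log(10.4), 0) rounds 10.4 to 10; with rand_bool=True it
    # instead returns 11 with probability ~0.4 and 10 otherwise (stochastic rounding).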
    def transform_feature(self, x, feature_index):
        """Map a raw hyperparameter value into the model's continuous space."""
        if np.isinf(x) or np.isnan(x):
            # map non-finite inputs (e.g. unbounded range endpoints) to 0
            return 0
        if self.spaces[feature_index] == "log":
            x = np.log(x)
        elif self.spaces[feature_index] == "logit":
            x = logit(x)
        elif self.types[feature_index] == "bool":
            x = int(x)
        return x
    def random_suggest(self):
        """Draw one random (uniform or scrambled-Sobol) point and return it as a suggestion list."""
        self.rand_prev = True
if self.sobol_sampler:
# sampler = qmc.Sobol(d=len(self.max_values), scramble=False)
# temp_guess = sampler.random_base2(m=len(self.max_values))
temp_guess = self.sobol.draw(1).numpy()[0]
temp_guess = temp_guess * (self.max_values - self.min_values) + self.min_values
x_guess = {}
for j, feature in enumerate(self.api_config):
x = self.transform_feature_inverse(temp_guess[j], j)
x_guess[feature] = x
x_guess = [x_guess]
else:
x_guess = {}
for i, feature in enumerate(self.api_config):
temp_guess = np.random.uniform(self.min_values[i], self.max_values[i], 1)[0]
temp_guess = self.transform_feature_inverse(temp_guess, i)
x_guess[feature] = temp_guess
x_guess = [x_guess]
return x_guess
def transform_back(self, x_guess):
if self.round_suggests_to is not None:
x_guess = np.round(x_guess, self.round_suggests_to) # make sure very similar values are actually the same
x_guess = x_guess * (self.max_values - self.min_values) + self.min_values
x_guess = x_guess.tolist()
return self.transform_inverse(x_guess)
def min_max_encode(self, temp_X):
# this, combined with transform is the inverse of transform_back
temp_X = (temp_X - self.min_values) / (self.max_values - self.min_values)
temp_X = torch.tensor(temp_X).to(torch.float32)
temp_X = torch.clamp(temp_X, min=0., max=1.)
return temp_X
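    # Round-trip sketch (hypothetical single "real" hp with range (1, 100) and "log" space):
    # transform_back(np.array([0.5])) yields {"hp": 10.0} since exp(0.5 * log(100)) == 10,
    # and min_max_encode(self.transform({"hp": 10.0})) recovers tensor([0.5000]).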
@torch.no_grad()
def suggest(self, n_suggestions=1):
"""Get suggestion from the optimizer.
Parameters
----------
n_suggestions : int
Desired number of parallel suggestions in the output
Returns
-------
next_guess : list of dict
List of `n_suggestions` suggestions to evaluate the objective
function. Each suggestion is a dictionary where each key
corresponds to a parameter being optimized.
"""
assert n_suggestions == 1, "Only one suggestion at a time is supported"
# Do whatever is needed to get the parallel guesses
# ...
# scaler = MinMaxScaler()
# scaler.fit(self.X)
        try:
            # Initial design: use random/Sobol points (or a fixed guess) until enough observations exist.
            num_initial_design = max(len(self.bounds), self.min_initial_design)
            if self.max_initial_design is not None:
                num_initial_design = min(num_initial_design, self.max_initial_design)
            if len(self.X) < num_initial_design:
                if len(self.X) == 0 and self.fixed_initial_guess is not None:
                    # fixed_initial_guess is a unit-cube coordinate replicated across all dimensions
                    x_guess = [self.transform_back(np.array([self.fixed_initial_guess for _ in range(len(self.bounds))]))]
else:
x_guess = self.random_suggest()
return x_guess
else:
temp_X = np.array(self.X)
temp_X = self.min_max_encode(temp_X)
if self.minmax_encode_y:
temp_y = MinMaxScaler().fit_transform(np.array(self.y).reshape(-1, 1)).reshape(-1)
else:
temp_y = np.array(self.y)
temp_y = torch.tensor(temp_y).to(torch.float32)
                if self.rand_sugg_after_x_steps_of_stagnation is not None \
                        and len(self.y) > self.rand_sugg_after_x_steps_of_stagnation \
                        and not self.rand_prev:
                    # the incumbent has not improved within the last x observations -> explore randomly
                    if temp_y[:-self.rand_sugg_after_x_steps_of_stagnation].max() == temp_y.max():
                        print(f"Random suggestion after >= {self.rand_sugg_after_x_steps_of_stagnation} steps of stagnation")
                        x_guess = self.random_suggest()
                        return x_guess
if self.verbose:
from matplotlib import pyplot as plt
print(f"{temp_X=}, {temp_y=}")
if temp_X.shape[1] == 2:
from scipy.stats import rankdata
plt.title('Observations, red -> blue.')
plt.scatter(temp_X[:,0], temp_X[:,1], cmap='RdBu', c=rankdata(temp_y))
plt.show()
temp_X = temp_X.to(self.device)
temp_y = temp_y.to(self.device)
if self.fit_encoder_from_step and self.fit_encoder_from_step <= len(self.X):
with torch.enable_grad():
w = tune_input_warping.fit_input_warping(self.model, temp_X, temp_y)
temp_X_warped = w(temp_X).detach()
else:
temp_X_warped = temp_X
                with torch.enable_grad():
                    if self.acqf_optimizer_name == "lbfgs":
                        def rand_sample_func(n):
                            # Sample uniformly in the unit cube, round-trip through the original space
                            # so that int/bool constraints are respected, then re-encode for the model.
                            pre_samples = torch.rand(n, temp_X_warped.shape[1], device='cpu')
                            back_transformed_samples = [self.transform_back(sample) for sample in pre_samples]
                            samples = np.array([self.transform(deepcopy(bt_sample)) for bt_sample in back_transformed_samples])
                            samples = self.min_max_encode(samples)
                            return samples.to(self.device)
                        if self.sample_only_valid:
                            # dims with bool or int are not continuous, thus no gradient opt is applied
                            dims_wo_gradient_opt = [i for i, t in enumerate(self.types) if t != "real"]
                        else:
                            rand_sample_func = None
                            dims_wo_gradient_opt = []
                        x_guess, x_options, eis, x_rs, x_rs_eis = acquisition_functions.optimize_acq_w_lbfgs(
                            self.model, temp_X_warped, temp_y, device=self.device,
                            verbose=self.verbose, rand_sample_func=rand_sample_func,
                            dims_wo_gradient_opt=dims_wo_gradient_opt, **{'apply_power_transform': True, **self.acqf_kwargs}
                        )
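                        # Assumed semantics (inferred from usage below, not from acquisition_functions itself):
                        # x_options holds acquisition candidates tried best-first, and x_rs / x_rs_eis are the
                        # raw random-search candidates and their EI values, kept as a fallback.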
                    elif self.acqf_optimizer_name == 'adam':
                        x_guess = acquisition_functions.optimize_acq(
                            self.model, temp_X_warped, temp_y, apply_power_transform=True,
                            device=self.device, **self.acqf_kwargs
                        ).detach().cpu().numpy()
else:
raise ValueError("Optimizer not recognized, set `acqf_optimizer_name` to 'lbfgs' or 'adam'")
                back_transformed_x_options = [self.transform_back(x) for x in x_options]
                opt_X = np.array([self.transform(deepcopy(transformed_x_options)) for transformed_x_options in back_transformed_x_options])
                opt_X = self.min_max_encode(opt_X)
                # True where a candidate does not coincide with any previously observed point
                opt_new = ~(opt_X[:, None] == temp_X[None].cpu()).all(-1).any(1)
                for i, x in enumerate(opt_X):
                    if opt_new[i]:
                        if self.verbose:
                            print(f"New point at pos {i}: {back_transformed_x_options[i], x_options[i]}")
                        self.rand_prev = False
                        return [back_transformed_x_options[i]]
                print('No novel acquisition candidate; falling back to the initial random-search candidates')
                back_transformed_x_options = [self.transform_back(x) for x in x_rs]
                opt_X = np.array([self.transform(deepcopy(transformed_x_options)) for transformed_x_options in back_transformed_x_options])
                opt_X = self.min_max_encode(opt_X)
                opt_new = ~(opt_X[:, None] == temp_X[None].cpu()).all(-1).any(1)
                for i, x in enumerate(opt_X):
                    if opt_new[i]:
                        if self.verbose:
                            print(f"New point at pos {i}: {back_transformed_x_options[i], x_rs[i]} with ei {x_rs_eis[i]}")
                        self.rand_prev = False
                        return [back_transformed_x_options[i]]
print("No new points found, random suggestion")
return self.random_suggest()
except Exception as e:
raise e
    def transform(self, X_dict):
        """Transform a (sorted) hyperparameter dict into a list of values in the model's space.

        Note: mutates X_dict in place and assumes its key order matches api_config.
        """
        X_tf = []
        for i, feature in enumerate(X_dict.keys()):
            X_dict[feature] = self.transform_feature(X_dict[feature], i)
            X_tf.append(X_dict[feature])
        return X_tf
    def transform_inverse(self, X_list):
        """Inverse of transform: map a list of model-space values back to a hyperparameter dict."""
        X_tf = {}
        for i, hp_name in enumerate(self.hp_names):
            X_tf[hp_name] = self.transform_feature_inverse(X_list[i], i)
        return X_tf
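    # Example (hypothetical api_config {"lr": {"type": "real", "space": "log", "range": (1e-4, 1e-1)}}):
    # transform({"lr": 1e-2}) -> [log(1e-2)] and transform_inverse([log(1e-2)]) -> {"lr": 0.01}.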
def observe(self, X, y):
"""Feed an observation back.
Parameters
----------
X : list of dict-like
Places where the objective function has already been evaluated.
Each suggestion is a dictionary where each key corresponds to a
parameter being optimized.
y : array-like, shape (n,)
Corresponding values where objective has been evaluated
"""
# Update the model with new objective function observations
# ...
# No return statement needed
        if np.isinf(y) and y > 0:
            # cap +inf objective values at a large finite number so they can still be recorded
            y[:] = 1e10
        if not np.isnan(y) and not np.isinf(y):
            assert len(y) == 1 and len(X) == 1, "Only one suggestion at a time is supported"
X = {key: value for key, value in sorted(X[0].items())}
assert list(X.keys()) == list(self.api_config.keys()) == list(self.hp_names) == list(
self.space_x.param_list)
if self.verbose:
print(f"{X=}, {y=}")
X = self.transform(X)
if self.verbose:
print(f"transformed {X=}")
self.X.append(X)
if self.minimize:
self.y.append(-y[0])
else:
self.y.append(y[0])
        else:
            assert False, f"Cannot observe non-finite objective value y={y}"
def test():
from bayesmark.experiment import _build_test_problem, run_study, OBJECTIVE_NAMES
#function_instance = _build_test_problem(model_name='ada', dataset='breast', scorer='nll', path=None)
function_instance = _build_test_problem(model_name='kNN', dataset='boston', scorer='mse', path=None)
# Setup optimizer
api_config = function_instance.get_api_config()
config = {
"pfn_file": 'final_models/model_hebo_morebudget_9_unused_features_3.pt',
"minimize": 1,
"device": "cpu:0",
"fit_encoder_from_step": None,
'pre_sample_size': 1_000,
'num_grad_steps': 15_000,
'num_candidates': 10,
'rand_bool': True,
}
opt = PFNOptimizer(api_config, verbose=True, **config)
function_evals, timing, suggest_log = run_study(
opt, function_instance, 50, 1, callback=None, n_obj=len(OBJECTIVE_NAMES),
)
if __name__ == "__main__":
import uuid
from bayesmark.serialize import XRSerializer
from bayesmark.cmd_parse import CmdArgs
import bayesmark.cmd_parse as cmd
import bayesmark.constants as cc
description = "Run a study with one benchmark function and an optimizer"
args = cmd.parse_args(cmd.experiment_parser(description))
run_uuid = uuid.UUID(args[CmdArgs.uuid])
# set global logging level
logging.basicConfig(level=logging.DEBUG)
# This is the entry point for experiments, so pass the class to experiment_main to use this optimizer.
# This statement must be included in the wrapper class file:
    experiment_main(PFNOptimizer, args=args)