Hi, thanks for this repository! So far it has worked quite well, but I've suddenly run into a weird error after 11 optimization steps of non-batched HEBO:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_2773121/4102601230.py in <module>
35
36 for i in range(opt_steps):
---> 37 rec = opt.suggest()
38 if "bs" in rec:
39 rec["bs"] = 2 ** rec["bs"]
~/.local/lib/python3.8/site-packages/hebo/optimizers/hebo.py in suggest(self, n_suggestions, fix_input)
151 sig = Sigma(model, linear_a = -1.)
152 opt = EvolutionOpt(self.space, acq, pop = 100, iters = 100, verbose = False, es=self.es)
--> 153 rec = opt.optimize(initial_suggest = best_x, fix_input = fix_input).drop_duplicates()
154 rec = rec[self.check_unique(rec)]
155
~/.local/lib/python3.8/site-packages/hebo/acq_optimizers/evolution_optimizer.py in optimize(self, initial_suggest, fix_input, return_pop)
125 crossover = self.get_crossover()
126 algo = get_algorithm(self.es, pop_size = self.pop, sampling = init_pop, mutation = mutation, crossover = crossover, repair = self.repair)
--> 127 res = minimize(prob, algo, ('n_gen', self.iter), verbose = self.verbose)
128 if res.X is not None and not return_pop:
129 opt_x = res.X.reshape(-1, len(lb)).astype(float)
~/.local/lib/python3.8/site-packages/pymoo/optimize.py in minimize(problem, algorithm, termination, copy_algorithm, copy_termination, **kwargs)
81
82 # actually execute the algorithm
---> 83 res = algorithm.run()
84
85 # store the deep copied algorithm in the result object
~/.local/lib/python3.8/site-packages/pymoo/core/algorithm.py in run(self)
211 # while termination criterion not fulfilled
212 while self.has_next():
--> 213 self.next()
214
215 # create the result object to be returned
~/.local/lib/python3.8/site-packages/pymoo/core/algorithm.py in next(self)
231 # call the advance with them after evaluation
232 if infills is not None:
--> 233 self.evaluator.eval(self.problem, infills, algorithm=self)
234 self.advance(infills=infills)
235
~/.local/lib/python3.8/site-packages/pymoo/core/evaluator.py in eval(self, problem, pop, skip_already_evaluated, evaluate_values_of, count_evals, **kwargs)
93 # actually evaluate all solutions using the function that can be overwritten
94 if len(I) > 0:
---> 95 self._eval(problem, pop[I], evaluate_values_of=evaluate_values_of, **kwargs)
96
97 # set the feasibility attribute if cv exists
~/.local/lib/python3.8/site-packages/pymoo/core/evaluator.py in _eval(self, problem, pop, evaluate_values_of, **kwargs)
110 evaluate_values_of = self.evaluate_values_of if evaluate_values_of is None else evaluate_values_of
111
--> 112 out = problem.evaluate(pop.get("X"),
113 return_values_of=evaluate_values_of,
114 return_as_dictionary=True,
~/.local/lib/python3.8/site-packages/pymoo/core/problem.py in evaluate(self, X, return_values_of, return_as_dictionary, *args, **kwargs)
122
123 # do the actual evaluation for the given problem - calls in _evaluate method internally
--> 124 self.do(X, out, *args, **kwargs)
125
126 # make sure the array is 2d before doing the shape check
~/.local/lib/python3.8/site-packages/pymoo/core/problem.py in do(self, X, out, *args, **kwargs)
160
161 def do(self, X, out, *args, **kwargs):
--> 162 self._evaluate(X, out, *args, **kwargs)
163 out_to_2d_ndarray(out)
164
~/.local/lib/python3.8/site-packages/hebo/acq_optimizers/evolution_optimizer.py in _evaluate(self, x, out, *args, **kwargs)
46
47 with torch.no_grad():
---> 48 acq_eval = self.acq(xcont, xenum).numpy().reshape(num_x, self.acq.num_obj + self.acq.num_constr)
49 out['F'] = acq_eval[:, :self.acq.num_obj]
50
~/.local/lib/python3.8/site-packages/hebo/acquisitions/acq.py in __call__(self, x, xe)
37
38 def __call__(self, x : Tensor, xe : Tensor):
---> 39 return self.eval(x, xe)
40
41 class SingleObjectiveAcq(Acquisition):
~/.local/lib/python3.8/site-packages/hebo/acquisitions/acq.py in eval(self, x, xe)
155 normed = ((self.tau - self.eps - py - noise * torch.randn(py.shape)) / ps)
156 dist = Normal(0., 1.)
--> 157 log_phi = dist.log_prob(normed)
158 Phi = dist.cdf(normed)
159 PI = Phi
~/.local/lib/python3.8/site-packages/torch/distributions/normal.py in log_prob(self, value)
71 def log_prob(self, value):
72 if self._validate_args:
---> 73 self._validate_sample(value)
74 # compute the variance
75 var = (self.scale ** 2)
~/.local/lib/python3.8/site-packages/torch/distributions/distribution.py in _validate_sample(self, value)
286 valid = support.check(value)
287 if not valid.all():
--> 288 raise ValueError(
289 "Expected value argument "
290 f"({type(value).__name__} of shape {tuple(value.shape)}) "
ValueError: Expected value argument (Tensor of shape (100, 1)) to be within the support (Real()) of the distribution Normal(loc: 0.0, scale: 1.0), but found invalid values:
tensor([[ -1.1836],
        [ -1.2862],
        [-11.6360],
        ...
        [ -1.6276],
        [     nan],
        [-15.3692],
        ...
        [ -0.2335],
        [ -0.3788]])
(most rows omitted; the single nan is the only invalid entry in the tensor)
So it looks like a NaN is reaching the Normal distribution inside HEBO's acquisition function, but my input parameters (opt.X) and losses (opt.y) never contain NaNs.
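For reference, the computation that blows up (acq.py line 155 in the traceback) can be reproduced standalone: a single NaN in the GP predictive mean py (or a 0/0 when ps is zero) is enough to trigger exactly this ValueError. The tau/eps/noise values below are made up for the sketch:

import torch
from torch.distributions import Normal

# Same expression as hebo/acquisitions/acq.py line 155:
#   normed = (tau - eps - py - noise * randn) / ps
py  = torch.tensor([[0.5], [float('nan')]])  # pretend the surrogate returned a NaN mean
ps  = torch.tensor([[0.1], [0.2]])           # predictive std
tau, eps, noise = 0.2, 1e-4, 0.0             # made-up values for this sketch
normed = (tau - eps - py - noise * torch.randn(py.shape)) / ps
Normal(0., 1.).log_prob(normed)  # raises the same "found invalid values" ValueError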
This is the design space I'm using:
from hebo.design_space.design_space import DesignSpace
from hebo.optimizers.hebo import HEBO

space = DesignSpace().parse([
    {'name': 'lr',               'type': 'num', 'lb': 0.00005, 'ub': 0.1},
    {'name': 'n_estimators',     'type': 'int', 'lb': 1,    'ub': 20},   # multiplied by 10
    {'name': 'max_depth',        'type': 'int', 'lb': 1,    'ub': 10},
    {'name': 'subsample',        'type': 'num', 'lb': 0.5,  'ub': 0.99},
    {'name': 'colsample_bytree', 'type': 'num', 'lb': 0.5,  'ub': 0.99},
    {'name': 'gamma',            'type': 'num', 'lb': 0.01, 'ub': 10.0},
    {'name': 'min_child_weight', 'type': 'int', 'lb': 1,    'ub': 10},
    {'name': 'fill_type',        'type': 'cat', 'categories': ['median', 'pat_median', 'pat_ema']},
    {'name': 'flat_block_size',  'type': 'int', 'lb': 1,    'ub': 1},
])
opt = HEBO(space)
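And this is roughly how I drive the optimizer (simplified; train_and_eval() is a stand-in for my actual XGBoost training and evaluation):

for i in range(opt_steps):
    rec = opt.suggest()                    # <- fails here at step 11
    opt.observe(rec, train_and_eval(rec))  # loss as np.ndarray of shape (n, 1)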
I have already tried commenting out flat_block_size, since I suspected that lb == ub might be the problem, but it still crashes.
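In case it helps, here is the check I was going to run right before the crash to see whether the freshly fitted surrogate itself already produces NaN predictions on the observed data. The internal API usage (get_model, space.transform, predict returning mean and variance, 'gpy' as the default surrogate) is my guess from reading hebo/optimizers/hebo.py and may need adjusting:

import torch
from hebo.models.model_factory import get_model

# Guessed from hebo/optimizers/hebo.py: refit a surrogate on the data
# gathered so far and look for NaNs / degenerate variances in its output.
X, Xe = space.transform(opt.X)        # continuous / categorical split
y     = torch.FloatTensor(opt.y)
model = get_model('gpy', space.num_numeric, space.num_categorical, 1)
model.fit(X, Xe, y)
py, ps2 = model.predict(X, Xe)        # predictive mean and variance
print('NaN in mean:    ', torch.isnan(py).any().item())
print('NaN in variance:', torch.isnan(ps2).any().item())
print('variance <= 0:  ', (ps2 <= 0).any().item())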
Any ideas on how I can debug this?