Skip to content

Commit

Permalink
ENH: Add Backtest.optimize(method="sambo")
Browse files Browse the repository at this point in the history
  • Loading branch information
kernc committed Jan 21, 2025
1 parent f3a0bc1 commit 9184a0b
Show file tree
Hide file tree
Showing 6 changed files with 396 additions and 363 deletions.
112 changes: 45 additions & 67 deletions backtesting/backtesting.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from concurrent.futures import ProcessPoolExecutor, as_completed
from copy import copy
from functools import lru_cache, partial
from itertools import chain, compress, product, repeat
from itertools import chain, product, repeat
from math import copysign
from numbers import Number
from typing import Callable, Dict, List, Optional, Sequence, Tuple, Type, Union
Expand Down Expand Up @@ -1278,19 +1278,18 @@ def optimize(self, *,
* `"grid"` which does an exhaustive (or randomized) search over the
cartesian product of parameter combinations, and
* `"skopt"` which finds close-to-optimal strategy parameters using
* `"sambo"` which finds close-to-optimal strategy parameters using
[model-based optimization], making at most `max_tries` evaluations.
[model-based optimization]: \
https://scikit-optimize.github.io/stable/auto_examples/bayesian-optimization.html
[model-based optimization]: https://sambo-optimization.github.io
`max_tries` is the maximal number of strategy runs to perform.
If `method="grid"`, this results in randomized grid search.
If `max_tries` is a float in the interval (0, 1], this sets the
number of runs to approximately that fraction of the full grid space.
Alternatively, if integer, it denotes the absolute maximum number
of evaluations. If unspecified (default), grid search is exhaustive,
whereas for `method="skopt"`, `max_tries` is set to 200.
whereas for `method="sambo"`, `max_tries` is set to 200.
`constraint` is a function that accepts a dict-like object of
parameters (with values) and returns `True` when the combination
Expand All @@ -1303,16 +1302,14 @@ def optimize(self, *,
inspected or projected onto 2D to plot a heatmap
(see `backtesting.lib.plot_heatmaps()`).
If `return_optimization` is True and `method = 'skopt'`,
If `return_optimization` is True and `method = 'sambo'`,
in addition to result series (and maybe heatmap), return raw
[`scipy.optimize.OptimizeResult`][OptimizeResult] for further
inspection, e.g. with [scikit-optimize]\
[plotting tools].
inspection, e.g. with [SAMBO]'s [plotting tools].
[OptimizeResult]: \
https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html
[scikit-optimize]: https://scikit-optimize.github.io
[plotting tools]: https://scikit-optimize.github.io/stable/modules/plots.html
[OptimizeResult]: https://sambo-optimization.github.io/doc/sambo/#sambo.OptimizeResult
[SAMBO]: https://sambo-optimization.github.io
[plotting tools]: https://sambo-optimization.github.io/doc/sambo/plot.html
If you want reproducible optimization results, set `random_state`
to a fixed integer random seed.
Expand Down Expand Up @@ -1360,8 +1357,12 @@ def constraint(_):
"the combination of parameters is admissible or not")
assert callable(constraint), constraint

if return_optimization and method != 'skopt':
raise ValueError("return_optimization=True only valid if method='skopt'")
if method == 'skopt':
method = 'sambo'
warnings.warn('`Backtest.optimize(method="skopt")` is deprecated. Use `method="sambo"`.',
DeprecationWarning, stacklevel=2)
if return_optimization and method != 'sambo':
raise ValueError("return_optimization=True only valid if method='sambo'")

def _tuple(x):
return x if isinstance(x, Sequence) and not isinstance(x, str) else (x,)
Expand Down Expand Up @@ -1456,18 +1457,13 @@ def _batch(seq):
return stats, heatmap
return stats

def _optimize_skopt() -> Union[pd.Series,
def _optimize_sambo() -> Union[pd.Series,
Tuple[pd.Series, pd.Series],
Tuple[pd.Series, pd.Series, dict]]:
try:
from skopt import forest_minimize
from skopt.callbacks import DeltaXStopper
from skopt.learning import ExtraTreesRegressor
from skopt.space import Categorical, Integer, Real
from skopt.utils import use_named_args
import sambo
except ImportError:
raise ImportError("Need package 'scikit-optimize' for method='skopt'. "
"pip install scikit-optimize") from None
raise ImportError("Need package 'sambo' for method='sambo'. pip install sambo") from None

nonlocal max_tries
max_tries = (200 if max_tries is None else
Expand All @@ -1478,80 +1474,62 @@ def _optimize_skopt() -> Union[pd.Series,
for key, values in kwargs.items():
values = np.asarray(values)
if values.dtype.kind in 'mM': # timedelta, datetime64
# these dtypes are unsupported in skopt, so convert to raw int
# these dtypes are unsupported in SAMBO, so convert to raw int
# TODO: save dtype and convert back later
values = values.astype(int)

if values.dtype.kind in 'iumM':
dimensions.append(Integer(low=values.min(), high=values.max(), name=key))
dimensions.append((values.min(), values.max() + 1))
elif values.dtype.kind == 'f':
dimensions.append(Real(low=values.min(), high=values.max(), name=key))
dimensions.append((values.min(), values.max()))
else:
dimensions.append(Categorical(values.tolist(), name=key, transform='onehot'))
dimensions.append(values.tolist())

# Avoid recomputing re-evaluations:
# "The objective has been evaluated at this point before."
# https://github.com/scikit-optimize/scikit-optimize/issues/302
memoized_run = lru_cache()(lambda tup: self.run(**dict(tup)))
memoized_run = lru_cache()(lambda tup: self.run(**dict(tup))) # XXX: Reeval if this needed?
progress = iter(_tqdm(repeat(None), total=max_tries, leave=False, desc='Backtest.optimize'))
_names = tuple(kwargs.keys())

# np.inf/np.nan breaks sklearn, np.finfo(float).max breaks skopt.plots.plot_objective
INVALID = 1e300
progress = iter(_tqdm(repeat(None), total=max_tries, desc='Backtest.optimize'))

@use_named_args(dimensions=dimensions)
def objective_function(**params):
def objective_function(x):
nonlocal progress, memoized_run, constraint, _names
next(progress)
# Check constraints
# TODO: Adjust after https://github.com/scikit-optimize/scikit-optimize/pull/971
if not constraint(AttrDict(params)):
return INVALID
res = memoized_run(tuple(params.items()))
res = memoized_run(tuple(zip(_names, x)))
value = -maximize(res)
if np.isnan(value):
return INVALID
return value

with warnings.catch_warnings():
warnings.filterwarnings(
'ignore', 'The objective has been evaluated at this point before.')

res = forest_minimize(
func=objective_function,
dimensions=dimensions,
n_calls=max_tries,
base_estimator=ExtraTreesRegressor(n_estimators=20, min_samples_leaf=2),
acq_func='LCB',
kappa=3,
n_initial_points=min(max_tries, 20 + 3 * len(kwargs)),
initial_point_generator='lhs', # 'sobel' requires n_initial_points ~ 2**N
callback=DeltaXStopper(9e-7),
random_state=random_state)
return 0 if np.isnan(value) else value

def cons(x):
    """Tell the optimizer whether candidate point `x` is admissible.

    `x` holds one value per optimized parameter, in the same order as
    `_names`; the values are paired with their names and handed to the
    user-supplied `constraint` callable as an `AttrDict`.
    """
    # `constraint` and `_names` are only read here, so plain closure
    # lookup suffices and no `nonlocal` declaration is required.
    candidate = AttrDict(zip(_names, x))
    return constraint(candidate)

res = sambo.minimize(
fun=objective_function,
bounds=dimensions,
constraints=cons,
max_iter=max_tries,
method='sceua',
rng=random_state)

stats = self.run(**dict(zip(kwargs.keys(), res.x)))
output = [stats]

if return_heatmap:
heatmap = pd.Series(dict(zip(map(tuple, res.x_iters), -res.func_vals)),
heatmap = pd.Series(dict(zip(map(tuple, res.xv), -res.funv)),
name=maximize_key)
heatmap.index.names = kwargs.keys()
heatmap = heatmap[heatmap != -INVALID]
heatmap.sort_index(inplace=True)
output.append(heatmap)

if return_optimization:
valid = res.func_vals != INVALID
res.x_iters = list(compress(res.x_iters, valid))
res.func_vals = res.func_vals[valid]
output.append(res)

return stats if len(output) == 1 else tuple(output)

if method == 'grid':
output = _optimize_grid()
elif method == 'skopt':
output = _optimize_skopt()
elif method in ('sambo', 'skopt'):
output = _optimize_sambo()
else:
raise ValueError(f"Method should be 'grid' or 'skopt', not {method!r}")
raise ValueError(f"Method should be 'grid' or 'sambo', not {method!r}")
return output

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions backtesting/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,10 @@ def plot_heatmaps(heatmap: pd.Series,
.. todo::
Lay heatmaps out lower-triangular instead of in a simple grid.
Like [`skopt.plots.plot_objective()`][plot_objective] does.
Like [`sambo.plot.plot_objective()`][plot_objective] does.
[plot_objective]: \
https://scikit-optimize.github.io/stable/modules/plots.html#plot-objective
https://sambo-optimization.github.io/doc/sambo/plot.html#sambo.plot.plot_objective
"""
return _plot_heatmaps(heatmap, agg, ncols, filename, plot_width, open_browser)

Expand Down
12 changes: 6 additions & 6 deletions backtesting/test/_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,30 +550,30 @@ def test_optimize(self):
with _tempfile() as f:
bt.plot(filename=f, open_browser=False)

def test_method_skopt(self):
def test_method_sambo(self):
bt = Backtest(GOOG.iloc[:100], SmaCross)
res, heatmap, skopt_results = bt.optimize(
res, heatmap, sambo_results = bt.optimize(
fast=range(2, 20), slow=np.arange(2, 20, dtype=object),
constraint=lambda p: p.fast < p.slow,
max_tries=30,
method='skopt',
method='sambo',
return_optimization=True,
return_heatmap=True,
random_state=2)
self.assertIsInstance(res, pd.Series)
self.assertIsInstance(heatmap, pd.Series)
self.assertGreater(heatmap.max(), 1.1)
self.assertGreater(heatmap.min(), -2)
self.assertEqual(-skopt_results.fun, heatmap.max())
self.assertEqual(-sambo_results.fun, heatmap.max())
self.assertEqual(heatmap.index.tolist(), heatmap.dropna().index.unique().tolist())

def test_max_tries(self):
bt = Backtest(GOOG.iloc[:100], SmaCross)
OPT_PARAMS = {'fast': range(2, 10, 2), 'slow': [2, 5, 7, 9]}
for method, max_tries, random_state in (('grid', 5, 0),
('grid', .3, 0),
('skopt', 7, 0),
('skopt', .45, 0)):
('sambo', 6, 0),
('sambo', .42, 0)):
with self.subTest(method=method,
max_tries=max_tries,
random_state=random_state):
Expand Down
Loading

0 comments on commit 9184a0b

Please sign in to comment.