# Source code for pybop.optimisers._irprop_plus
#
# Initially based on Pints' IRProp- class.
#
import numpy as np
import pints
from pints import Optimiser as PintsOptimiser
from pints import RectangularBoundaries
class IRPropPlusImpl(PintsOptimiser):
    """
    Resilient backpropagation optimiser with weight backtracking (iRprop+).

    The iRprop+ algorithm adjusts step sizes based on the sign of the gradient
    in each dimension, increasing step sizes when the sign remains consistent
    and decreasing when the sign changes. This implementation includes
    weight (parameter) backtracking that reverts the previous step in the
    event of a gradient sign changing.

    References
    ----------
    - [1] Igel and Hüsken (2003): Empirical Evaluation of Improved Rprop Algorithms.
    - [2] Riedmiller and Braun (1993): A Direct Adaptive Method for Faster Backpropagation.
    - [3] Igel and Hüsken (2000): Improving the Rprop Learning Algorithm.

    Parameters
    ----------
    x0 : array-like
        Initial starting point for the optimisation.
    sigma0 : float or array-like, optional
        Initial step size(s). The value is forwarded to the ``pints.Optimiser``
        base class, and the resulting ``self._sigma0`` seeds the per-dimension
        step sizes. This constructor itself supplies no default.
    boundaries : pints.Boundaries, optional
        Boundary constraints for the optimisation. If None, no boundaries are
        applied. Only ``pints.RectangularBoundaries`` are enforced when
        generating proposals.

    Attributes
    ----------
    eta_min : float
        Factor by which the step size is reduced when the gradient sign changes.
        Default is 0.5.
    eta_max : float
        Factor by which the step size is increased when the gradient sign
        remains consistent. Default is 1.2.
    step_min : float
        Minimum allowable step size. Default is 1e-4 * min(sigma0).
    step_max : float or None
        Maximum allowable step size. Default is None (unlimited).
    """

    def __init__(
        self,
        x0: np.ndarray,
        sigma0: list[float] | None,
        boundaries: pints.Boundaries | None,
    ):
        super().__init__(x0, sigma0, boundaries)

        # Set hypers
        self.eta_min = 0.5
        self.eta_max = 1.2
        self.step_min = 1e-4 * np.min(self._sigma0)
        self.step_max: float | None = None

        # Store the previous update for backtracking
        self._update_previous = np.zeros_like(x0, dtype=float)

        # Internal states
        self._x_current = np.array(x0, dtype=float)
        self._gradient_previous = None
        self._step_sizes = np.array(self._sigma0, dtype=float)
        self._f_current = np.inf
        self._x_best = np.array(x0, dtype=float)
        self._f_best = np.inf
        self._running = False
        self._ready_for_tell = False

        # Boundaries
        self._boundaries = boundaries
        if isinstance(boundaries, RectangularBoundaries):
            self._lower = boundaries.lower()
            self._upper = boundaries.upper()
        else:
            self._lower = self._upper = None

        # Set proposals (read-only so callers cannot alter the internal state)
        self._proposed = np.copy(self._x_current)
        self._proposed.setflags(write=False)

    def ask(self):
        """
        Proposes the next point for evaluation.

        On the first call the step-size limits are validated and the
        optimiser is marked as running.

        Returns
        -------
        list
            A list containing the proposed point.

        Raises
        ------
        ValueError
            If both ``step_min`` and ``step_max`` are set and
            ``step_min >= step_max``.
        """
        if not self._running:
            if self.step_min is not None and self.step_max is not None:
                if self.step_min >= self.step_max:
                    raise ValueError(
                        f"Minimum step size ({self.step_min}) must be less than "
                        f"maximum step size ({self.step_max})."
                    )
            self._running = True

        self._ready_for_tell = True
        return [self._proposed]

    def tell(self, reply):
        """
        Updates the optimiser with the function value and gradient at the proposed point.

        The first call only records the gradient and function value; the
        proposal is left unchanged, so the same point is proposed again.

        Parameters
        ----------
        reply : list
            A list containing a tuple of (function value, gradient) at the proposed point.

        Raises
        ------
        RuntimeError
            If `ask()` was not called before `tell()`.
        """
        if not self._ready_for_tell:
            raise RuntimeError("ask() must be called before tell().")
        self._ready_for_tell = False

        f_new, gradient_new = reply[0]
        # Private float copy: entries are zeroed below and the caller's
        # gradient array must not be modified as a side effect.
        gradient_new = np.array(gradient_new, dtype=float).reshape(-1)

        # Setup for first iteration
        if self._gradient_previous is None:
            self._gradient_previous = gradient_new
            self._f_current = f_new
            return

        # Compute element-wise gradient product
        grad_product = gradient_new * self._gradient_previous
        sign_kept = grad_product > 0
        sign_flipped = grad_product < 0

        # Update step sizes, and bound them
        self._step_sizes[sign_kept] *= self.eta_max
        self._step_sizes[sign_flipped] *= self.eta_min
        if self.step_min is not None or self.step_max is not None:
            self._step_sizes = np.clip(
                self._step_sizes, self.step_min, self.step_max
            )

        # Writable copy of the point at which f_new was evaluated
        x_evaluated = np.array(self._proposed, dtype=float)

        # Update best solution; store an independent copy so that later
        # in-place edits of the current position cannot alter it.
        if f_new < self._f_best:
            self._f_best = f_new
            self._x_best = np.copy(x_evaluated)

        # Handle weight backtracking: revert the last update in every
        # dimension where the gradient sign changed, and zero that gradient
        # entry so the dimension is neither stepped nor adapted next time.
        gradient_new[sign_flipped] = 0
        x_evaluated[sign_flipped] -= self._update_previous[sign_flipped]

        # Update the current position
        self._x_current = x_evaluated
        self._f_current = f_new
        self._gradient_previous = gradient_new

        # Step in the direction of the negative gradient
        direction = np.sign(gradient_new)
        proposed = self._x_current - self._step_sizes * direction

        # Boundaries
        if self._lower is not None:
            # Rectangular boundaries: shrink the step in violating dimensions
            # until the proposal is inside [lower, upper).
            while True:
                outside = np.logical_or(
                    proposed < self._lower, proposed >= self._upper
                )
                if not np.any(outside):
                    break
                if not np.any(self._step_sizes[outside] * direction[outside]):
                    # The violating coordinates cannot move (zero gradient
                    # sign or vanished step), so shrinking further would
                    # loop forever.
                    break
                self._step_sizes[outside] *= self.eta_min
                proposed = self._x_current - self._step_sizes * direction

        # Store the update actually applied (after any boundary shrinking)
        # so that a later backtrack reverts the right displacement.
        self._update_previous = -self._step_sizes * direction

        # Update proposed attribute
        self._proposed = proposed
        self._proposed.setflags(write=False)

    def running(self):
        """Returns the state of the optimiser"""
        return self._running

    def f_best(self):
        """Returns the best function value found so far."""
        return self._f_best

    def x_best(self):
        """Returns the best position found so far."""
        return self._x_best

    def name(self):
        """Returns the name of the optimiser."""
        return "iRprop+"

    def needs_sensitivities(self):
        """Indicates that this optimiser requires gradient information."""
        return True