from typing import Optional, Union
import numpy as np
from pybop.costs.base_cost import BaseCost
from pybop.observers.observer import Observer
class FittingCost(BaseCost):
    """
    Base class for fitting-type cost functions, extending `BaseCost`.

    Subclasses override `_error_measure` to turn the residual between the
    model prediction and the target data into a scalar cost (and, when
    sensitivities are supplied, its gradient). A lower cost value indicates
    a better fit between the prediction and the observed data.
    """

    def __init__(self, problem):
        super().__init__(problem)
        # Axes handed to the `axis` argument of the gradient reductions
        # performed in the subclasses' `_error_measure` implementations.
        if self.n_outputs > 1:
            self.numpy_axis = (0, 2)
        else:
            self.numpy_axis = (1, 2)

    def compute(
        self,
        y: dict,
        dy: Optional[np.ndarray] = None,
    ) -> Union[float, tuple[float, np.ndarray]]:
        """
        Evaluate the cost for a set of predictions.

        Parameters
        ----------
        y : dict
            Predictions keyed by the names of the fitted signals.
        dy : np.ndarray, optional
            Gradient of each signal with respect to the parameters.

        Returns
        -------
        tuple or float
            The cost alone when ``dy`` is None, otherwise the pair
            ``(cost, gradient)``. A prediction that fails verification
            yields ``np.inf`` (paired with ``self.grad_fail`` when a
            gradient was requested).
        """
        if self.verify_prediction(y):
            # One residual row per fitted signal, stacked into a single array.
            residual = np.asarray(
                [y[name] - self._target[name] for name in self.signal]
            )
            return self._error_measure(r=residual, dy=dy)

        # The prediction could not be verified: report an infinite cost.
        if dy is None:
            return np.inf
        return np.inf, self.grad_fail

    def _error_measure(
        self,
        r: np.ndarray,
        dy: Optional[np.ndarray] = None,
    ) -> Union[float, tuple[float, np.ndarray]]:
        """
        Evaluate the error measure for a residual; subclasses must override.

        Parameters
        ----------
        r : np.ndarray
            The residual difference between the model prediction and the target.
        dy : np.ndarray, optional
            Gradient of each signal with respect to the parameters.

        Returns
        -------
        tuple or float
            The cost alone when ``dy`` is None, otherwise the pair
            ``(cost, gradient)``.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError
class MeanSquaredError(FittingCost):
    """
    Mean squared error (MSE) cost function.

    The cost is the mean of the squared magnitude of the residual between
    the model predictions and the target data.
    """

    def _error_measure(
        self,
        r: np.ndarray,
        dy: Optional[np.ndarray] = None,
    ) -> Union[float, tuple[float, np.ndarray]]:
        """
        Return the MSE of the residual ``r`` and, if ``dy`` is given, its gradient.

        Parameters
        ----------
        r : np.ndarray
            The residual difference between the model prediction and the target.
        dy : np.ndarray, optional
            Gradient of each signal with respect to the parameters.

        Returns
        -------
        tuple or float
            The cost alone when ``dy`` is None, otherwise ``(cost, gradient)``.
        """
        cost = np.mean(np.abs(r) ** 2)
        if dy is None:
            return cost

        # d/dθ mean(r²) = 2 · mean(r · dr/dθ), reduced over `self.numpy_axis`.
        weighted_sensitivity = r * dy.T
        gradient = 2 * np.mean(weighted_sensitivity, axis=self.numpy_axis)
        return cost, gradient
class RootMeanSquaredError(FittingCost):
    """
    Root mean squared error (RMSE) cost function.

    The cost is the square root of the mean squared magnitude of the
    residual between the model predictions and the target data.
    """

    def _error_measure(
        self,
        r: np.ndarray,
        dy: Optional[np.ndarray] = None,
    ) -> Union[float, tuple[float, np.ndarray]]:
        """
        Return the RMSE of the residual ``r`` and, if ``dy`` is given, its gradient.

        Parameters
        ----------
        r : np.ndarray
            The residual difference between the model prediction and the target.
        dy : np.ndarray, optional
            Gradient of each signal with respect to the parameters.

        Returns
        -------
        tuple or float
            The cost alone when ``dy`` is None, otherwise ``(cost, gradient)``.
        """
        cost = np.sqrt(np.mean(np.abs(r) ** 2))
        if dy is None:
            return cost

        # Machine epsilon keeps the division finite when the cost is zero.
        mean_weighted = np.mean((r * dy.T), axis=self.numpy_axis)
        gradient = mean_weighted / (cost + np.finfo(float).eps)
        return cost, gradient
class MeanAbsoluteError(FittingCost):
    """
    Mean absolute error (MAE) cost function.

    The cost is the mean magnitude of the residual between the model
    predictions and the target data, irrespective of the sign of each error.
    """

    def _error_measure(
        self,
        r: np.ndarray,
        dy: Optional[np.ndarray] = None,
    ) -> Union[float, tuple[float, np.ndarray]]:
        """
        Return the MAE of the residual ``r`` and, if ``dy`` is given, its gradient.

        Parameters
        ----------
        r : np.ndarray
            The residual difference between the model prediction and the target.
        dy : np.ndarray, optional
            Gradient of each signal with respect to the parameters.

        Returns
        -------
        tuple or float
            The cost alone when ``dy`` is None, otherwise ``(cost, gradient)``.
        """
        cost = np.mean(np.abs(r))
        if dy is None:
            return cost

        # The derivative of |r| is sign(r), so each sensitivity is weighted
        # by the sign of its residual before averaging.
        gradient = np.mean(np.sign(r) * dy.T, axis=self.numpy_axis)
        return cost, gradient
class SumSquaredError(FittingCost):
    """
    Sum of squared error (SSE) cost function.

    The cost is the sum of the squared magnitude of the residual between the
    model predictions and the target data, i.e. a measure of the total error.
    """

    def _error_measure(
        self,
        r: np.ndarray,
        dy: Optional[np.ndarray] = None,
    ) -> Union[float, tuple[float, np.ndarray]]:
        """
        Return the SSE of the residual ``r`` and, if ``dy`` is given, its gradient.

        Parameters
        ----------
        r : np.ndarray
            The residual difference between the model prediction and the target.
        dy : np.ndarray, optional
            Gradient of each signal with respect to the parameters.

        Returns
        -------
        tuple or float
            The cost alone when ``dy`` is None, otherwise ``(cost, gradient)``.
        """
        cost = np.sum(np.abs(r) ** 2)
        if dy is None:
            return cost

        # d/dθ sum(r²) = 2 · sum(r · dr/dθ), reduced over `self.numpy_axis`.
        weighted_sensitivity = r * dy.T
        gradient = 2 * np.sum(weighted_sensitivity, axis=self.numpy_axis)
        return cost, gradient
class Minkowski(FittingCost):
    """
    The Minkowski distance is a generalisation of several distance metrics,
    including the Euclidean and Manhattan distances. It is defined as:

    .. math::
        L_p(x, y) = ( \\sum_i |x_i - y_i|^p )^{1/p}

    where p > 0 is the order of the Minkowski distance. For p ≥ 1, the
    Minkowski distance is a metric. For 0 < p < 1, it is not a metric, as it
    does not satisfy the triangle inequality, although a metric can be
    obtained by removing the (1/p) exponent.

    Special cases:

    * p = 1: Manhattan distance
    * p = 2: Euclidean distance
    * p → ∞: Chebyshev distance (not implemented as yet)

    This class implements the Minkowski distance as a cost function for
    optimisation problems, allowing for flexible distance-based optimisation
    across various problem domains.

    Additional Attributes
    ---------------------
    p : float, optional
        The order of the Minkowski distance.
    """

    def __init__(self, problem, p: float = 2.0):
        """
        Validate and store the order of the distance.

        Raises
        ------
        ValueError
            If ``p`` is negative or not finite.
        """
        super().__init__(problem)
        # NOTE(review): p = 0 passes this check although the message and the
        # class docstring require p > 0; `_error_measure` would then divide
        # by zero in `1 / self.p`. Left unchanged for backward compatibility.
        if p < 0:
            raise ValueError(
                "The order of the Minkowski distance must be greater than 0."
            )
        elif not np.isfinite(p):
            raise ValueError(
                "For p = infinity, an implementation of the Chebyshev distance is required."
            )
        # Store the validated order: `_error_measure` reads `self.p`, which
        # was previously never assigned here.
        self.p = float(p)

    def _error_measure(
        self,
        r: np.ndarray,
        dy: Optional[np.ndarray] = None,
    ) -> Union[float, tuple[float, np.ndarray]]:
        """
        Return the Minkowski distance of the residual ``r`` and, if ``dy`` is
        given, its gradient.

        Parameters
        ----------
        r : np.ndarray
            The residual difference between the model prediction and the target.
        dy : np.ndarray, optional
            Gradient of each signal with respect to the parameters.

        Returns
        -------
        tuple or float
            The cost alone when ``dy`` is None, otherwise ``(cost, gradient)``.
        """
        e = np.sum(np.abs(r) ** self.p) ** (1 / self.p)
        if dy is not None:
            # Chain rule on (sum |r|^p)^(1/p); machine epsilon keeps the
            # division finite when the distance is zero.
            de = np.sum(
                np.sign(r) * np.abs(r) ** (self.p - 1) * dy.T, axis=self.numpy_axis
            ) / (e ** (self.p - 1) + np.finfo(float).eps)
            return e, de
        return e
class SumofPower(FittingCost):
    """
    The Sum of Power [1] is a generalised cost function based on the p-th power
    of absolute differences between two vectors. It is defined as:

    .. math::
        C_p(x, y) = \\sum_i |x_i - y_i|^p

    where p ≥ 0 is the power order.

    This class implements the Sum of Power as a cost function for
    optimisation problems, allowing for flexible power-based optimisation
    across various problem domains.

    Special cases:

    * p = 1: Sum of Absolute Differences
    * p = 2: Sum of Squared Differences
    * p → ∞: Maximum Absolute Difference

    Note that this is not normalised, unlike distance metrics. To get a
    distance metric, you would need to take the p-th root of the result.

    [1]: https://mathworld.wolfram.com/PowerSum.html

    Additional Attributes
    ---------------------
    p : float, optional
        The power order for Sum of Power.
    """

    def __init__(self, problem, p: float = 2.0):
        """
        Validate and store the power order.

        Raises
        ------
        ValueError
            If ``p`` is negative or not finite.
        """
        super().__init__(problem)
        if p < 0:
            raise ValueError("The order of 'p' must be greater than 0.")
        elif not np.isfinite(p):
            raise ValueError("p = np.inf is not yet supported.")
        # Store the validated order: `_error_measure` reads `self.p`, which
        # was previously never assigned here.
        self.p = float(p)

    def _error_measure(
        self,
        r: np.ndarray,
        dy: Optional[np.ndarray] = None,
    ) -> Union[float, tuple[float, np.ndarray]]:
        """
        Return the sum of the p-th powers of ``|r|`` and, if ``dy`` is given,
        its gradient.

        Parameters
        ----------
        r : np.ndarray
            The residual difference between the model prediction and the target.
        dy : np.ndarray, optional
            Gradient of each signal with respect to the parameters.

        Returns
        -------
        tuple or float
            The cost alone when ``dy`` is None, otherwise ``(cost, gradient)``.
        """
        e = np.sum(np.abs(r) ** self.p)
        if dy is not None:
            # d/dθ |r|^p = p · sign(r) · |r|^(p-1) · dr/dθ
            de = self.p * np.sum(
                np.sign(r) * np.abs(r) ** (self.p - 1) * dy.T, axis=self.numpy_axis
            )
            return e, de
        return e
class ObserverCost(BaseCost):
    """
    Observer cost function.

    The cost is the negative of the log likelihood that the observer reports
    for the target data given the current model parameters.

    Inherits all parameters and attributes from ``BaseCost``.
    """

    def __init__(self, observer: Observer):
        # The observer itself is passed to the base class as the problem.
        super().__init__(problem=observer)
        self._observer = observer
        self._has_separable_problem = False

    def compute(
        self,
        y: dict,
        dy: Optional[np.ndarray] = None,
    ) -> float:
        """
        Evaluate the observer cost.

        Neither ``y`` nor ``dy`` is used by this implementation; the value is
        obtained from the observer's ``log_likelihood`` method.

        Parameters
        ----------
        y : dict
            The dictionary of predictions with keys designating the signals
            for fitting (unused here).
        dy : np.ndarray, optional
            The corresponding gradient with respect to the parameters for
            each signal (unused here).

        Returns
        -------
        float
            The observer cost (negative of the log likelihood).
        """
        parameter_values = self._parameters.as_dict()
        likelihood = self._observer.log_likelihood(
            self._target,
            self._observer.domain_data,
            parameter_values,
        )
        return -likelihood