Source code for classification

import numpy as np

from pybop import OptimisationResult



[docs]
def classify_using_hessian(
    result: OptimisationResult,
    dx=None,
    cost_tolerance: float | None = 1e-5,
):
    """
    A simple check for parameter correlations based on numerical approximation
    of the Hessian matrix at the optimal point using central finite differences.

    Parameters
    ---------
    result : OptimisationResult
        The optimisation result.
    dx : array-like, optional
        An array of small positive values used to check proximity to the parameter
        bounds and as the perturbation distance in the finite difference calculations.
    cost_tolerance : float, optional
        A small positive tolerance used for cost value comparisons (default: 1e-5).
    """
    x = result.x
    dx = np.asarray(dx) if dx is not None else np.maximum(x, 1e-40) * 1e-2
    best_cost = result.best_cost
    problem = result.optim.problem
    parameters = problem.parameters
    minimising = result.minimising

    def cost(x):
        return problem.__call__(x)

    n = len(x)
    if n != 2 or len(dx) != n:
        raise ValueError(
            "The function classify_using_hessian currently only works in the case "
            "of 2 parameters, and dx must have the same length as x."
        )

    # Get a list of parameter names for use in the output message
    names = parameters.names

    # Evaluate the cost for a grid of surrounding points
    costs = np.zeros((3, 3, 2))
    for i in np.arange(0, 3):
        for j in np.arange(0, 3):
            if i == j == 1:
                costs[1, 1, 0] = best_cost
                costs[1, 1, 1] = best_cost
            else:
                costs[i, j, 0] = cost(x + np.multiply([i - 1, j - 1], dx))
                costs[i, j, 1] = cost(x + np.multiply([i - 1, j - 1], 2 * dx))

    def check_proximity_to_bounds(parameters, x, dx, names) -> str:
        bounds = parameters.get_bounds()
        if bounds is not None:
            for i, value in enumerate(x):
                if value > bounds["upper"][i] - dx[i]:
                    return f" The result is near the upper bound of {names[i]}."

                if value < bounds["lower"][i] + dx[i]:
                    return f" The result is near the lower bound of {names[i]}."
        return ""

    # Classify the result
    if (minimising and np.any(costs < best_cost)) or (
        not minimising and np.any(costs > best_cost)
    ):
        message = "The optimiser has not converged to a stationary point."
        message += check_proximity_to_bounds(parameters, x, dx, names)

    elif not np.all([np.isfinite(cost) for cost in costs]):
        message = "Classification cannot proceed due to infinite cost value(s)."
        message += check_proximity_to_bounds(parameters, x, dx, names)

    else:
        # Estimate the Hessian using second-order accurate central finite differences
        # cfd_hessian = np.zeros((2, 2))
        # cfd_hessian[0, 0] = costs[2,1,0] - 2 * costs[1,1,0] + costs[0,1,0]
        # cfd_hessian[0, 1] = (costs[2,2,0] - costs[2,0,0] + costs[0,0,0] - costs[0,2,0]) / 4
        # cfd_hessian[1, 0] = cfd_hessian[0, 1]
        # cfd_hessian[1, 1] = costs[1,2,0] - 2 * costs[1,1,0] + costs[1,0,0]

        # Estimate the Hessian using fourth-order accurate central finite differences
        cfd_hessian = np.zeros((2, 2))
        cfd_hessian[0, 0] = (
            -costs[2, 1, 1]
            + 16 * costs[2, 1, 0]
            - 30 * costs[1, 1, 0]
            + 16 * costs[0, 1, 0]
            - costs[0, 1, 1]
        ) / 12
        cfd_hessian[0, 1] = (
            -(costs[2, 2, 1] - costs[2, 0, 1] + costs[0, 0, 1] - costs[0, 2, 1])
            + 16 * (costs[2, 2, 0] - costs[2, 0, 0] + costs[0, 0, 0] - costs[0, 2, 0])
        ) / 48
        cfd_hessian[1, 0] = cfd_hessian[0, 1]
        cfd_hessian[1, 1] = (
            -costs[1, 2, 1]
            + 16 * costs[1, 2, 0]
            - 30 * costs[1, 1, 0]
            + 16 * costs[1, 0, 0]
            - costs[1, 0, 1]
        ) / 12

        # Compute the eigenvalues and sort into ascending order
        eigenvalues, eigenvectors = np.linalg.eig(cfd_hessian)
        idx = eigenvalues.argsort()
        eigenvalues = eigenvalues[idx]
        eigenvectors = eigenvectors[:, idx]

        # Classify the result
        if np.all(eigenvalues > cost_tolerance):
            message = "The optimiser has located a minimum."
        elif np.all(eigenvalues < -cost_tolerance):
            message = "The optimiser has located a maximum."
        elif np.all(np.abs(eigenvalues) > cost_tolerance):
            message = "The optimiser has located a saddle point."
        elif np.all(np.abs(eigenvalues) < cost_tolerance):
            message = f"The cost variation is smaller than the cost tolerance: {cost_tolerance}."
        else:
            # One eigenvalue is too small to classify with certainty
            message = "The cost variation is too small to classify with certainty."

        # Check for parameter correlations
        if np.any(np.abs(eigenvalues) > cost_tolerance):
            if np.allclose(eigenvectors[0], np.array([1, 0])):
                message += f" The cost is insensitive to a change of {dx[0]:.2g} in {names[0]}."
            elif np.allclose(eigenvectors[0], np.array([0, 1])):
                message += f" The cost is insensitive to a change of {dx[1]:.2g} in {names[1]}."
            else:
                diagonal_costs = [
                    cost(x - np.multiply(eigenvectors[:, 0], dx)),
                    cost(x + np.multiply(eigenvectors[:, 0], dx)),
                ]
                if np.allclose(best_cost, diagonal_costs, atol=cost_tolerance, rtol=0):
                    message += " There may be a correlation between these parameters."

    print(message)
    return message