# Source code for pybop._utils

import json
import pickle
import re

import numpy as np
import pandas as pd
from scipy.io import loadmat, savemat



[docs]
class NumpyEncoder(json.JSONEncoder):
    """
    JSON encoder that understands numpy arrays and numpy scalars.

    The standard JSON encoder cannot serialise numpy objects, so arrays are
    converted to (nested) python lists and numpy scalars (e.g. ``np.int64``,
    ``np.float32``, ``np.bool_``) are converted to the equivalent python
    scalar before encoding.
    """

    def default(self, obj):
        """
        Return a JSON-serialisable version of ``obj``.

        Parameters
        ----------
        obj : object
            An object that the standard encoder could not serialise.

        Returns
        -------
        list or scalar
            A python list for numpy arrays, or a python scalar for numpy
            scalar types.

        Raises
        ------
        TypeError
            Raised by the base class if ``obj`` is not a numpy array or scalar.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            # numpy scalars such as np.int64 are not instances of the python
            # builtin types, so unwrap them to a native python scalar
            return obj.item()
        # anything else is unsupported; let the base class raise TypeError
        return super().default(obj)  # pragma: no cover





[docs]
def add_spaces(string):
    """
    Split a CamelCase class name into words separated by spaces.

    Runs of consecutive capitals are kept together, so for example
    "IRPropMin" becomes "IR Prop Min".
    """
    # First pass: put a space in front of any capital (other than the very
    # first character) that is directly followed by a non-capital character.
    spaced = re.sub(r"(?<!^)([A-Z])([^A-Z])", r" \1\2", string)
    # Second pass: separate a capital from a preceding character that is
    # neither a capital nor a space (e.g. a trailing capital as in "AdamW").
    return re.sub(r"([^A-Z ])([A-Z])", r"\1 \2", spaced)




[docs]
def is_numeric(x):
    """
    Report whether a variable is a python int or float, or a numpy number.

    Note that bool is a subclass of int, so python booleans are also
    reported as numeric.
    """
    numeric_types = (int, float, np.number)
    return isinstance(x, numeric_types)




[docs]
def save_data_dict(
    data_dict: dict,
    filename: str | None = None,
    to_format: str = "pickle",
) -> str | None:
    """
    Save data from given data dictionary

    Based on pybamm.Solution.save_data

    Parameters
    ----------
    filename : str, optional
        The name of the file to save data to. If None, then a str is returned
    to_format : str, optional
        The format to save to. Options are:

        - 'pickle' (default): creates a pickle file with the data dictionary
        - 'matlab': creates a .mat file, for loading in matlab
        - 'csv': creates a csv file (0D variables only)
        - 'json': creates a json file

    Returns
    -------
    data : str, optional
        str if 'json' is chosen and filename is None, otherwise None
    """

    if to_format == "pickle":
        if filename is None:
            raise ValueError("pickle format must be written to a file")
        with open(filename, "wb") as f:
            pickle.dump(data_dict, f, pickle.HIGHEST_PROTOCOL)
    elif to_format == "matlab":
        if filename is None:
            raise ValueError("matlab format must be written to a file")
        # Check all the variable names only contain a-z, A-Z or _ or numbers
        for name in data_dict.keys():
            # Check the string only contains the following ASCII:
            # a-z (97-122)
            # A-Z (65-90)
            # _ (95)
            # 0-9 (48-57) but not in the first position
            for i, s in enumerate(name):
                if not (
                    97 <= ord(s) <= 122
                    or 65 <= ord(s) <= 90
                    or ord(s) == 95
                    or (i > 0 and 48 <= ord(s) <= 57)
                ):
                    raise ValueError(
                        f"Invalid character '{s}' found in '{name}'. "
                        "MATLAB variable names must only contain a-z, A-Z, _, "
                        "or 0-9 (except the first position). "
                    )
        savemat(filename, data_dict)
    elif to_format == "csv":
        # use copy to avoid modifying input
        data_dict_copy = data_dict.copy()
        for name, var in data_dict.items():
            var = np.asarray(var)
            if var.ndim == 0:
                data_dict_copy[name] = [var]
            elif var.ndim >= 2:
                raise ValueError(
                    f"only 0D variables can be saved to csv, but '{name}' is {var.ndim - 1}D"
                )
        df = pd.DataFrame(data_dict_copy)
        return df.to_csv(filename, index=False)
    elif to_format == "json":
        if filename is None:
            return json.dumps(data_dict, cls=NumpyEncoder)
        else:
            with open(filename, "w") as outfile:
                json.dump(data_dict, outfile, cls=NumpyEncoder)
    else:
        raise ValueError(f"format '{to_format}' is not supported")




[docs]
def load_data_dict(
    filename: str,
    file_format: str = "pickle",
    data_keys_0d: list[str] | None = None,
    data_keys_1d: list[str] | None = None,
) -> dict:
    """
    Load data as dictionary from a given file. Restores data saved with
    save_data_dict.

    Parameters
    ----------
    filename : str
        The name of the file containing the data.
    file_format : str, optional
        The format the data was save to. Options are:
        - 'pickle' (default)
        - 'matlab'
        - 'csv'
        - 'json'
    data_keys_0d: list[str], optional
        A list of keys for which the data is a scalar/0-dimensional.
        This is only needed for file_format='matlab' or file_format = 'csv'.
        scipy.io.savemat turns any data into a multi-dimensional array with at least 2 dimensions.
        If provided, data dimensions will be consistent with the original data.
    data_keys_1d: list[str], optional
        A list of keys for which the data is a 1-dimensional list or array.
        This is only needed for file_format='matlab'. scipy.io.savemat turns any
        data into a multi-dimensional array with at least 2 dimensions.
        If provided, data dimensions will be consistent with the original data.

    Returns
    -------
    data_dict :
        python dictionary containing the data in the file.
    """

    # Load data using appropriate method according to the file_format
    if file_format == "pickle":
        with open(filename, "rb") as f:
            data_dict = pickle.load(f)

    elif file_format == "matlab":
        data_dict = {}
        loadmat(filename, mdict=data_dict)

        # fix dimensions for 0-d and 1-d data
        data_keys_0d = data_keys_0d or []
        data_keys_1d = data_keys_1d or []
        for key in data_keys_0d:
            if key in data_dict.keys():
                data_dict[key] = data_dict[key][0, 0]
        for key in data_keys_1d:
            if key in data_dict.keys():
                data_dict[key] = data_dict[key].flatten()

    elif file_format == "json":
        with open(filename) as f:
            data_dict = json.load(f)

    elif file_format == "csv":
        data_dict = pd.read_csv(filename).to_dict(orient="list")

        # fix dimensions for 0-d data
        data_keys_0d = data_keys_0d or []
        for key in data_keys_0d:
            if key in data_dict.keys():
                data_dict[key] = data_dict[key][0]

    else:
        raise ValueError(f"format '{file_format}' is not supported.")

    return data_dict