# Source code for neurokit2.stats.standardize

# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd

from .mad import mad


def standardize(data, robust=False, window=None, **kwargs):
    """Standardization of data.

    Performs a standardization of data (Z-scoring), i.e., centering and scaling, so that the data is
    expressed in terms of standard deviation (i.e., mean = 0, SD = 1) or Median Absolute Deviation
    (median = 0, MAD = 1).

    Parameters
    ----------
    data : Union[list, np.array, pd.Series, pd.DataFrame]
        Raw data.
    robust : bool
        If True, centering is done by subtracting the median from the variables and dividing it by
        the median absolute deviation (MAD). If False, variables are standardized by subtracting the
        mean and dividing it by the standard deviation (SD).
    window : int
        Perform a rolling window standardization, i.e., apply a standardization on a window of the
        specified number of samples that rolls along the main axis of the signal. Can be used for
        complex detrending.
    **kwargs : optional
        Other arguments to be passed to ``pandas.rolling()``.

    Returns
    -------
    Union[list, np.array, pd.Series, pd.DataFrame]
        The standardized values, returned in the same kind of container as ``data``: a list for a
        list, a ``pd.DataFrame`` for a ``pd.DataFrame`` and a ``pd.Series`` for a ``pd.Series``
        (index, column labels and name of the input are kept). Any other input is returned as
        produced by the computation (typically a ``np.array``).

    Examples
    --------
    >>> import neurokit2 as nk
    >>> import numpy as np
    >>> import pandas as pd
    >>>
    >>> # Simple example
    >>> nk.standardize([3, 1, 2, 4, 6, np.nan]) #doctest: +ELLIPSIS
    [...]
    >>> nk.standardize([3, 1, 2, 4, 6, np.nan], robust=True) #doctest: +ELLIPSIS
    [...]
    >>> nk.standardize(np.array([[1, 2, 3, 4], [5, 6, 7, 8]]).T) #doctest: +ELLIPSIS
    array(...)
    >>> nk.standardize(pd.DataFrame({"A": [3, 1, 2, 4, 6, np.nan],
    ...                              "B": [3, 1, 2, 4, 6, 5]})) #doctest: +ELLIPSIS
              A         B
    0       ...       ...
    ...
    >>>
    >>> # Rolling standardization of a signal
    >>> signal = nk.signal_simulate(frequency=[0.1, 2], sampling_rate=200)
    >>> z = nk.standardize(signal, window=200)
    >>> nk.signal_plot([signal, z], standardize=True)

    """
    # Lists are converted to arrays for the computation and converted back afterwards
    if isinstance(data, list):
        return list(_standardize(np.array(data), robust=robust, window=window, **kwargs))

    z = _standardize(data, robust=robust, window=window, **kwargs)

    # The rolling computation hands back bare arrays; re-attach the labels of the
    # input so that the index, the column names and the Series name are not lost.
    if isinstance(data, pd.DataFrame):
        if not isinstance(z, pd.DataFrame):
            z = pd.DataFrame(z, index=data.index, columns=data.columns)
    elif isinstance(data, pd.Series):
        if not isinstance(z, pd.Series):
            z = pd.Series(z, index=data.index, name=data.name)

    return z


# =============================================================================
# Internals
# =============================================================================
def _standardize(data, robust=False, window=None, **kwargs):
    """Z-score ``data``, either over the whole input or over a rolling window.

    Parameters
    ----------
    data : Union[np.array, pd.Series, pd.DataFrame]
        Values to standardize. Statistics are computed along the first axis and NaNs are ignored
        when computing them.
    robust : bool
        If truthy, center on the median and scale by ``mad``; otherwise center on the mean and
        scale by the sample standard deviation (``ddof=1``).
    window : int
        If None, the statistics are computed once on the whole input. Otherwise, size of the
        rolling window over which they are computed.
    **kwargs : optional
        Other arguments passed to ``pandas.DataFrame.rolling()``. ``min_periods`` defaults to 0
        and can be overridden here.

    Returns
    -------
    Union[np.array, pd.Series, pd.DataFrame]
        Without ``window``, the result of the arithmetic on ``data`` (same container as the
        input). With ``window``, a 1D ``np.array`` when the data has a single column, a 2D
        ``np.array`` otherwise.

    """
    # Compute standardized on whole data
    if window is None:
        if robust:
            return (data - np.nanmedian(data, axis=0)) / mad(data)
        return (data - np.nanmean(data, axis=0)) / np.nanstd(data, axis=0, ddof=1)

    # Rolling standardization on windows
    frame = pd.DataFrame(data)  # Force dataframe

    # Default to min_periods=0 while letting the caller override it through kwargs
    rolling_kwargs = {"min_periods": 0, **kwargs}
    rolling = frame.rolling(window, **rolling_kwargs)

    if robust:
        z = (frame - rolling.median()) / rolling.apply(mad)
    else:
        z = (frame - rolling.mean()) / rolling.std(ddof=1)

    # The first windows hold too few samples to yield a dispersion estimate, which
    # creates NaNs: fill them with the next valid value.
    z = z.bfill()

    # Restore to vector or array. The single column is selected by position because
    # its label is not necessarily 0 (e.g., named Series or labelled DataFrame).
    if z.shape[1] == 1:
        return z.iloc[:, 0].values
    return z.values