Source code for neurokit2.signal.signal_fixpeaks

# - * - coding: utf-8 - * -
import matplotlib.patches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from ..stats import standardize
from .signal_formatpeaks import _signal_formatpeaks_sanitize
from .signal_period import signal_period


[docs]def signal_fixpeaks(
    peaks,
    sampling_rate=1000,
    iterative=True,
    show=False,
    interval_min=None,
    interval_max=None,
    relative_interval_min=None,
    relative_interval_max=None,
    robust=False,
    method="Kubios",
):
    """Correct erroneous peak placements.

    Identify and correct erroneous peak placements based on outliers in peak-to-peak differences (period).

    Parameters
    ----------
    peaks : list or array or DataFrame or Series or dict
        The samples at which the peaks occur. If an array is passed in, it is assumed that it was obtained
        with `signal_findpeaks()`. If a DataFrame is passed in, it is assumed to be obtained with `ecg_findpeaks()`
        or `ppg_findpeaks()` and to be of the same length as the input signal.
    sampling_rate : int
        The sampling frequency of the signal that contains the peaks (in Hz, i.e., samples/second).
    iterative : bool
        Whether or not to apply the artifact correction repeatedly (results in superior artifact correction).
    show : bool
        Whether or not to visualize artifacts and artifact thresholds.
    interval_min : float
        The minimum interval between the peaks.
    interval_max : float
        The maximum interval between the peaks.
    relative_interval_min : float
        The minimum interval between the peaks as relative to the sample (expressed in
        standard deviation from the mean).
    relative_interval_max : float
        The maximum interval between the peaks as relative to the sample (expressed in
        standard deviation from the mean).
    robust : bool
        Use a robust method of standardization (see `standardize()`) for the relative thresholds.
    method : str
        Either "Kubios" or "Neurokit". "Kubios" uses the artifact detection and correction described
        in Lipponen, J. A., & Tarvainen, M. P. (2019). Note that "Kubios" is only meant for peaks in
        ECG or PPG. "neurokit" can be used with peaks in ECG, PPG, or respiratory data.

    Returns
    -------
    peaks_clean : array
        The corrected peak locations.
    artifacts : dict
        Only if method="Kubios". A dictionary containing the indices of artifacts, accessible with the
        keys "ectopic", "missed", "extra", and "longshort".

    See Also
    --------
    signal_findpeaks, ecg_findpeaks, ecg_peaks, ppg_findpeaks, ppg_peaks

    Examples
    --------
    >>> import neurokit2 as nk
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>>
    >>> # Kubios
    >>> ecg = nk.ecg_simulate(duration=240, noise=0.25, heart_rate=70, random_state=42)
    >>> rpeaks_uncorrected = nk.ecg_findpeaks(ecg)
    >>> artifacts, rpeaks_corrected = nk.signal_fixpeaks(rpeaks_uncorrected, iterative=True,
    ...                                                  show=True, method="Kubios")
    >>> rate_corrected = nk.signal_rate(rpeaks_corrected, desired_length=len(ecg))
    >>> rate_uncorrected = nk.signal_rate(rpeaks_uncorrected, desired_length=len(ecg))
    >>>
    >>> fig, ax = plt.subplots()
    >>> ax.plot(rate_uncorrected, label="heart rate without artifact correction") #doctest: +SKIP
    >>> ax.plot(rate_corrected, label="heart rate with artifact correction") #doctest: +SKIP
    >>> ax.legend(loc="upper right") #doctest: +SKIP
    >>>
    >>> # NeuroKit
    >>> signal = nk.signal_simulate(duration=4, sampling_rate=1000, frequency=1)
    >>> peaks_true = nk.signal_findpeaks(signal)["Peaks"]
    >>> peaks = np.delete(peaks_true, [1])  # create gaps
    >>>
    >>> signal = nk.signal_simulate(duration=20, sampling_rate=1000, frequency=1)
    >>> peaks_true = nk.signal_findpeaks(signal)["Peaks"]
    >>> peaks = np.delete(peaks_true, [5, 15])  # create gaps
    >>> peaks = np.sort(np.append(peaks, [1350, 11350, 18350]))  # add artifacts
    >>>
    >>> peaks_corrected = nk.signal_fixpeaks(peaks=peaks, interval_min=0.5, interval_max=1.5, method="neurokit")
    >>> # Plot and shift original peaks to the rightto see the difference.
    >>> fig = nk.events_plot([peaks + 50, peaks_corrected], signal)
    >>> fig #doctest: +SKIP

    References
    ----------
    - Lipponen, J. A., & Tarvainen, M. P. (2019). A robust algorithm for heart rate variability time
    series artefact correction using novel beat classification. Journal of medical engineering & technology,
    43(3), 173-181. 10.1080/03091902.2019.1640306

    """
    # Format input
    peaks = _signal_formatpeaks_sanitize(peaks)

    # If method Kubios
    if method.lower() == "kubios":

        return _signal_fixpeaks_kubios(peaks, sampling_rate=sampling_rate, iterative=iterative, show=show)

    # Else method is NeuroKit
    return _signal_fixpeaks_neurokit(
        peaks,
        sampling_rate=sampling_rate,
        interval_min=interval_min,
        interval_max=interval_max,
        relative_interval_min=relative_interval_min,
        relative_interval_max=relative_interval_max,
        robust=robust,
    )


# =============================================================================
# Methods
# =============================================================================
def _signal_fixpeaks_neurokit(
    peaks,
    sampling_rate=1000,
    interval_min=None,
    interval_max=None,
    relative_interval_min=None,
    relative_interval_max=None,
    robust=False,
):
    """Neurokit method."""

    peaks_clean = _remove_small(peaks, sampling_rate, interval_min, relative_interval_min, robust)
    peaks_clean = _interpolate_big(peaks, sampling_rate, interval_max, relative_interval_max, robust)

    return peaks_clean


def _signal_fixpeaks_kubios(peaks, sampling_rate=1000, iterative=True, show=False):
    """kubios method."""

    # Get corrected peaks and normal-to-normal intervals.
    artifacts, subspaces = _find_artifacts(peaks, sampling_rate=sampling_rate)
    peaks_clean = _correct_artifacts(artifacts, peaks)

    if iterative:

        # Iteratively apply the artifact correction until the number of artifact
        # reaches an equilibrium (i.e., the number of artifacts does not change
        # anymore from one iteration to the next).
        n_artifacts_previous = np.inf
        n_artifacts_current = sum([len(i) for i in artifacts.values()])

        previous_diff = 0

        while n_artifacts_current - n_artifacts_previous != previous_diff:

            previous_diff = n_artifacts_previous - n_artifacts_current

            artifacts, subspaces = _find_artifacts(peaks_clean, sampling_rate=sampling_rate)
            peaks_clean = _correct_artifacts(artifacts, peaks_clean)

            n_artifacts_previous = n_artifacts_current
            n_artifacts_current = sum([len(i) for i in artifacts.values()])

    if show:
        _plot_artifacts_lipponen2019(artifacts, subspaces)

    return artifacts, peaks_clean


# =============================================================================
# Kubios: Lipponen & Tarvainen (2019).
# =============================================================================
def _find_artifacts(peaks, c1=0.13, c2=0.17, alpha=5.2, window_width=91, medfilt_order=11, sampling_rate=1000):

    # Compute period series (make sure it has same numer of elements as peaks);
    # peaks are in samples, convert to seconds.
    rr = np.ediff1d(peaks, to_begin=0) / sampling_rate
    # For subsequent analysis it is important that the first element has
    # a value in a realistic range (e.g., for median filtering).
    rr[0] = np.mean(rr[1:])

    # Artifact identification #################################################
    ###########################################################################

    # Compute dRRs: time series of differences of consecutive periods (dRRs).
    drrs = np.ediff1d(rr, to_begin=0)
    drrs[0] = np.mean(drrs[1:])
    # Normalize by threshold.
    th1 = _compute_threshold(drrs, alpha, window_width)
    drrs /= th1

    # ignore division by 0 warning
    np.seterr(divide="ignore", invalid="ignore")

    # Cast dRRs to subspace s12.
    # Pad drrs with one element.
    padding = 2
    drrs_pad = np.pad(drrs, padding, "reflect")

    s12 = np.zeros(drrs.size)
    for d in np.arange(padding, padding + drrs.size):

        if drrs_pad[d] > 0:
            s12[d - padding] = np.max([drrs_pad[d - 1], drrs_pad[d + 1]])
        elif drrs_pad[d] < 0:
            s12[d - padding] = np.min([drrs_pad[d - 1], drrs_pad[d + 1]])

    # Cast dRRs to subspace s22.
    s22 = np.zeros(drrs.size)
    for d in np.arange(padding, padding + drrs.size):

        if drrs_pad[d] >= 0:
            s22[d - padding] = np.min([drrs_pad[d + 1], drrs_pad[d + 2]])
        elif drrs_pad[d] < 0:
            s22[d - padding] = np.max([drrs_pad[d + 1], drrs_pad[d + 2]])

    # Compute mRRs: time series of deviation of RRs from median.
    df = pd.DataFrame({"signal": rr})
    medrr = df.rolling(medfilt_order, center=True, min_periods=1).median().signal.to_numpy()
    mrrs = rr - medrr
    mrrs[mrrs < 0] = mrrs[mrrs < 0] * 2
    # Normalize by threshold.
    th2 = _compute_threshold(mrrs, alpha, window_width)
    mrrs /= th2

    # Artifact classification #################################################
    ###########################################################################

    # Artifact classes.
    extra_idcs = []
    missed_idcs = []
    ectopic_idcs = []
    longshort_idcs = []

    i = 0
    while i < rr.size - 2:  # The flow control is implemented based on Figure 1

        if np.abs(drrs[i]) <= 1:  # Figure 1
            i += 1
            continue
        eq1 = np.logical_and(drrs[i] > 1, s12[i] < (-c1 * drrs[i] - c2))  # pylint: disable=E1111
        eq2 = np.logical_and(drrs[i] < -1, s12[i] > (-c1 * drrs[i] + c2))  # pylint: disable=E1111

        if np.any([eq1, eq2]):
            # If any of the two equations is true.
            ectopic_idcs.append(i)
            i += 1
            continue
        # If none of the two equations is true.
        if ~np.any([np.abs(drrs[i]) > 1, np.abs(mrrs[i]) > 3]):  # Figure 1
            i += 1
            continue
        longshort_candidates = [i]
        # Check if the following beat also needs to be evaluated.
        if np.abs(drrs[i + 1]) < np.abs(drrs[i + 2]):
            longshort_candidates.append(i + 1)

        for j in longshort_candidates:
            # Long beat.
            eq3 = np.logical_and(drrs[j] > 1, s22[j] < -1)  # pylint: disable=E1111
            # Long or short.
            eq4 = np.abs(mrrs[j]) > 3  # Figure 1
            # Short beat.
            eq5 = np.logical_and(drrs[j] < -1, s22[j] > 1)  # pylint: disable=E1111

            if ~np.any([eq3, eq4, eq5]):
                # If none of the three equations is true: normal beat.
                i += 1
                continue
            # If any of the three equations is true: check for missing or extra
            # peaks.

            # Missing.
            eq6 = np.abs(rr[j] / 2 - medrr[j]) < th2[j]  # Figure 1
            # Extra.
            eq7 = np.abs(rr[j] + rr[j + 1] - medrr[j]) < th2[j]  # Figure 1

            # Check if extra.
            if np.all([eq5, eq7]):
                extra_idcs.append(j)
                i += 1
                continue
            # Check if missing.
            if np.all([eq3, eq6]):
                missed_idcs.append(j)
                i += 1
                continue
            # If neither classified as extra or missing, classify as "long or
            # short".
            longshort_idcs.append(j)
            i += 1

    # Prepare output
    artifacts = {"ectopic": ectopic_idcs, "missed": missed_idcs, "extra": extra_idcs, "longshort": longshort_idcs}

    subspaces = {"rr": rr, "drrs": drrs, "mrrs": mrrs, "s12": s12, "s22": s22, "c1": c1, "c2": c2}

    return artifacts, subspaces


def _compute_threshold(signal, alpha, window_width):

    df = pd.DataFrame({"signal": np.abs(signal)})
    q1 = df.rolling(window_width, center=True, min_periods=1).quantile(0.25).signal.to_numpy()
    q3 = df.rolling(window_width, center=True, min_periods=1).quantile(0.75).signal.to_numpy()
    th = alpha * ((q3 - q1) / 2)

    return th


def _correct_artifacts(artifacts, peaks):

    # Artifact correction
    #####################
    # The integrity of indices must be maintained if peaks are inserted or
    # deleted: for each deleted beat, decrease indices following that beat in
    # all other index lists by 1. Likewise, for each added beat, increment the
    # indices following that beat in all other lists by 1.
    extra_idcs = artifacts["extra"]
    missed_idcs = artifacts["missed"]
    ectopic_idcs = artifacts["ectopic"]
    longshort_idcs = artifacts["longshort"]

    # Delete extra peaks.
    if extra_idcs:
        peaks = _correct_extra(extra_idcs, peaks)
        # Update remaining indices.
        missed_idcs = _update_indices(extra_idcs, missed_idcs, -1)
        ectopic_idcs = _update_indices(extra_idcs, ectopic_idcs, -1)
        longshort_idcs = _update_indices(extra_idcs, longshort_idcs, -1)

    # Add missing peaks.
    if missed_idcs:
        peaks = _correct_missed(missed_idcs, peaks)
        # Update remaining indices.
        ectopic_idcs = _update_indices(missed_idcs, ectopic_idcs, 1)
        longshort_idcs = _update_indices(missed_idcs, longshort_idcs, 1)

    if ectopic_idcs:
        peaks = _correct_misaligned(ectopic_idcs, peaks)

    if longshort_idcs:
        peaks = _correct_misaligned(longshort_idcs, peaks)

    return peaks


def _correct_extra(extra_idcs, peaks):

    corrected_peaks = peaks.copy()
    corrected_peaks = np.delete(corrected_peaks, extra_idcs)

    return corrected_peaks


def _correct_missed(missed_idcs, peaks):

    corrected_peaks = peaks.copy()
    missed_idcs = np.array(missed_idcs)
    # Calculate the position(s) of new beat(s). Make sure to not generate
    # negative indices. prev_peaks and next_peaks must have the same
    # number of elements.
    valid_idcs = np.logical_and(missed_idcs > 1, missed_idcs < len(corrected_peaks))  # pylint: disable=E1111
    missed_idcs = missed_idcs[valid_idcs]
    prev_peaks = corrected_peaks[[i - 1 for i in missed_idcs]]
    next_peaks = corrected_peaks[missed_idcs]
    added_peaks = prev_peaks + (next_peaks - prev_peaks) / 2
    # Add the new peaks before the missed indices (see numpy docs).
    corrected_peaks = np.insert(corrected_peaks, missed_idcs, added_peaks)

    return corrected_peaks


def _correct_misaligned(misaligned_idcs, peaks):

    corrected_peaks = peaks.copy()
    misaligned_idcs = np.array(misaligned_idcs)
    # Make sure to not generate negative indices, or indices that exceed
    # the total number of peaks. prev_peaks and next_peaks must have the
    # same number of elements.
    valid_idcs = np.logical_and(
        misaligned_idcs > 1, misaligned_idcs < len(corrected_peaks) - 1  # pylint: disable=E1111
    )
    misaligned_idcs = misaligned_idcs[valid_idcs]
    prev_peaks = corrected_peaks[[i - 1 for i in misaligned_idcs]]
    next_peaks = corrected_peaks[[i + 1 for i in misaligned_idcs]]

    half_ibi = (next_peaks - prev_peaks) / 2
    peaks_interp = prev_peaks + half_ibi
    # Shift the R-peaks from the old to the new position.
    corrected_peaks = np.delete(corrected_peaks, misaligned_idcs)
    corrected_peaks = np.concatenate((corrected_peaks, peaks_interp)).astype(int)
    corrected_peaks.sort(kind="mergesort")

    return corrected_peaks


def _update_indices(source_idcs, update_idcs, update):
    """For every element s in source_idcs, change every element u in update_idcs according to update, if u is larger
    than s."""
    if not update_idcs:
        return update_idcs

    for s in source_idcs:
        update_idcs = [u + update if u > s else u for u in update_idcs]

    return update_idcs


def _plot_artifacts_lipponen2019(artifacts, info):

    # Extract parameters
    longshort_idcs = artifacts["longshort"]
    ectopic_idcs = artifacts["ectopic"]
    extra_idcs = artifacts["extra"]
    missed_idcs = artifacts["missed"]

    rr = info["rr"]
    drrs = info["drrs"]
    mrrs = info["mrrs"]
    s12 = info["s12"]
    s22 = info["s22"]
    c1 = info["c1"]
    c2 = info["c2"]

    # Visualize artifact type indices.

    # Set grids
    gs = matplotlib.gridspec.GridSpec(ncols=4, nrows=3, width_ratios=[1, 2, 2, 2])
    fig = plt.figure(constrained_layout=False, figsize=(15, 10))
    ax0 = fig.add_subplot(gs[0, :-2])
    ax1 = fig.add_subplot(gs[1, :-2])
    ax2 = fig.add_subplot(gs[2, :-2])
    ax3 = fig.add_subplot(gs[:, -1])
    ax4 = fig.add_subplot(gs[:, -2])

    ax0.set_title("Artifact types", fontweight="bold")
    ax0.plot(rr, label="heart period")
    ax0.scatter(longshort_idcs, rr[longshort_idcs], marker="x", c="m", s=100, zorder=3, label="long/short")
    ax0.scatter(ectopic_idcs, rr[ectopic_idcs], marker="x", c="g", s=100, zorder=3, label="ectopic")
    ax0.scatter(extra_idcs, rr[extra_idcs], marker="x", c="y", s=100, zorder=3, label="false positive")
    ax0.scatter(missed_idcs, rr[missed_idcs], marker="x", c="r", s=100, zorder=3, label="false negative")
    ax0.legend(loc="upper right")

    # Visualize first threshold.
    ax1.set_title("Consecutive-difference criterion", fontweight="bold")
    ax1.plot(np.abs(drrs), label="normalized difference consecutive heart periods")
    ax1.axhline(1, c="r", label="artifact threshold")
    ax1.legend(loc="upper right")
    ax1.set_ylim(0, 5)

    # Visualize second threshold.
    ax2.set_title("Difference-from-median criterion", fontweight="bold")
    ax2.plot(np.abs(mrrs), label="difference from median over 11 periods")
    ax2.axhline(3, c="r", label="artifact threshold")
    ax2.legend(loc="upper right")
    ax2.set_ylim(0, 5)

    # Visualize subspaces.
    ax4.set_title("Subspace 1", fontweight="bold")
    ax4.set_xlabel("S11")
    ax4.set_ylabel("S12")
    ax4.scatter(drrs, s12, marker="x", label="heart periods")
    ax4.set_ylim(-5, 5)
    ax4.set_xlim(-10, 10)
    verts0 = [(-10, 5), (-10, -c1 * -10 + c2), (-1, -c1 * -1 + c2), (-1, 5)]

    poly0 = matplotlib.patches.Polygon(verts0, alpha=0.3, facecolor="r", edgecolor=None, label="ectopic periods")
    ax4.add_patch(poly0)
    verts1 = [(1, -c1 * 1 - c2), (1, -5), (10, -5), (10, -c1 * 10 - c2)]
    poly1 = matplotlib.patches.Polygon(verts1, alpha=0.3, facecolor="r", edgecolor=None)
    ax4.add_patch(poly1)
    ax4.legend(loc="upper right")

    ax3.set_title("Subspace 2", fontweight="bold")
    ax3.set_xlabel("S21")
    ax3.set_ylabel("S22")
    ax3.scatter(drrs, s22, marker="x", label="heart periods")
    ax3.set_xlim(-10, 10)
    ax3.set_ylim(-10, 10)
    verts2 = [(-10, 10), (-10, 1), (-1, 1), (-1, 10)]
    poly2 = matplotlib.patches.Polygon(verts2, alpha=0.3, facecolor="r", edgecolor=None, label="short periods")
    ax3.add_patch(poly2)
    verts3 = [(1, -1), (1, -10), (10, -10), (10, -1)]
    poly3 = matplotlib.patches.Polygon(verts3, alpha=0.3, facecolor="y", edgecolor=None, label="long periods")
    ax3.add_patch(poly3)
    ax3.legend(loc="upper right")


# =============================================================================
# Neurokit
# =============================================================================
def _remove_small(peaks, sampling_rate=1000, interval_min=None, relative_interval_min=None, robust=False):
    if interval_min is None and relative_interval_min is None:
        return peaks

    if interval_min is not None:
        interval = signal_period(peaks, sampling_rate=sampling_rate, desired_length=None)
        peaks = peaks[interval > interval_min]

    if relative_interval_min is not None:
        interval = signal_period(peaks, sampling_rate=sampling_rate, desired_length=None)
        peaks = peaks[standardize(interval, robust=robust) > relative_interval_min]

    return peaks


def _interpolate_big(peaks, sampling_rate=1000, interval_max=None, relative_interval_max=None, robust=False):
    if interval_max is None and relative_interval_max is None:
        return peaks

    continue_loop = True
    while continue_loop is True:
        if interval_max is not None:
            interval = signal_period(peaks, sampling_rate=sampling_rate, desired_length=None)
            peaks, continue_loop = _interpolate_missing(peaks, interval, interval_max, sampling_rate)

        if relative_interval_max is not None:
            interval = signal_period(peaks, sampling_rate=sampling_rate, desired_length=None)
            interval = standardize(interval, robust=robust)
            peaks, continue_loop = _interpolate_missing(peaks, interval, interval_max, sampling_rate)

    return peaks


def _interpolate_missing(peaks, interval, interval_max, sampling_rate):
    outliers = interval > interval_max
    outliers_loc = np.where(outliers)[0]
    if np.sum(outliers) == 0:
        return peaks, False

    # Delete large interval and replace by two unknown intervals
    interval[outliers] = np.nan
    interval = np.insert(interval, outliers_loc, np.nan)
    #    new_peaks_location = np.where(np.isnan(interval))[0]

    # Interpolate values
    interval = pd.Series(interval).interpolate().values
    peaks_corrected = _period_to_location(interval, sampling_rate, first_location=peaks[0])
    peaks = np.insert(peaks, outliers_loc, peaks_corrected[outliers_loc + np.arange(len(outliers_loc))])
    return peaks, True


def _period_to_location(period, sampling_rate=1000, first_location=0):
    location = np.cumsum(period * sampling_rate)
    location = location - (location[0] - first_location)
    return location.astype(np.int)