# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import scipy.spatial
from .complexity_embedding import complexity_embedding
[docs]def complexity_dimension(signal, delay=1, dimension_max=20, method="afnn", show=False, R=10.0, A=2.0, **kwargs):
"""Estimate optimal Dimension (m) for time-delay embedding.
Parameters
----------
signal : Union[list, np.array, pd.Series]
The signal (i.e., a time series) in the form of a vector of values.
delay : int
Time delay (often denoted 'Tau', sometimes referred to as 'lag').
In practice, it is common to have a fixed time lag (corresponding for instance to the
sampling rate; Gautama, 2003), or to find a suitable value using some algorithmic heuristics
(see ``complexity_delay()``).
dimension_max : int
The maximum embedding dimension (often denoted 'm' or 'd', sometimes referred to as 'order')
to test.
method : str
Method can either be afnn (average false nearest neighbour) or fnn (false nearest neighbour).
show : bool
Visualize the result.
R : float
Relative tolerance (for fnn method).
A : float
Absolute tolerance (for fnn method)
**kwargs
Other arguments.
Returns
-------
int
Optimal dimension.
See Also
------------
complexity_delay, complexity_embedding
Examples
---------
>>> import neurokit2 as nk
>>>
>>> # Artifical example
>>> signal = nk.signal_simulate(duration=10, frequency=1, noise=0.01)
>>> delay = nk.complexity_delay(signal, delay_max=500)
>>>
>>> values = nk.complexity_dimension(signal, delay=delay, dimension_max=20, show=True)
References
-----------
- Cao, L. (1997). Practical method for determining the minimum embedding dimension of a scalar
time series. Physica D: Nonlinear Phenomena, 110(1-2), 43-50.
"""
# Initalize vectors
if isinstance(dimension_max, int):
dimension_seq = np.arange(1, dimension_max + 1)
else:
dimension_seq = np.array(dimension_max)
# Method
method = method.lower()
if method in ["afnn"]:
E, Es = _embedding_dimension_afn(signal, dimension_seq=dimension_seq, delay=delay, **kwargs)
E1 = E[1:] / E[:-1]
E2 = Es[1:] / Es[:-1]
# To find where E1 saturates, set a threshold of difference
# threshold = 0.1 * (np.max(E1) - np.min(E1))
min_dimension = [i for i, x in enumerate(E1 >= 0.85 * np.max(E1)) if x][0] + 1
if show is True:
_embedding_dimension_plot(
method=method, dimension_seq=dimension_seq, min_dimension=min_dimension, E1=E1, E2=E2
)
elif method in ["fnn"]:
f1, f2, f3 = _embedding_dimension_ffn(signal, dimension_seq=dimension_seq, delay=delay, R=R, A=A, **kwargs)
min_dimension = [i for i, x in enumerate(f3 <= 1.85 * np.min(f3[np.nonzero(f3)])) if x][0]
if show is True:
_embedding_dimension_plot(
method=method, dimension_seq=dimension_seq, min_dimension=min_dimension, f1=f1, f2=f2, f3=f3
)
else:
raise ValueError("NeuroKit error: complexity_dimension(): 'method' " "not recognized.")
return min_dimension
# =============================================================================
# Methods
# =============================================================================
def _embedding_dimension_afn(signal, dimension_seq, delay=1, **kwargs):
"""Return E(d) and E^*(d) for a all d in dimension_seq.
E(d) and E^*(d) will be used to calculate E1(d) and E2(d)
El(d) = E(d + 1)/E(d). E1(d) stops changing when d is greater than some value d0 if the time
series comes from an attractor. Then d0 + 1 is the minimum embedding dimension we look for.
E2(d) = E*(d + 1)/E*(d). E2(d) is a useful quantity to distinguish deterministic signals from
stochastic signals. For random data, since the future values are independent of the past values,
E2(d) will be equal to 1 for any d. For deterministic data, E2(d) is certainly related to d, it
cannot be a constant for all d; there must exist somed's such that E2(d) is not 1.
"""
values = np.asarray(
[_embedding_dimension_afn_d(signal, dimension, delay, **kwargs) for dimension in dimension_seq]
).T
E, Es = values[0, :], values[1, :]
return E, Es
def _embedding_dimension_afn_d(signal, dimension, delay=1, metric="chebyshev", window=10, maxnum=None, **kwargs):
"""Return E(d) and E^*(d) for a single d.
Returns E(d) and E^*(d) for the AFN method for a single d.
"""
# We need to reduce the number of points in dimension d by tau
# so that after reconstruction, there'll be equal number of points
# at both dimension d as well as dimension d + 1.
y1 = complexity_embedding(signal[:-delay], delay=delay, dimension=dimension)
y2 = complexity_embedding(signal, delay=delay, dimension=dimension + 1)
# Find near neighbors in dimension d.
index, dist = _embedding_dimension_neighbors(y1, metric=metric, window=window, maxnum=maxnum)
# Compute the near-neighbor distances in d + 1 dimension
d = np.asarray([scipy.spatial.distance.chebyshev(i, j) for i, j in zip(y2, y2[index])])
# Compute the ratio of near-neighbor distances in d + 1 over d dimension
# Its average is E(d)
E = np.mean(d / dist)
# Calculate E^*(d)
Es = np.mean(np.abs(y2[:, -1] - y2[index, -1]))
return E, Es
def _embedding_dimension_ffn(signal, dimension_seq, delay=1, **kwargs):
"""Compute the fraction of false nearest neighbors.
The false nearest neighbors (FNN) method described by Kennel et al.
(1992) to calculate the minimum embedding dimension required to embed a scalar time series.
Parameters
----------
signal : Union[list, np.array, pd.Series]
The signal (i.e., a time series) in the form of a vector of values.
dimension_seq : int
The embedding dimension.
delay : int
Time delay (often denoted 'Tau', sometimes referred to as 'lag').
**kwargs
Other arguments.
Returns
-------
f1 : array
Fraction of neighbors classified as false by Test I.
f2 : array
Fraction of neighbors classified as false by Test II.
f3 : array
Fraction of neighbors classified as false by either Test I
or Test II.
"""
values = np.asarray(
[_embedding_dimension_ffn_d(signal, dimension, delay, **kwargs) for dimension in dimension_seq]
).T
f1, f2, f3 = values[0, :], values[1, :], values[2, :]
return f1, f2, f3
def _embedding_dimension_ffn_d(signal, dimension, delay=1, R=10.0, A=2.0, metric="euclidean", window=10, maxnum=None):
"""Return fraction of false nearest neighbors for a single d."""
# We need to reduce the number of points in dimension d by tau
# so that after reconstruction, there'll be equal number of points
# at both dimension d as well as dimension d + 1.
y1 = complexity_embedding(signal[:-delay], delay=delay, dimension=dimension)
y2 = complexity_embedding(signal, delay=delay, dimension=dimension + 1)
# Find near neighbors in dimension d.
index, dist = _embedding_dimension_neighbors(y1, metric=metric, window=window, maxnum=maxnum)
# Compute the near-neighbor distances in d + 1 dimension
d = np.asarray([scipy.spatial.distance.chebyshev(i, j) for i, j in zip(y2, y2[index])])
# Find all potential false neighbors using Kennel et al.'s tests.
f1 = np.abs(y2[:, -1] - y2[index, -1]) / dist > R
f2 = d / np.std(signal) > A
f3 = f1 | f2
return np.mean(f1), np.mean(f2), np.mean(f3)
# =============================================================================
# Internals
# =============================================================================
def _embedding_dimension_plot(
method, dimension_seq, min_dimension, E1=None, E2=None, f1=None, f2=None, f3=None, ax=None
):
if ax is None:
fig, ax = plt.subplots()
else:
fig = None
ax.set_title("Optimization of Dimension (d)")
ax.set_xlabel("Embedding dimension $d$")
ax.set_ylabel("$E_1(d)$ and $E_2(d)$")
if method in ["afnn"]:
ax.plot(dimension_seq[:-1], E1, "bo-", label="$E_1(d)$", color="#FF5722")
ax.plot(dimension_seq[:-1], E2, "go-", label="$E_2(d)$", color="#f44336")
if method in ["fnn"]:
ax.plot(dimension_seq, 100 * f1, "bo--", label="Test I", color="#FF5722")
ax.plot(dimension_seq, 100 * f2, "g^--", label="Test II", color="#f44336")
ax.plot(dimension_seq, 100 * f3, "rs-", label="Test I + II", color="#852b01")
ax.axvline(x=min_dimension, color="#E91E63", label="Optimal dimension: " + str(min_dimension))
ax.legend(loc="upper right")
return fig
def _embedding_dimension_neighbors(
signal, dimension_max=20, delay=1, metric="chebyshev", window=0, maxnum=None, show=False
):
"""Find nearest neighbors of all points in the given array. Finds the nearest neighbors of all points in the given
array using SciPy's KDTree search.
Parameters
----------
signal : ndarray or array or list or Series
embedded signal: N-dimensional array containing time-delayed vectors, or
signal: 1-D array (e.g.time series) of signal in the form of a vector of values.
If signal is input, embedded signal will be created using the input dimension and delay.
delay : int
Time delay (often denoted 'Tau', sometimes referred to as 'lag'). In practice, it is common
to have a fixed time lag (corresponding for instance to the sampling rate; Gautama, 2003),
or to find a suitable value using some algorithmic heuristics (see ``delay_optimal()``).
dimension_max : int
The maximum embedding dimension (often denoted 'm' or 'd', sometimes referred to as 'order')
to test.
metric : str
Metric to use for distance computation. Must be one of "cityblock" (aka the Manhattan metric),
"chebyshev" (aka the maximum norm metric), or "euclidean". Defaults to 'chebyshev'.
window : int
Minimum temporal separation (Theiler window) that should exist between near neighbors.
This is crucial while computing Lyapunov exponents and the correlation dimension. Defaults to 0.
maxnum : int
Maximum number of near neighbors that should be found for each point.
In rare cases, when there are no neighbors that are at a nonzero distance, this will have to
be increased (i.e., beyond 2 * window + 3). Defaults to None (optimum).
show : bool
Defaults to False.
Returns
-------
index : array
Array containing indices of near neighbors.
dist : array
Array containing near neighbor distances.
"""
# Sanity checks
if len(signal.shape) == 1:
y = complexity_embedding(signal, delay=delay, dimension=dimension_max)
else:
y = signal
if metric == "chebyshev":
p = np.inf
elif metric == "cityblock":
p = 1
elif metric == "euclidean":
p = 2
else:
raise ValueError('Unknown metric. Should be one of "cityblock", ' '"euclidean", or "chebyshev".')
tree = scipy.spatial.cKDTree(y) # pylint: disable=E1102
n = len(y)
if not maxnum:
maxnum = (window + 1) + 1 + (window + 1)
else:
maxnum = max(1, maxnum)
if maxnum >= n:
raise ValueError("maxnum is bigger than array length.")
dists = np.empty(n)
indices = np.empty(n, dtype=int)
for i, x in enumerate(y):
for k in range(2, maxnum + 2):
# querry for k number of nearest neighbours
dist, index = tree.query(x, k=k, p=p)
# remove points that are closer than min temporal separation
# remove self reference (d > 0)
valid = (np.abs(index - i) > window) & (dist > 0)
if np.count_nonzero(valid):
dists[i] = dist[valid][0]
indices[i] = index[valid][0]
break
if k == (maxnum + 1):
raise Exception(
"Could not find any near neighbor with a nonzero distance." "Try increasing the value of maxnum."
)
indices, values = np.squeeze(indices), np.squeeze(dists)
if show is True:
plt.plot(indices, values)
return indices, values