In this notebook, we will learn how to use the WWZ function of the pyleoclim
package to do spectral anlaysis on a timeseries.
In detail, what we will do includes:
In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from pyleoclim import Spectral
Let's first define a function to generate colored noise with a given scaling exponent.
In [2]:
def gen_noise(alpha, t, f0=None, m=None):
''' Generate a colored noise timeseries
Args:
alpha (float): exponent of the 1/f^alpha noise
t (float): time vector of the generated noise
f0 (float): fundamental frequency
m (int): maximum number of the waves, which determines the highest frequency of the components in the synthetic noise
Returns:
y (array): the generated 1/f^alpha noise
References:
Eq. (15) in Kirchner, J. W. Aliasing in 1/f(alpha) noise spectra: origins, consequences, and remedies. Phys Rev E Stat Nonlin Soft Matter Phys 71, 066110 (2005).
'''
n = np.size(t) # number of time points
y = np.zeros(n)
if f0 is None:
f0 = 1/n # fundamental frequency
if m is None:
m = n
k = np.arange(m) + 1 # wave numbers
theta = np.random.rand(int(m))*2*np.pi # random phase
for j in range(n):
coeff = (k*f0)**(-alpha/2)
sin_func = np.sin(2*np.pi*k*f0*t[j] + theta)
y[j] = np.sum(coeff*sin_func)
return y
Now let's generate an evenly-spaced colored noise timeseries with the scaling exponent of 1, and then delete 25% points randomly to create a unevenly-spaced timeseries.
In [3]:
np.random.seed(2333)
to_evenly = np.arange(1, 2001)
Xo_evenly = gen_noise(1, to_evenly, m=np.size(to_evenly)/2) # set such m so that no aliasing occurs
n_del = 500 # delete 500 pts from 2000 pts
deleted_idx = np.random.choice(range(np.size(to_evenly)), n_del, replace=False)
to_unevenly = np.delete(to_evenly, deleted_idx)
Xo_unevenly = np.delete(Xo_evenly, deleted_idx)
sns.set(style="darkgrid", font_scale=2)
plt.subplots(figsize=[20, 4])
plt.plot(to_evenly, Xo_evenly, color='gray', label='timeseries')
plt.plot(to_evenly[deleted_idx], Xo_evenly[deleted_idx], 'o', color=sns.xkcd_rgb['denim blue'], label='deleted dots')
plt.legend(fontsize=20, bbox_to_anchor=(1, 1.2), loc='upper right', ncol=3)
Out[3]:
In [4]:
# show the location of the deleted points
print(np.sort(deleted_idx))
Now we will use the function Spectral.wwz_psd()
to do spectral analysis on the evenly/unevenly-spaced timeseries using WWZ method.
Also we will calculate the analytical PSD, as well as the PSD calculated with MTM for comparison.
In [5]:
%%time
freqs = None
tau_evenly = np.linspace(np.min(to_evenly), np.max(to_evenly), 101)
tau_unevenly = np.linspace(np.min(to_unevenly), np.max(to_unevenly), 101)
dcon = 1e-3
res_psd_evenly = Spectral.wwz_psd(Xo_evenly, to_evenly, freqs=freqs, tau=tau_evenly, c=dcon, standardize=False, nMC=0, anti_alias=False)
res_psd_unevenly = Spectral.wwz_psd(Xo_unevenly, to_unevenly, freqs=freqs, tau=tau_unevenly, c=dcon, standardize=False, nMC=0, anti_alias=False)
res_psd_unevenly_high_dcon = Spectral.wwz_psd(Xo_unevenly, to_unevenly, freqs=freqs, tau=tau_unevenly, c=0.02, standardize=False, nMC=0, anti_alias=False)
In [6]:
# analytical PSD
psd_ideal_ref = 0.5*res_psd_evenly.freqs**(-1)/(1/np.size(to_evenly))
In [7]:
# PSD calculated with MTM for comparison
import nitime.algorithms as tsa
freq_mtm, psd_mtm, nu = tsa.multi_taper_psd(Xo_evenly, adaptive=False, jackknife=False, NW=2, Fs=1)
We can use the function Spectral.beta_estimation()
to estimate the scaling exponent of a PSD curve within a frequency range as below.
In [8]:
freq_range = [1/200, 1/2] # range for beta estimation
res_beta_evenly = Spectral.beta_estimation(
res_psd_evenly.psd, res_psd_evenly.freqs, freq_range[0], freq_range[1]
)
res_beta_unevenly = Spectral.beta_estimation(
res_psd_unevenly.psd, res_psd_unevenly.freqs, freq_range[0], freq_range[1]
)
res_beta_unevenly_high_dcon = Spectral.beta_estimation(
res_psd_unevenly_high_dcon.psd, res_psd_unevenly_high_dcon.freqs, freq_range[0], freq_range[1]
)
res_beta_mtm = Spectral.beta_estimation(
psd_mtm, freq_mtm, freq_range[0], freq_range[1]
)
Now let's plot and compare the PSD curves.
In [9]:
period_ticks = [2, 5, 10, 20, 50, 100, 200, 500, 1000]
fig = Spectral.plot_psd(psd_ideal_ref, res_psd_evenly.freqs, plot_ar1=False, psd_ar1_q95=None, period_ticks=period_ticks, lmstyle='--', color='k', label=r'analytical ($\beta$=1)'.format(res_beta_evenly.beta), figsize=[12, 6], linewidth=5)
plt.plot(1/res_psd_evenly.freqs, res_psd_evenly.psd, '-', linewidth=3, label=r'evenly-spaced (WWZ, dcon=0.001) ($\beta$={:.2f}$\pm${:.3f})'.format(res_beta_evenly.beta, res_beta_evenly.std_err), color=sns.xkcd_rgb['denim blue'])
plt.plot(1/res_psd_unevenly.freqs, res_psd_unevenly.psd, '-', linewidth=3, label=r'unevenly-spaced (WWZ, dcon=0.001) ($\beta$={:.2f}$\pm${:.3f})'.format(res_beta_unevenly.beta, res_beta_unevenly.std_err), color=sns.xkcd_rgb['medium green'])
plt.plot(1/res_psd_unevenly_high_dcon.freqs, res_psd_unevenly_high_dcon.psd, '-', linewidth=3, label=r'unevenly-spaced (WWZ, dcon=0.02) ($\beta$={:.2f}$\pm${:.3f})'.format(res_beta_unevenly_high_dcon.beta, res_beta_unevenly_high_dcon.std_err), color=sns.xkcd_rgb['pale red'])
plt.plot(1/freq_mtm, psd_mtm, '-', linewidth=3, label=r'evenly-spaced (MTM) ($\beta$={:.2f}$\pm${:.3f})'.format(res_beta_mtm.beta, res_beta_mtm.std_err), color='gray', alpha=0.5, zorder=-1)
# plt.plot(1/res_beta_evenly.f_binned, res_beta_evenly.Y_reg, '--', color='k', linewidth=3, zorder=99)
# plt.plot(1/res_beta_unevenly.f_binned, res_beta_unevenly.Y_reg, '--', color='k', linewidth=3, zorder=99)
# plt.plot(1/res_beta_mtm.f_binned, res_beta_mtm.Y_reg, '--', color='k', linewidth=3, zorder=99)
plt.legend(fontsize=20, bbox_to_anchor=(1.9, 1), loc='upper right', ncol=1)
Out[9]:
The figure above indicates that in the case of an evenly-spaced timeseries, both WWZ and MTM give good estimates of the analytical PSD. WWZ gives almost accurate PSD over the high frequency band (2-20 yrs), while MTM gives big oscillations there.
When we delete 25% of the data, making the timeseries unevenly-spaced, WWZ with a small decaying constant gives lower estimate of the scaling exponent due to aliasing caused by nonuniform sampling, and a larger decaying constant helps a bit.
The decaying constant will affect the frequency resolution of the WWZ method. Larger constant results in smoother PSD curve, as shown below.
In [10]:
freqs = None
tau_evenly = np.linspace(np.min(to_evenly), np.max(to_evenly), 101)
res_psd = {}
res_beta = {}
freq_range = [1/200, 1/2] # range for beta estimation
# dcon options:
# + 1/(8*np.pi**2) from Witt & Schumann 2005
# + 0.0125 from Foster 1996, very close to 1/(8*np.pi**2)
# + 0.001 from Kirchner & Neal 2013
fig = Spectral.plot_psd(psd_ideal_ref, res_psd_evenly.freqs, plot_ar1=False, psd_ar1_q95=None, period_ticks=period_ticks,
lmstyle='--', color='k', label=r'analytical ($\beta$=1)'.format(res_beta_evenly.beta), figsize=[12, 6], linewidth=5)
for dcon in [0.0001, 0.001, 1/(8*np.pi**2), 0.02]:
res_psd[str(dcon)] = Spectral.wwz_psd(Xo_evenly, to_evenly, freqs=freqs, tau=tau_evenly, c=dcon, standardize=False, nMC=0, anti_alias=False)
res_beta[str(dcon)] = Spectral.beta_estimation(
res_psd[str(dcon)].psd, res_psd[str(dcon)].freqs, freq_range[0], freq_range[1]
)
plt.plot(1/res_psd[str(dcon)].freqs, res_psd[str(dcon)].psd, 'o', ms=5, linewidth=3, label='dcon={:.4f} '.format(dcon)+r'($\beta$={:.2f}$\pm${:.3f})'.format(res_beta[str(dcon)].beta, res_beta[str(dcon)].std_err))
plt.legend(fontsize=20, bbox_to_anchor=(1.54, 1), loc='upper right', ncol=1)
Out[10]:
Now let's do some wavelet analysis on both the evenly/unevenly-spaced data. Note that nMC
sets the number of Monte-Carlo simulations for significance test.
In [11]:
# evenly-spaced timeseries
freqs = None
tau_evenly = np.linspace(np.min(to_evenly), np.max(to_evenly), 101)
freq_range = [1/200, 1/2]
period_ticks = [2, 5, 10, 20, 50, 100, 200, 500, 1000]
fig = Spectral.plot_summary(Xo_evenly, to_evenly, nMC=5, ts_style='-', tau=tau_evenly, period_ticks=period_ticks, period_S=freq_range, period_L=None)
In [12]:
# unevenly-spaced timeseries
freqs = None
tau_unevenly = np.linspace(np.min(to_unevenly), np.max(to_unevenly), 101)
freq_range = [1/200, 1/2]
period_ticks = [2, 5, 10, 20, 50, 100, 200, 500, 1000]
fig = Spectral.plot_summary(Xo_unevenly, to_unevenly, nMC=5, ts_style='-', tau=tau_unevenly, period_ticks=period_ticks, period_S=freq_range, period_L=None)
In [ ]: