In [6]:
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
import matplotlib as mpl
from numpy.fft import fft, ifft
%matplotlib inline
def estimated_autocorrelation(x):
n = len(x)
mu, sigma2 = np.mean(x), np.var(x)
r = np.correlate(x - mu, x - mu, mode = 'full')[-n:]
result = r/(sigma2 * (np.arange(n, 0, -1)))
return result
In [7]:
data = sio.loadmat('sunspotData.mat')
years, sunspots = data['years'], data['sunspots']
start, end = min(years)[0], max(years)[0]
ave_sunspot = np.empty(end - start + 1)
for i, year in enumerate(range(start, end + 1)):
ave_sunspot[i] = np.mean(sunspots[years == year])
auto_corr = estimated_autocorrelation(ave_sunspot)
We make some plots to visualize the sunspots.
The first plot shows the number of sunspots changing over the years. The period seems to be more or less of 10 years.
From the return plot we confirm the feeling that there is a periodicity in the data. In facr we can see that the return plot is cyclical.
We plotted also the estimated autocorrelation function and we can see more clearly the periodicity of 10 years.
Lastly we show the power plot and we can see two major peaks.
In [26]:
plt.plot(np.arange(start, end + 1), ave_sunspot)
plt.xlabel('t [year]')
plt.ylabel('sunspots')
plt.figure()
plt.plot(ave_sunspot[:-1], ave_sunspot[1:])
plt.xlabel(r'sunspots$_{t - 1}$')
plt.ylabel(r'sunspots$_{t}$')
plt.figure()
#plt.plot(mpl.acorr(ave_sunspot))
plt.figure()
omega = fft(auto_corr)
plt.plot(np.absolute(omega))
plt.grid()
plt.figure()
plt.acorr(ave_sunspot)
Out[26]:
In [28]:
plt.figure()
fig = plt.figure()
ax1 = fig.add_subplot(211)
ax1.acorr(ave_sunspot[1:])
Out[28]:
In [4]:
data = sio.loadmat('investment.mat')
y = data['invs']
In [5]:
plt.plot(y)
Out[5]:
In [2]:
from sklearn import datasets, linear_model
model = linear_model.LinearRegression()
x = np.arange(len(y)).reshape(y.shape)
model.fit(x, y)
Even like this the data does not resamble a stationary time series, in fact the variance seems to change over time, but it's hard to tell. By taking the differences of order 1 or even 2 we can see something that resambles more a stationary time series.
In [7]:
x_st = y - model.predict(x)
dif = np.diff(x_st.flatten(), n=2)
f, axarr = plt.subplots(1, 3, sharey=True, figsize=(15, 3))
axarr[0].plot(x_st)
axarr[1].plot(np.diff(x_st.flatten(), n=1))
axarr[2].plot(np.diff(x_st.flatten(), n=2))
for i in range(3):
axarr[i].grid(linestyle='dotted')
axarr[i].set_title('Diff of order n = ' + str(i))
axarr[i].set_xlabel('Time step')
By looking at the plot of the autocorrelation function we can see a major peak at 25. This seems a reasonable value for what looks like a business cycle. The other peaks seem to be at a too high of a value to be taken into consideratin. We do not have enough time step to understand if there is a cycle at 90 time steps.
In [8]:
plt.plot(estimated_autocorrelation(x_st.flatten()))
Out[8]:
In [12]:
def AR(coeff, init, T):
offset = coeff[0]
mult_coef = np.flip(coeff, 0)[:-1]
print(mult_coef)
series = np.zeros(T)
for k, x_i in enumerate(init):
series[k] = x_i
print(series[k - k - 1:k])
for i in range(k + 1, T):
series[i] = np.sum(mult_coef * series[i - k - 1:i]) + np.random.normal(0, 1) + offset
return series
In [13]:
coeff = np.array([0, -.8, 0, 0, .4])
x = AR(coeff,np.zeros(4), 200)
plt.plot(x)
Out[13]:
From the return plot we can see a strong correlation between succesive values.
In [15]:
plt.plot(x[:-1][-50:], x[1:][-50:], '.')
plt.grid()
The AR(p) can be written as $$y_t = a_t y_{t-1} + a_2 y_{t-2} + ... + a_p y_{t-p} + \epsilon_t$$
Can be written as a AR(1) process: $$\xi_{t} = F\xi_{t-1} \epsilon.$$ Where $$ \xi= \begin{bmatrix} y_t\\ y_{t-1}\\ y_{t-2}\\ \vdots \\ y_{t-p +1} \end{bmatrix} $$
and $$ F = \begin{bmatrix} a_1 & a_2 & \cdots & a_{p-1} & a_p \\ 1 & 0 & \cdots & 0 & 0 \\ 0 & 1 & \cdots & 0 & 0 \\ \vdots& \vdots & \cdots & \vdots & \vdots\\ 0 & 0 & \cdots & 1 & 0 \\ \end{bmatrix} $$
This can be recoundacted to a eigenvalue problem where we need the eigenvalue to be smaller than 1. For this we need that the roots of the following equation to be in the unit circle. $$ \lambda^p - a_1 \lambda^{p-1} - a_2 \lambda ^{p-2} - ... - a_{p-1}\lambda - a_p = 0 $$
We can see that in our problem is not the case, so the process diverges.
In [17]:
coeff = - coeff
coeff[0] = 1
np.all(np.absolute(np.roots(coeff)) < 1.)
np.absolute(np.roots(coeff))
Out[17]:
In [ ]: