In [1]:
    
%matplotlib inline
import pandas as pd
import numpy as np
import tables as tb
import datetime
import os
from scipy.fftpack import rfft, fftshift, fft
from scipy.optimize import least_squares
from numpy import pi
from scipy.signal import tukey, bartlett
from scipy.signal import correlate
from matplotlib import pyplot as plt; plt.rcParams['figure.figsize'] = 15, 5
DATA_DIR = '../data/interim/'
HDF_FILE = 'interim_data.hdf'
    
In [2]:
    
D = pd.read_hdf(DATA_DIR + HDF_FILE, key = '/temperature_ts/wban_94797/D')
t = D.index
T = D['T'].values
T = T - np.mean(T)
dT = D['dT'].values
    
In [3]:
    
plt.plot(T)
plt.plot(dT)
    
    Out[3]:
    
In [43]:
    
N = len(T)  # number of discrete frequenies
T_sample = 3600.  # 1 sample every hour
    
In [5]:
    
ST = fft(T)[0: N // 2]
f = np.linspace(0, 24 * 3600 / (2 * T_sample), N // 2)  # Frequency in 1/days
ST = pd.DataFrame(index=f, data=np.abs(ST))
    
In the plot below we see spikes at frequencies of 1/day, as well as the associated harmonics.
In [6]:
    
plt.plot(ST.index[1:], np.log10(ST.values)[1:])
#plt.plot(f[1:N_final], 20.0 / len(T) * np.log10(np.abs(ST[1:N_final])))
plt.xlabel('frequency in days')
plt.ylabel('Power')
plt.title('T spectra')
    
    Out[6]:
    
In [9]:
    
rx = (1. / N) * correlate(T, T, mode = 'same')
plt.plot(fftshift(rx)[0:N//2])
    
    Out[9]:
    
In [10]:
    
# THIS METHOD OF FINDING THE PEAKS IS COMPLETELY NON-ROBUST
a = np.argsort(-ST.values.flatten())
# Frequeny in units of days
f0_yr = ST.index[a[0]]
f0_d = ST.index[a[1]]
print(f0_yr, f0_d)
    
    
In [21]:
    
print(a[0:25])
f_yr = f[52]
f_d = f[19012]
f_hd = f[38024]
print(f_yr, f_d, f_hd)
    
    
In [34]:
    
unix_birth = datetime.datetime(1970, 1, 1)
time_in_days = lambda t: (t - unix_birth).total_seconds() / 86400  # 86400 = timedelta(days=1).total_seconds()
t_days = np.fromiter(map(time_in_days, t), np.float64)  # Time indices in units of days
    
In [29]:
    
# Error functions for sinusoidal regression
def err_f0(theta):  # No frequency optimization
    a_yr, a_d, a_hd, phi_yr, phi_d, phi_hd = theta
    syr = a_yr * np.sin(2*pi*f_yr*t_days + phi_yr)
    sd = a_d * np.sin(2*pi*f_d*t_days + phi_d)
    shd = a_hd * np.sin(2*pi*f_hd*t_days + phi_hd)
    return T - syr - sd - shd
    
In [24]:
    
res = least_squares(err_f0, (1, 1, 1, 0, 0, 0), method='lm', loss='linear', verbose=1)
a_yr, a_d, a_hd, phi_yr, phi_d, phi_hd = res.x
print('theta0:', res.x)
print('Optimality:', res.optimality)
print('status:', res.status)
print('message:', res.message)
print('success:', res.success)
x_hat = a_yr * np.sin(2*pi*f_yr*t_days + phi_yr) + a_d * np.sin(2*pi*f_d*t_days + phi_d) +\
        a_hd * np.sin(2*pi*f_hd*t_days + phi_hd)
    
    
In [25]:
    
fig, axes = plt.subplots(3, 1)
ax0, ax1, ax2 = axes
ax0.plot(t, x_hat); ax0.plot(t, T, alpha=0.5)
ax1.plot(t[0:100000], x_hat[0:100000]); ax1.plot(t[0:100000], T[0:100000], alpha=0.5)
ax2.plot(t[0:1000], x_hat[0:1000]); ax2.plot(t[0:1000], T[0:1000], alpha=0.5)
    
    Out[25]:
    
In [51]:
    
T1 = T - x_hat
plt.plot(t, T1)
plt.plot(t, T, alpha = 0.5)
    
    Out[51]:
    
In [52]:
    
N = len(T1)
ST1 = fft(T1)[:N // 2]
ST1 = pd.DataFrame(index=f, data=np.abs(ST1))
    
In [53]:
    
plt.plot(ST1.index, np.log10(ST1.values))
#plt.plot(f[1:N_final], 20.0 / len(T) * np.log10(np.abs(ST[1:N_final])))
plt.xlabel('frequency in days')
plt.ylabel('Power')
plt.title('$T_1$ spectra')
    
    Out[53]:
    
In [55]:
    
rx = (1. / N) * correlate(T1, T1, mode = 'same')
plt.plot(fftshift(rx)[0:N//2])
    
    Out[55]:
    
Repeating the process is not useful
In [60]:
    
print(a[0:25])
f0_yr = f[52]
f0_d = f[19012]
print(f0_yr, f0_d)
    
    
In [61]:
    
# Error functions for sinusoidal regression
def err_f1(theta):  # No frequency optimization
    a_yr, a_d, phi_yr, phi_d = theta
    syr = a_yr * np.sin(2*pi*f0_yr*t_days + phi_yr)
    sd = a_d * np.sin(2*pi*f0_d*t_days + phi_d)
    return T1 - syr - sd
    
In [62]:
    
res0 = least_squares(err_f1, (1, 1, 0, 0), method='lm', loss='linear', verbose=1)
a0_yr, a0_d, phi0_yr, phi0_d = res0.x
print('theta0:', res0.x)
print('Optimality:', res0.optimality)
print('status:', res0.status)
print('message:', res0.message)
print('success:', res0.success)
x_hat = a_yr * np.sin(2*pi*f0_yr*t_days + phi_yr) + a_d * np.sin(2*pi*f0_d*t_days + phi_d)
    
    
In [63]:
    
fig, axes = plt.subplots(3, 1)
ax0, ax1, ax2 = axes
ax0.plot(t, x_hat); ax0.plot(t, T1, alpha=0.5)
ax1.plot(t[0:100000], x_hat[0:100000]); ax1.plot(t[0:100000], T1[0:100000], alpha=0.5)
ax2.plot(t[0:1000], x_hat[0:1000]); ax2.plot(t[0:1000], T1[0:1000], alpha=0.5)
    
    Out[63]: