In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format = 'retina'
import warnings
In [2]:
from astroML.time_series import multiterm_periodogram
from astroML.time_series import lomb_scargle
We need functions:
Strategy for finding all the maxima: compute second derivative, look for all the concave up regions, compute maximum in each region, sort by maxima.
In [3]:
def light_curve_data():
'''returns the light curve as numpy array, given EPIC ID'''
file = '~/Desktop/beehive/c05/211900000/35518/' + \
'hlsp_k2sff_k2_lightcurve_211935518-c05_kepler_v1_llc-default-aper.txt'
raw_lc = pd.read_csv(file, index_col=False, names=['time', 'flux'], skiprows=1)
return raw_lc
In [4]:
def run_periodograms(light_curve, P_range=[0.1, 10], samples=10000):
'''Returns periodograms for hardcoded subset of K2 Cycle 2 lightcurve'''
x_full = light_curve.time.values
y_full = light_curve.flux.values
gi = y_full == y_full#(x_full > 2065) & (x_full < 2095)
x, y = x_full[gi], y_full[gi]
yerr = y*0.001
periods = np.linspace(P_range[0], P_range[1], samples)
omega = 2.00*np.pi/periods
P_M = multiterm_periodogram(x, y, yerr, omega)
P_LS = lomb_scargle(x, y, yerr, omega)
return (periods, P_M, P_LS)
In [5]:
from scipy.signal import argrelmax
In [6]:
def top_N_periods(periods, lomb_scargle_power, n=5):
'''Returns the top N Lomb-Scargle periods, given a vector of the periods and values'''
# Get all the local maxima
all_max_i = argrelmax(lomb_scargle_power)
max_LS = lomb_scargle_power[all_max_i]
max_periods = periods[all_max_i]
# Sort by the Lomb-Scale power
sort_i = np.argsort(max_LS)
# Only keep the top N periods
top_N_LS = max_LS[sort_i][::-1][0:n]
top_N_pers = max_periods[sort_i][::-1][0:n]
return top_N_pers, top_N_LS
In [7]:
def plot_LC_and_periodograms(lc, periods, P_M, P_LS):
plt.figure(figsize=(14,6))
plt.subplot(121)
plt.step(lc.time, lc.flux)
#plt.xlim(2065, 2095)
plt.subplot(122)
plt.step(periods, P_M, label='Multi-term periodogram')
plt.step(periods, P_LS, label='Lomb Scargle')
plt.legend()
In [8]:
def plot_periodograms(periods, P_M, P_LS):
plt.figure(figsize=(8,8))
plt.step(periods, P_M, label='Multi-term periodogram')
plt.step(periods, P_LS, label='Lomb Scargle')
plt.legend()
In [9]:
lc = light_curve_data()
In [10]:
periods, P_M, P_LS = run_periodograms(lc, P_range=[1.1, 3.1], samples=10000)
In [11]:
plot_LC_and_periodograms(lc, periods, P_M, P_LS)
In [12]:
v1, v2 = top_N_periods(periods, P_M)
In [13]:
v1
Out[13]:
In [14]:
v2
Out[14]:
In [15]:
periods, P_M, P_LS = run_periodograms(lc, P_range=[0.1,20])
plot_LC_and_periodograms(lc, periods, P_M, P_LS)
In [16]:
P_coarse_fit = periods[np.argmax(P_LS)]
In [17]:
lc.columns
Out[17]:
In [18]:
x_full = lc.time.values
y_full = lc.flux.values
gi = x_full==x_full#(x_full > 2065) & (x_full < 2095)
x, y = x_full[gi], y_full[gi]
yerr = y*0.001
In [19]:
periods = np.linspace(P_coarse_fit*0.8, P_coarse_fit*1.2, 10000)
omega = 2.00*np.pi/periods
P_M = multiterm_periodogram(x, y, yerr, omega)
P_LS = lomb_scargle(x, y, yerr, omega)
In [20]:
P_fit = periods[np.argmax(P_LS)]
In [21]:
P_fit
Out[21]:
$y = c_0 + c_1 \cdot x + c_2 \cdot \sin{\frac{2\pi x}{P}} + c_3 \cdot \cos{\frac{2\pi x}{P}} $
We have four coefficients: $c_1, c_2, c_3, c_4$
In [22]:
sin_vector = np.sin(2.0*np.pi*x/P_fit)
cos_vector = np.cos(2.0*np.pi*x/P_fit)
In [23]:
A = np.concatenate((np.expand_dims(cos_vector, 1),
np.expand_dims(sin_vector, 1),
np.vander(x, 1)), axis=1)
In [24]:
ATA = np.dot(A.T, A / yerr[:, None]**2)
sigma_w = np.linalg.inv(ATA)
mean_w = np.linalg.solve(ATA, np.dot(A.T, y/yerr**2))
In [25]:
yfit = np.matmul(mean_w, A.T)
In [26]:
plt.figure(figsize=(14, 4))
plt.plot(x, y)
plt.plot(x, yfit)
plt.ylim(0.90, 1.10)
plt.title("EPIC {} K2 C02 Light Curve".format(211935518))
plt.xlabel('BJD - 2454833')
plt.ylabel('Flux');
In [27]:
from sklearn import cross_validation
In [28]:
def lin_regress(A, y, yerr):
ATA = np.dot(A.T, A / yerr[:, None]**2)
sigma_w = np.linalg.inv(ATA)
mean_w = np.linalg.solve(ATA, np.dot(A.T, y/yerr**2))
return mean_w
In [29]:
polys = np.arange(1, 15)
net_scores = np.zeros(len(polys))
n_folds = 10
In [30]:
j = 0
for n_poly in polys:
print(j)
X = np.concatenate((np.expand_dims(cos_vector, 1),
np.expand_dims(sin_vector, 1),
np.vander(x, n_poly)), axis=1)
n, n_dim = X.shape
scores_test = np.zeros(n_folds)
kf = cross_validation.KFold(n, n_folds=n_folds)
i = 0
for train_index, test_index in kf:
#print("TRAIN:", train_index.shape, "TEST:", test_index.shape)
X_train, X_test = X[train_index], X[test_index]
y_train, y_test = y[train_index], y[test_index]
y_train_err, y_test_err = yerr[train_index], yerr[test_index]
w_train = lin_regress(X_train, y_train, y_train_err)
y_test_fit = np.matmul(w_train, X_test.T)
resid = np.sqrt(np.sum(((y_test-y_test_fit)/y_test_err)**2))
scores_test[i] = resid
i += 1
net_scores[j] = np.mean(scores_test)
j += 1
In [31]:
plt.plot(polys, net_scores)
plt.xlabel('Polynomial Degree')
plt.ylabel('Mean Test RMS')
Out[31]:
In [32]:
opt_poly = polys[np.argmin(net_scores)]
In [33]:
A = np.concatenate((np.expand_dims(cos_vector, 1),
np.expand_dims(sin_vector, 1),
np.vander(x, opt_poly)), axis=1)
In [34]:
ATA = np.dot(A.T, A / yerr[:, None]**2)
sigma_w = np.linalg.inv(ATA)
mean_w = np.linalg.solve(ATA, np.dot(A.T, y/yerr**2))
In [35]:
#sns.heatmap(np.log10(sigma_w), annot=True)
In [36]:
mean_w
Out[36]:
In [37]:
n_dim, = mean_w.shape
The first two are A and B.
In [38]:
np.sqrt(mean_w[0]**2 + mean_w[1]**2)
Out[38]:
In [39]:
yfit = np.matmul(mean_w, A.T)
In [40]:
plt.figure(figsize=(14, 4))
plt.plot(x, y)
plt.plot(x, yfit)
plt.ylim(0.90, 1.10)
plt.title("EPIC {} K2 C02 Light Curve".format(211935518))
plt.xlabel('BJD - 2454833')
plt.ylabel('Flux');
That's just for one period. But we have many from the multiterm fit! Let's use that information.
$\hat y = c_0 + c_1 \cdot x + c_2 \cdot \sin{\frac{2\pi x}{P}} + c_3 \cdot \cos{\frac{2\pi x}{P}} $
becomes:
$\hat y = c_0 + c_1 \cdot x + \dots + \sum_{j=1}^{N_j} a_j \cdot \sin{\frac{2\pi x}{P/j}} + b_j \cdot \cos{\frac{2\pi x}{P/j}} $
$\hat y = \sum_{i=0}^{N_i} c_i \cdot x^i + \sum_{j=1}^{N_j} a_j \cdot \sin{\frac{2\pi j x}{P}} + b_j \cdot \cos{\frac{2\pi j x}{P}} $
We have many coefficients: $c_i, a_j, b_j$
In [41]:
from gatspy.periodic import LombScargle, LombScargleFast
In [42]:
dy = y*0.0+0.0003
In [43]:
N_j = 4
In [44]:
sin_vectors = np.array([np.sin(2.0*np.pi*j*x/P_fit) for j in range(1, N_j+1)])
cos_vectors = np.array([np.cos(2.0*np.pi*j*x/P_fit) for j in range(1, N_j+1)])
In [45]:
periods, P_M, P_LS
Out[45]:
In [167]:
#P_fits = [4.183, 3.932626, 2.05549555]
dP=-0.05
P_fits = [4.1834699581569312, 3.932626, 2.05549555]
sin_vectors = np.array([np.sin(2.0*np.pi*x/P_fit) for P_fit in P_fits])
cos_vectors = np.array([np.cos(2.0*np.pi*x/P_fit) for P_fit in P_fits])
#sin_vectors = np.array([np.sin(2.0*np.pi*j*x/P_fit) for j in range(1, N_j+1)])
#cos_vectors = np.array([np.cos(2.0*np.pi*j*x/P_fit) for j in range(1, N_j+1)])
n_poly = 2
A = np.concatenate((cos_vectors.T,
sin_vectors.T,
np.vander(x, 10)), axis=1)
ATA = np.dot(A.T, A / yerr[:, None]**2)
sigma_w = np.linalg.inv(ATA)
mean_w = np.linalg.solve(ATA, np.dot(A.T, y/yerr**2))
yfit = np.matmul(mean_w, A.T)
yfit_smooth = np.matmul(mean_w[-opt_poly:], A[:, -opt_poly:].T)
plt.plot(x, yfit)
plt.plot(x, yfit_smooth, 'k--')
plt.step(x, y, alpha=0.3)
Out[167]:
In [182]:
from scipy.signal import savgol_filter
In [ ]:
In [185]:
y.shape
Out[185]:
In [187]:
3401 * 29.54 /(4.183*24*60.0)
Out[187]:
In [204]:
y_sm = savgol_filter(y, 17, 3)
In [205]:
plt.plot(x, y)
plt.plot(x, y_sm)
Out[205]:
In [210]:
plt.figure(figsize=(20, 8))
plt.plot(phased, y/y_sm, 'o', alpha=0.3)
Out[210]: