In [1]:
%matplotlib inline
%config InlineBackend.figure_format = "retina"

from __future__ import print_function

from matplotlib import rcParams
rcParams["savefig.dpi"] = 100
rcParams["figure.dpi"] = 100
rcParams["font.size"] = 20

Advanced kernel modeling

This notebook was made with the following version of george:


In [2]:
import george
george.__version__


Out[2]:
'0.3.1'

In [3]:
import kplr
import numpy as np
import matplotlib.pyplot as pl

client = kplr.API()

# Download quarter 3 of the long cadence light curve of KIC 10295224.
lc = client.light_curves(10295224, short_cadence=False, sci_data_quarter=3)[0]
data = lc.read()

x = data["TIME"]
y = data["SAP_FLUX"]
yerr = data["SAP_FLUX_ERR"]

# Normalize the flux and its uncertainty by the median flux.
mu = np.median(y[np.isfinite(y)])
y /= mu
yerr /= mu

pl.errorbar(x, y, yerr=yerr, fmt=".k", capsize=0)
pl.xlabel("time [days]")
pl.ylabel("relative brightness");



In [4]:
# Fold the light curve on a trial period of about 2.93 days:
pl.errorbar(x % 2.9327280226002399, y, yerr=yerr, fmt=".k", capsize=0)
pl.xlabel("time [days]")
pl.ylabel("relative brightness");



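The fold period of about 2.93 days is used here without derivation. A minimal sketch of how such a period could be estimated, using scipy.signal.lombscargle (an assumption for illustration, not necessarily how the value above was found):

In [ ]:
from scipy.signal import lombscargle

# Mask the NaNs locally (the data aren't cleaned until a later cell) and scan
# a grid of trial periods around the signal.
good = np.isfinite(x) & np.isfinite(y)
periods = np.linspace(2.5, 3.5, 2000)
power = lombscargle(x[good].astype(float), (y[good] - 1.0).astype(float),
                    2 * np.pi / periods)
print(periods[np.argmax(power)])
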
In [5]:
from scipy.ndimage import label

# Number the contiguous runs of finite timestamps and record the time span of
# each one.
sects, count = label(np.isfinite(x))
ranges = [(x[sects == s].min(), x[sects == s].max()) for s in range(1, count + 1)]
ranges = [rng for rng in ranges if rng[1] > rng[0]]
print(ranges)


[(260.2250784075004, 290.54767162804637), (291.42628471651551, 321.44203856051899), (323.52618048039585, 349.49623066330241)]

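For reference, a tiny illustration (with made-up data) of what label does: it numbers each contiguous run of True values, so every gap in the time axis starts a new section.

In [ ]:
# -> (array([1, 1, 0, 2, 2], dtype=int32), 2)
print(label(np.isfinite([1.0, 2.0, np.nan, 4.0, 5.0])))
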
In [6]:
m = np.isfinite(x) & np.isfinite(y) & np.isfinite(yerr)
x = x[m]
y = y[m]
yerr = yerr[m]

In [7]:
from george import kernels

# Quasi-periodic term: a periodic kernel modulated by a long squared-exponential.
kernel = np.var(y) * kernels.ExpSine2Kernel(log_period=np.log(2.75), gamma=18.0) * kernels.ExpSquaredKernel(10.)

# Add one Matern term per contiguous section, restricted to that section with
# the block argument so that different sections stay uncorrelated.
for rng in ranges:
    kernel += np.var(y) * kernels.Matern32Kernel(50., block=[rng])

# The HODLR solver keeps the factorizations fast for a few thousand points.
gp = george.GP(kernel, solver=george.HODLRSolver, mean=1.0)
gp.compute(x, yerr)

K = gp.get_matrix(x)
pl.imshow(K, cmap="gray", interpolation="nearest")
pl.gca().set_xticklabels([])
pl.gca().set_yticklabels([]);



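The block structure visible in the matrix above comes from the block argument. A quick check (with two hypothetical points, one per section) that a blocked kernel vanishes whenever an input falls outside its block:

In [ ]:
k = kernels.Matern32Kernel(50., block=[ranges[0]])
t = np.array([ranges[0][0] + 1.0, ranges[1][0] + 1.0])
print(k.get_value(t[:, None]))  # only the first diagonal entry is non-zero
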
In [8]:
names = gp.get_parameter_names()
for name in names[4:]:
    gp.freeze_parameter(name)
gp.get_parameter_names()


Out[8]:
('kernel:k1:k1:k1:k1:k1:log_constant',
 'kernel:k1:k1:k1:k1:k2:gamma',
 'kernel:k1:k1:k1:k1:k2:log_period',
 'kernel:k1:k1:k1:k2:metric:log_M_0_0')

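The same state can be reached with the bulk freeze/thaw methods (an equivalent sketch, not what this notebook ran):

In [ ]:
gp.freeze_all_parameters()
for name in names[:4]:
    gp.thaw_parameter(name)
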
In [9]:
def nll0(p):
    gp.set_parameter_vector(p)
    return -gp.log_likelihood(y, quiet=True)

def grad_nll0(p):
    gp.set_parameter_vector(p)
    return -gp.grad_log_likelihood(y, quiet=True)

In [10]:
print(nll0(gp.get_parameter_vector()))
print(grad_nll0(gp.get_parameter_vector()))


-20929.8436697
[ -371.66703777  -135.3250629   7172.24747429   -59.40215444]

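As an optional sanity check (not part of the original analysis), the analytic gradient can be compared against central finite differences:

In [ ]:
p0 = gp.get_parameter_vector()
eps = 1e-6
num_grad = np.array([(nll0(p0 + eps * e) - nll0(p0 - eps * e)) / (2 * eps)
                     for e in np.eye(len(p0))])
gp.set_parameter_vector(p0)  # restore the original parameters
print(num_grad)
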
In [11]:
from scipy.optimize import minimize
# Constrain the period to lie between 2.5 and 3 days.
bounds = gp.get_parameter_bounds()
bounds[2] = (np.log(2.5), np.log(3.0))
result = minimize(nll0, gp.get_parameter_vector(), jac=grad_nll0, method="L-BFGS-B",
                  bounds=bounds)
print(result)


      fun: -21193.780652145728
 hess_inv: <4x4 LbfgsInvHessProduct with dtype=float64>
      jac: array([    4.27777278,  -103.05645525,  1556.79417276,   116.64083251])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 20
      nit: 5
   status: 0
  success: True
        x: array([ -9.66245009,  19.35179801,   0.91629073,   2.86713212])

In [12]:
np.exp(result.x)


Out[12]:
array([  6.36284355e-05,   2.53734247e+08,   2.50000000e+00,
         1.75865098e+01])

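Note that the recovered period, exp(result.x[2]) = 2.5 days, sits exactly at the lower bound imposed on log_period above; the optimizer railed against that constraint instead of finding an interior optimum:

In [ ]:
print(np.exp(result.x[2]), np.exp(bounds[2][0]))
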
In [13]:
gp.set_parameter_vector(result.x)

In [14]:
K = gp.get_matrix(x)
pl.imshow(K, cmap="gray", interpolation="nearest")
pl.gca().set_xticklabels([])
pl.gca().set_yticklabels([]);



In [15]:
mu = gp.predict(y, x, return_cov=False)

In [16]:
pl.plot(x, y, ".k")
pl.plot(x, mu, "g")


Out[16]:
[<matplotlib.lines.Line2D at 0x617c71710>]

In [17]:
gp.thaw_all_parameters()

In [18]:
gp.get_parameter_names()


Out[18]:
('mean:value',
 'white_noise:value',
 'kernel:k1:k1:k1:k1:k1:log_constant',
 'kernel:k1:k1:k1:k1:k2:gamma',
 'kernel:k1:k1:k1:k1:k2:log_period',
 'kernel:k1:k1:k1:k2:metric:log_M_0_0',
 'kernel:k1:k1:k2:k1:log_constant',
 'kernel:k1:k1:k2:k2:metric:log_M_0_0',
 'kernel:k1:k2:k1:log_constant',
 'kernel:k1:k2:k2:metric:log_M_0_0',
 'kernel:k2:k1:log_constant',
 'kernel:k2:k2:metric:log_M_0_0')

To share a single pair of Matern parameters across all three sections, the six-parameter optimization vector is expanded into the full ten-parameter kernel vector (and the corresponding gradient entries are summed):

In [19]:
def update_gp_with_vector(p):
    vector = np.concatenate([p[:4]] + [p[4:] for _ in ranges])
    gp.set_parameter_vector(vector)

def nll(p):
    update_gp_with_vector(p)
    return -gp.log_likelihood(y, quiet=True)

def grad_nll(p):
    update_gp_with_vector(p)
    grad = gp.grad_log_likelihood(y)
    n = 6
    for _ in ranges[:-1]:
        grad[4:6] += grad[n:n+2]  # accumulate gradients for the tied parameters
        n += 2
    return -grad[:6]

In [35]:
nll(gp.get_parameter_vector()[:6])


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-35-38483d2075f0> in <module>()
----> 1 nll(gp.get_parameter_vector()[:6])

<ipython-input-33-245ff98b3488> in nll(p)
      4 
      5 def nll(p):
----> 6     update_gp_with_vector(p)
      7     return -gp.log_likelihood(y, quiet=True)
      8 

<ipython-input-33-245ff98b3488> in update_gp_with_vector(p)
      1 def update_gp_with_vector(p):
      2     vector = np.concatenate([p[:4]] + [p[4:] for _ in ranges])
----> 3     gp.set_parameter_vector(vector)
      4 
      5 def nll(p):

/Users/dfm/research/projects/george/george/modeling.py in set_parameter_vector(self, vector, include_frozen)
    248             v[:] = vector
    249         else:
--> 250             v[self.unfrozen_mask] = vector
    251         self.parameter_vector = v
    252         self.dirty = True

ValueError: NumPy boolean array indexing assignment cannot assign 10 input values to the 12 output values where the mask is true

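The ValueError happens because thaw_all_parameters also thawed the mean and white noise parameters, so the GP expects 12 values while update_gp_with_vector builds only 10. Re-freezing those two parameters (presumably the step taken between these cells; they are absent from the parameter list at the end of the notebook) restores the expected layout:

In [ ]:
gp.freeze_parameter("mean:value")
gp.freeze_parameter("white_noise:value")
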
In [66]:
# Bound the Matern metric from below (log M >= 2 log 3, i.e. a length scale of
# at least 3 days) so the Matern terms can only capture variability on
# timescales longer than the ~2.9 day period.
bounds = bounds[:4] + [(None, None), (2*np.log(3.0), None)]
result = minimize(nll, gp.get_parameter_vector()[:6], jac=grad_nll, method="L-BFGS-B",
                  bounds=bounds)

In [67]:
result


Out[67]:
  status: 0
 success: True
    nfev: 23
     fun: -31701.465175322373
       x: array([-15.48532286,  29.49509814,   1.09861229,   6.35171531,
        -5.73608437,   2.19722458])
 message: 'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     jac: array([  5.59920833e+03,   3.77054178e+02,  -4.20164665e+04,
         1.08358643e+02,   1.49932879e+01,  -2.16634231e+01])
     nit: 4

In [68]:
update_gp_with_vector(result.x)

In [69]:
K = gp.get_matrix(x)
pl.imshow(np.log(K), cmap="gray", interpolation="nearest")
pl.gca().set_xticklabels([])
pl.gca().set_yticklabels([]);



In [70]:
gp.kernel


Out[70]:
ConstantKernel(log_constant=-15.4853228636, ndim=1, axes=[0]) * ExpSine2Kernel(gamma=29.4950981398, log_period=1.09861228867, ndim=1, axes=[0]) * ExpSquaredKernel(metric=Metric(573.475555574, ndim=1, axes=[0]))
  + ConstantKernel(log_constant=-5.73608437475, ndim=1, axes=[0]) * Matern32Kernel(metric=Metric(9.0, ndim=1, axes=[0]))
  + ConstantKernel(log_constant=-5.73608437475, ndim=1, axes=[0]) * Matern32Kernel(metric=Metric(9.0, ndim=1, axes=[0]))
  + ConstantKernel(log_constant=-5.73608437475, ndim=1, axes=[0]) * Matern32Kernel(metric=Metric(9.0, ndim=1, axes=[0]))

In [71]:
mu = gp.predict(y, x, return_cov=False)
pl.plot(x, y, ".k")
pl.plot(x, mu, "g")


Out[71]:
[<matplotlib.lines.Line2D at 0x116c84810>]

In [73]:
# Covariance of everything except the quasi-periodic term:
K_0 = gp.get_matrix(x) - gp.kernel.k1.k1.k1.get_value(x[:, None])

In [74]:
# Predict the non-periodic component and subtract it; the mean (1.0) must be
# removed before applying the inverse covariance and added back afterwards.
pred = np.dot(K_0, gp.apply_inverse(y - 1.0)) + 1.0

pl.plot(x, y - pred, ".g")


Out[74]:
[<matplotlib.lines.Line2D at 0x111a91d10>]

In [75]:
# Maximum-likelihood period (in days) of the ExpSine2 term:
period = np.exp(gp.kernel.k1.k1.k1.k1.k2.get_parameter("log_period"))

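As a usage sketch for this value (not in the original notebook), the detrended residuals can be folded on the recovered period:

In [ ]:
pl.plot(x % period, y - pred, ".g")
pl.xlabel("phase [days]")
pl.ylabel("detrended flux");
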
In [76]:
# Draw a random realization from the GP prior at the observed times:
samp = gp.sample(x)

In [77]:
pl.plot(x, samp, ".k")


Out[77]:
[<matplotlib.lines.Line2D at 0x112227850>]

In [78]:
gp.get_parameter_names()


Out[78]:
('kernel:k1:k1:k1:k1:k1:log_constant',
 'kernel:k1:k1:k1:k1:k2:gamma',
 'kernel:k1:k1:k1:k1:k2:log_period',
 'kernel:k1:k1:k1:k2:metric:log_M_0_0',
 'kernel:k1:k1:k2:k1:log_constant',
 'kernel:k1:k1:k2:k2:metric:log_M_0_0',
 'kernel:k1:k2:k1:log_constant',
 'kernel:k1:k2:k2:metric:log_M_0_0',
 'kernel:k2:k1:log_constant',
 'kernel:k2:k2:metric:log_M_0_0')
