In [1]:
%matplotlib inline

In [48]:
import os
import h5py
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as pl
from multiprocessing import Pool
from scipy.signal import resample, fftconvolve

from tqdm import tqdm

from peerless.catalogs import KICatalog

In [3]:
from acannon.acannon import AsteroCannon

In [4]:


In [ ]:
df = pd.read_csv("stello_2013.dat", delim_whitespace=True,
                 names=["kicid", "nu_max", "delta_nu"])
kic = KICatalog().df

joined = pd.merge(df, kic, left_on="kicid", right_on="kepid")

OUTPUT_DIR = "spectra"

n = 500
sig2 = n**2
conv = np.exp(-0.5 * np.arange(-2*n, 2*n)**2 / sig2) / np.sqrt(2*np.pi*sig2)

# inds = np.random.randint(len(joined), size=3000)
# rows = joined.iloc[inds]
rows = joined

P0 = None
labels = np.empty((len(rows), 3))
for i, (_, row) in tqdm(enumerate(rows.iterrows()), total=len(rows)):
    fn = os.path.join(OUTPUT_DIR, "{0}.h5".format(int(row.kicid)))
#     if not os.path.exists(fn):
#         continue
    
    # Load the power spectrum.
    with h5py.File(fn, "r") as f:
        power = f["power"]["power"][::2]
    p = np.log(fftconvolve(power, conv, mode="valid")[::100])
    if P0 is None:
        P0 = np.empty((len(rows), len(p)))
    P0[i] = p
    
    # Load the labels
    teff = row.teff
    feh = row.feh
    logg = row.logg
    labels[i] = (logg, teff, feh)

with open("dataset.pkl", "wb") as f:
    pickle.dump((P0, labels), f, -1)


  1%|          | 139/12374 [00:06<09:04, 22.48it/s]

In [6]:


In [7]:
with open("dataset.pkl", "rb") as f:
    P0, labels = pickle.load(f)

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [8]:
model = AsteroCannon(order=[15, 4, 3])

In [27]:
model.fit(labels[:1000], P0[:1000])

In [44]:
ind = 1580
y = P0[ind]
new_label = model.infer_one(y)
label = labels[ind]

In [45]:
new_label, label


Out[45]:
(array([  2.37929104e+00,   5.01069872e+03,   6.51811107e-02]),
 array([  2.33600000e+00,   4.96900000e+03,   7.00000000e-02]))

In [46]:
mu_1 = model.predict(label)[0]
mu_2 = model.predict(new_label)[0]

In [47]:
pl.plot(y - np.median(y), "k")
pl.plot(mu_1, "r");
pl.plot(mu_2, "g");



In [ ]:


In [ ]: