gully
May 2, 2018
In [1]:
# %load /Users/obsidian/Desktop/defaults.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
In [2]:
! du -hs ../data/dr2/Gaia/gdr2/light_curves/csv/
In [3]:
df0 = pd.read_csv('../data/dr2/Gaia/gdr2/light_curves/csv/light_curves_1042504286338226688_1098703830327377408.csv.gz')
In [4]:
df0.shape
Out[4]:
In [5]:
df0.head(2)
Out[5]:
In [6]:
df0.tail(2)
Out[6]:
Ok, this flat file is just what we want. It contains the flux as a function of time for unique sources, with additional metadata flags.
In [7]:
import glob
In [8]:
fns = glob.glob('../data/dr2/Gaia/gdr2/light_curves/csv/light_curves_*.csv.gz')
In [9]:
n_files = len(fns)
In [10]:
n_files
Out[10]:
It looks like the filename encodes the range of sources housed in each file. Let's extract that metadata without having to read the files.
In [11]:
fn_df = pd.DataFrame({'fn':fns})
fn_df.head()
Out[11]:
In [12]:
fn_df['basename'] = fn_df.fn.str.split('/').str[-1].str.split('light_curves_').str[-1].str.split('.csv.gz').str[0]
In [13]:
fn_df['low'] = fn_df.basename.str.split('_').str[0].astype(np.int64)
fn_df['high'] = fn_df.basename.str.split('_').str[1].astype(np.int64)
Now we can make a mask to find which file we want. Let's say we want the Gaia source: 66511970924353792
In [14]:
source = 66511970924353792
k2_source = 211059767
gaia_period = 0.771791
In [15]:
mask = (source > fn_df.low) & (source < fn_df.high)
In [16]:
mask.sum()
Out[16]:
In [17]:
path = fn_df[mask].fn.values[0]
In [18]:
df_lc = pd.read_csv(path)
In [19]:
df_lc = df_lc[df_lc.source_id==source]
In [20]:
df_lc.shape
Out[20]:
Not bad! We have a 96 point lightcurve!
In [21]:
df_lc.band.value_counts()
Out[21]:
In [22]:
gi = df_lc.band == 'G'
plt.plot(df_lc.time[gi], df_lc.flux[gi], '.')
Out[22]:
The Gaia photometry is taken over 500 days! The mean starspot coverage fraction is not expected to be coherent over such large timescales. There's a portion of the data that is taken contiguously. Let's highlight those.
In [23]:
plt.plot(np.mod(df_lc.time[gi], gaia_period), df_lc.flux[gi], '.')
alt = gi & (df_lc.time >1900) & (df_lc.time<1950)
plt.plot(np.mod(df_lc.time[alt], gaia_period), df_lc.flux[alt], 'o')
Out[23]:
Seems plausible...
In [24]:
30.0*4000
Out[24]:
In [25]:
from lightkurve import KeplerTargetPixelFile
In [26]:
tpf = KeplerTargetPixelFile.from_archive(k2_source)
In [27]:
k2_lc = tpf.to_lightcurve()
k2_lc = k2_lc[(k2_lc.flux == k2_lc.flux) & np.isfinite(k2_lc.flux) & (k2_lc.flux_err == k2_lc.flux_err)]
tpf.interact(lc=k2_lc)
The full K2 postage stamp contains another source, which would have easily been separated in Gaia.
In [28]:
# %load https://www.astroml.org/gatspy/periodic/lomb_scargle-1.py
In [29]:
from gatspy import periodic
In [30]:
model = periodic.LombScargle()
model.optimizer.period_range = (0.5, 1)
model.fit(k2_lc.time, k2_lc.flux, k2_lc.flux_err)
Out[30]:
In [31]:
periods = np.linspace(0.5, 1, 10000)
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore")
scores = model.score(periods)
# Plot the results
fig, ax = plt.subplots(figsize=(8, 3))
fig.subplots_adjust(bottom=0.2)
ax.plot(periods, scores)
ax.set(xlabel='period (days)', ylabel='Lomb Scargle Power')
Out[31]:
In [32]:
model.best_period
Out[32]:
Gaia has 0.771791, close!
In [33]:
plt.plot(np.mod(df_lc.time[gi], 0.7779122), df_lc.flux[gi], '.')
Out[33]:
Maybe slightly better coherence than the Gaia-based estimate.