In [2]:
import sqlite3
from astropy.table import Table, vstack
from cesium import featurize
import numpy as np

In [4]:
curve = Table.read('scripts/ptf_query/byOid/curves_oid___51892000005737.tbl', format = 'ipac')
curve2 = Table.read('scripts/ptf_query/byOid/curves_oid___51892060003794.tbl', format = 'ipac')
curve3 = Table.read('scripts/ptf_query/byOid/curves_oid___51892060016842.tbl', format = 'ipac')

In [4]:
feats_to_use = ['amplitude',
                'flux_percentile_ratio_mid20', 
                'flux_percentile_ratio_mid35', 
                'flux_percentile_ratio_mid50', 
                'flux_percentile_ratio_mid65', 
                'flux_percentile_ratio_mid80', 
                'max_slope', 
                'maximum', 
                'median',
                'median_absolute_deviation', 
                'minimum',
                'percent_amplitude',
                'percent_beyond_1_std', 
                'percent_close_to_median', 
                'percent_difference_flux_percentile',
                'period_fast', 
                'qso_log_chi2_qsonu',
                'qso_log_chi2nuNULL_chi2nu',
                'skew',
                'std',
                'stetson_j',
                'stetson_k',
                'weighted_average',
                'fold2P_slope_10percentile',
                'fold2P_slope_90percentile',
                'freq1_amplitude1',
                'freq1_amplitude2',
                'freq1_amplitude3',
                'freq1_amplitude4',
                'freq1_freq',
                'freq1_lambda',
                'freq1_rel_phase2',
                'freq1_rel_phase3',
                'freq1_rel_phase4',
                'freq1_signif',
                'freq2_amplitude1',
                'freq2_amplitude2',
                'freq2_amplitude3',
                'freq2_amplitude4',
                'freq2_freq',
                'freq2_rel_phase2',
                'freq2_rel_phase3',
                'freq2_rel_phase4',
                'freq3_amplitude1',
                'freq3_amplitude2',
                'freq3_amplitude3',
                'freq3_amplitude4',
                'freq3_freq',
                'freq3_rel_phase2',
                'freq3_rel_phase3',
                'freq3_rel_phase4',
                'freq_amplitude_ratio_21',
                'freq_amplitude_ratio_31',
                'freq_frequency_ratio_21',
                'freq_frequency_ratio_31',
                'freq_model_max_delta_mags',
                'freq_model_min_delta_mags',
                'freq_model_phi1_phi2',
                'freq_n_alias',
                'freq_signif_ratio_21',
                'freq_signif_ratio_31',
                'freq_varrat',
                'freq_y_offset',
                'linear_trend',
                'medperc90_2p_p',
                'p2p_scatter_2praw',
                'p2p_scatter_over_mad',
                'p2p_scatter_pfold_over_mad',
                'p2p_ssqr_diff_over_var',
                'scatter_res_raw'
               ]

In [5]:
fset = featurize.featurize_time_series(curve['obsmjd'], curve['mag_autocorr'], curve['magerr_auto'],
                                        meta_features = {'oid': str(curve['oid'][0])}, features_to_use = feats_to_use)
fset2 = featurize.featurize_time_series(curve2['obsmjd'], curve2['mag_autocorr'], curve2['magerr_auto'],
                                        meta_features = {'oid': str(curve2['oid'][0])}, features_to_use = feats_to_use)
fset3 = featurize.featurize_time_series(curve3['obsmjd'], curve3['mag_autocorr'], curve3['magerr_auto'],
                                        meta_features = {'oid': str(curve3['oid'][0])}, features_to_use = feats_to_use)


/home/nke2/miniconda3/envs/tmpJupyter/lib/python3.6/site-packages/dask/async.py:13: UserWarning: `dask.async.get_sync` has been moved to `dask.local.get_sync`, please update your imports
  warnings.warn(_msg.format('get_sync'))
/home/nke2/miniconda3/envs/tmpJupyter/lib/python3.6/site-packages/dask/async.py:13: UserWarning: `dask.async.get_sync` has been moved to `dask.local.get_sync`, please update your imports
  warnings.warn(_msg.format('get_sync'))
/home/nke2/miniconda3/envs/tmpJupyter/lib/python3.6/site-packages/dask/async.py:13: UserWarning: `dask.async.get_sync` has been moved to `dask.local.get_sync`, please update your imports
  warnings.warn(_msg.format('get_sync'))

In [10]:
conn = sqlite3.connect('features.db')
cur = conn.cursor()

In [9]:
print(curve.colnames)


['oid', 'obsmjd', 'mag_autocorr', 'magerr_auto', 'fid', 'ra', 'dec', 'clon', 'clat']

In [55]:
cur.execute("""create table lightcurves(
    oid string primary key,
    obsmjd Float,
    mag_autocorr Float,
    magerr_auto Float,
    fid Float,
    ra Float,
    dec Float,
    clon Float,
    clat Float
    )""")


Out[55]:
<sqlite3.Cursor at 0x7fe40784bce0>

In [3]:
cur.execute("""PRAGMA table_info(lightcurves)""")
cur.fetchall()


Out[3]:
[(0, 'oid', 'string', 0, None, 1),
 (1, 'obsmjd', 'Float', 0, None, 0),
 (2, 'mag_autocorr', 'Float', 0, None, 0),
 (3, 'magerr_auto', 'Float', 0, None, 0),
 (4, 'fid', 'Float', 0, None, 0),
 (5, 'ra', 'Float', 0, None, 0),
 (6, 'dec', 'Float', 0, None, 0),
 (7, 'clon', 'Float', 0, None, 0),
 (8, 'clat', 'Float', 0, None, 0)]

In [3]:
cur.execute("""select oid from lightcurves""")
cur.fetchall()


Out[3]:
[]

In [12]:
cur.execute("""drop table sigfeats""")


Out[12]:
<sqlite3.Cursor at 0x7fc2b884e810>

In [14]:
cur.execute("""create table sigfeats(
    amplitude Float,
    flux_percentile_ratio_mid20 Float, 
    flux_percentile_ratio_mid35 Float, 
    flux_percentile_ratio_mid50 Float, 
    flux_percentile_ratio_mid65 Float, 
    flux_percentile_ratio_mid80 Float, 
    max_slope Float, 
    maximum Float, 
    median Float,
    median_absolute_deviation Float, 
    minimum Float,
    percent_amplitude Float,
    percent_beyond_1_std Float, 
    percent_close_to_median Float,
    percent_difference_flux_percentile Float,
    period_fast Float, 
    qso_log_chi2_qsonu Float,
    qso_log_chi2nuNULL_chi2nu Float,
    skew Float,
    std Float,
    stetson_j Float,
    stetson_k Float,
    weighted_average Float,
    fold2P_slope_10percentile Float,
    fold2P_slope_90percentile Float,
    freq1_amplitude1 Float,
    freq1_amplitude2 Float,
    freq1_amplitude3 Float,
    freq1_amplitude4 Float,
    freq1_freq Float,
    freq1_lambda Float,
    freq1_rel_phase2 Float,
    freq1_rel_phase3 Float,
    freq1_rel_phase4 Float,
    freq1_signif Float,
    freq2_amplitude1 Float,
    freq2_amplitude2 Float,
    freq2_amplitude3 Float,
    freq2_amplitude4 Float,
    freq2_freq Float,
    freq2_rel_phase2 Float,
    freq2_rel_phase3 Float,
    freq2_rel_phase4 Float,
    freq3_amplitude1 Float,
    freq3_amplitude2 Float,
    freq3_amplitude3 Float,
    freq3_amplitude4 Float,
    freq3_freq Float,
    freq3_rel_phase2 Float,
    freq3_rel_phase3 Float,
    freq3_rel_phase4 Float,
    freq_amplitude_ratio_21 Float,
    freq_amplitude_ratio_31 Float,
    freq_frequency_ratio_21 Float,
    freq_frequency_ratio_31 Float,
    freq_model_max_delta_mags Float,
    freq_model_min_delta_mags Float,
    freq_model_phi1_phi2 Float,
    freq_n_alias Float,
    freq_signif_ratio_21 Float,
    freq_signif_ratio_31 Float,
    freq_varrat Float,
    freq_y_offset Ffloat,
    linear_trend Float,
    medperc90_2p_p Float,
    p2p_scatter_2praw Float,
    p2p_scatter_over_mad Float,
    p2p_scatter_pfold_over_mad Float,
    p2p_ssqr_diff_over_var Float,
    scatter_res_raw Float,
    oid string unique primary key,
    sigclipped Boolean
)""")


Out[14]:
<sqlite3.Cursor at 0x7fc2b884e810>

In [15]:
cur.execute("""PRAGMA table_info(sigfeats)""")
cur.fetchall()


Out[15]:
[(0, 'amplitude', 'Float', 0, None, 0),
 (1, 'flux_percentile_ratio_mid20', 'Float', 0, None, 0),
 (2, 'flux_percentile_ratio_mid35', 'Float', 0, None, 0),
 (3, 'flux_percentile_ratio_mid50', 'Float', 0, None, 0),
 (4, 'flux_percentile_ratio_mid65', 'Float', 0, None, 0),
 (5, 'flux_percentile_ratio_mid80', 'Float', 0, None, 0),
 (6, 'max_slope', 'Float', 0, None, 0),
 (7, 'maximum', 'Float', 0, None, 0),
 (8, 'median', 'Float', 0, None, 0),
 (9, 'median_absolute_deviation', 'Float', 0, None, 0),
 (10, 'minimum', 'Float', 0, None, 0),
 (11, 'percent_amplitude', 'Float', 0, None, 0),
 (12, 'percent_beyond_1_std', 'Float', 0, None, 0),
 (13, 'percent_close_to_median', 'Float', 0, None, 0),
 (14, 'percent_difference_flux_percentile', 'Float', 0, None, 0),
 (15, 'period_fast', 'Float', 0, None, 0),
 (16, 'qso_log_chi2_qsonu', 'Float', 0, None, 0),
 (17, 'qso_log_chi2nuNULL_chi2nu', 'Float', 0, None, 0),
 (18, 'skew', 'Float', 0, None, 0),
 (19, 'std', 'Float', 0, None, 0),
 (20, 'stetson_j', 'Float', 0, None, 0),
 (21, 'stetson_k', 'Float', 0, None, 0),
 (22, 'weighted_average', 'Float', 0, None, 0),
 (23, 'fold2P_slope_10percentile', 'Float', 0, None, 0),
 (24, 'fold2P_slope_90percentile', 'Float', 0, None, 0),
 (25, 'freq1_amplitude1', 'Float', 0, None, 0),
 (26, 'freq1_amplitude2', 'Float', 0, None, 0),
 (27, 'freq1_amplitude3', 'Float', 0, None, 0),
 (28, 'freq1_amplitude4', 'Float', 0, None, 0),
 (29, 'freq1_freq', 'Float', 0, None, 0),
 (30, 'freq1_lambda', 'Float', 0, None, 0),
 (31, 'freq1_rel_phase2', 'Float', 0, None, 0),
 (32, 'freq1_rel_phase3', 'Float', 0, None, 0),
 (33, 'freq1_rel_phase4', 'Float', 0, None, 0),
 (34, 'freq1_signif', 'Float', 0, None, 0),
 (35, 'freq2_amplitude1', 'Float', 0, None, 0),
 (36, 'freq2_amplitude2', 'Float', 0, None, 0),
 (37, 'freq2_amplitude3', 'Float', 0, None, 0),
 (38, 'freq2_amplitude4', 'Float', 0, None, 0),
 (39, 'freq2_freq', 'Float', 0, None, 0),
 (40, 'freq2_rel_phase2', 'Float', 0, None, 0),
 (41, 'freq2_rel_phase3', 'Float', 0, None, 0),
 (42, 'freq2_rel_phase4', 'Float', 0, None, 0),
 (43, 'freq3_amplitude1', 'Float', 0, None, 0),
 (44, 'freq3_amplitude2', 'Float', 0, None, 0),
 (45, 'freq3_amplitude3', 'Float', 0, None, 0),
 (46, 'freq3_amplitude4', 'Float', 0, None, 0),
 (47, 'freq3_freq', 'Float', 0, None, 0),
 (48, 'freq3_rel_phase2', 'Float', 0, None, 0),
 (49, 'freq3_rel_phase3', 'Float', 0, None, 0),
 (50, 'freq3_rel_phase4', 'Float', 0, None, 0),
 (51, 'freq_amplitude_ratio_21', 'Float', 0, None, 0),
 (52, 'freq_amplitude_ratio_31', 'Float', 0, None, 0),
 (53, 'freq_frequency_ratio_21', 'Float', 0, None, 0),
 (54, 'freq_frequency_ratio_31', 'Float', 0, None, 0),
 (55, 'freq_model_max_delta_mags', 'Float', 0, None, 0),
 (56, 'freq_model_min_delta_mags', 'Float', 0, None, 0),
 (57, 'freq_model_phi1_phi2', 'Float', 0, None, 0),
 (58, 'freq_n_alias', 'Float', 0, None, 0),
 (59, 'freq_signif_ratio_21', 'Float', 0, None, 0),
 (60, 'freq_signif_ratio_31', 'Float', 0, None, 0),
 (61, 'freq_varrat', 'Float', 0, None, 0),
 (62, 'freq_y_offset', 'Ffloat', 0, None, 0),
 (63, 'linear_trend', 'Float', 0, None, 0),
 (64, 'medperc90_2p_p', 'Float', 0, None, 0),
 (65, 'p2p_scatter_2praw', 'Float', 0, None, 0),
 (66, 'p2p_scatter_over_mad', 'Float', 0, None, 0),
 (67, 'p2p_scatter_pfold_over_mad', 'Float', 0, None, 0),
 (68, 'p2p_ssqr_diff_over_var', 'Float', 0, None, 0),
 (69, 'scatter_res_raw', 'Float', 0, None, 0),
 (70, 'oid', 'string', 0, None, 1),
 (71, 'sigclipped', 'Boolean', 0, None, 0)]

In [2]:
def saveFeat (lc, tName, cur, conn): #pass in lightcurve table and cursor
    feats_to_use = [
                'amplitude',
                'flux_percentile_ratio_mid20', 
                'flux_percentile_ratio_mid35', 
                'flux_percentile_ratio_mid50', 
                'flux_percentile_ratio_mid65', 
                'flux_percentile_ratio_mid80', 
                'max_slope', 
                'maximum', 
                'median',
                'median_absolute_deviation', 
                'minimum',
                'percent_amplitude',
                'percent_beyond_1_std', 
                'percent_close_to_median', 
                'percent_difference_flux_percentile',
                'period_fast', 
                'qso_log_chi2_qsonu',
                'qso_log_chi2nuNULL_chi2nu',
                'skew',
                'std',
                'stetson_j',
                'stetson_k',
                'weighted_average',
                'fold2P_slope_10percentile',
                'fold2P_slope_90percentile',
                'freq1_amplitude1',
                'freq1_amplitude2',
                'freq1_amplitude3',
                'freq1_amplitude4',
                'freq1_freq',
                'freq1_lambda',
                'freq1_rel_phase2',
                'freq1_rel_phase3',
                'freq1_rel_phase4',
                'freq1_signif',
                'freq2_amplitude1',
                'freq2_amplitude2',
                'freq2_amplitude3',
                'freq2_amplitude4',
                'freq2_freq',
                'freq2_rel_phase2',
                'freq2_rel_phase3',
                'freq2_rel_phase4',
                'freq3_amplitude1',
                'freq3_amplitude2',
                'freq3_amplitude3',
                'freq3_amplitude4',
                'freq3_freq',
                'freq3_rel_phase2',
                'freq3_rel_phase3',
                'freq3_rel_phase4',
                'freq_amplitude_ratio_21',
                'freq_amplitude_ratio_31',
                'freq_frequency_ratio_21',
                'freq_frequency_ratio_31',
                'freq_model_max_delta_mags',
                'freq_model_min_delta_mags',
                'freq_model_phi1_phi2',
                'freq_n_alias',
                'freq_signif_ratio_21',
                'freq_signif_ratio_31',
                'freq_varrat',
                'freq_y_offset',
                'linear_trend',
                'medperc90_2p_p',
                'p2p_scatter_2praw',
                'p2p_scatter_over_mad',
                'p2p_scatter_pfold_over_mad',
                'p2p_ssqr_diff_over_var',
                'scatter_res_raw'
               ]
    string = "insert into " + tName + """ values (
            ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 
            ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
    cur.execute("""select oid from {:}""".format(tName))
    check = cur.fetchall()

    for oid in np.unique(lc['oid']):
        if (oid not in check):
            mask = np.logical_and(lc['oid'] == oid, lc['mag_autocorr'] > 0)

            fset = featurize.featurize_time_series(lc[mask]['obsmjd'], lc[mask]['mag_autocorr'], lc[mask]['magerr_auto'],
                                            meta_features = {'oid': str(oid)}, features_to_use = feats_to_use)
 
            cur.execute(string, fset.get_values()[0])
        else:
            print('Database already contains a ', oid)
    conn.commit()

In [46]:
saveFeat(curve, 'feats2', cur, conn)


/home/nke2/miniconda3/envs/tmpJupyter/lib/python3.6/site-packages/dask/async.py:13: UserWarning: `dask.async.get_sync` has been moved to `dask.local.get_sync`, please update your imports
  warnings.warn(_msg.format('get_sync'))

In [47]:
saveFeat(curve2, 'feats2', cur, conn)


/home/nke2/miniconda3/envs/tmpJupyter/lib/python3.6/site-packages/dask/async.py:13: UserWarning: `dask.async.get_sync` has been moved to `dask.local.get_sync`, please update your imports
  warnings.warn(_msg.format('get_sync'))

In [48]:
saveFeat(curve3, 'feats2', cur, conn)


/home/nke2/miniconda3/envs/tmpJupyter/lib/python3.6/site-packages/dask/async.py:13: UserWarning: `dask.async.get_sync` has been moved to `dask.local.get_sync`, please update your imports
  warnings.warn(_msg.format('get_sync'))

In [11]:
cur.execute("""select oid from feats2""")
len(cur.fetchall())


Out[11]:
17804

In [50]:
sqlite3.Connection.close(conn)