In [1]:
from __future__ import division

import numpy as np
import matplotlib.pyplot as plt
from astropy.table import Table
from astropy.time import Time
import tables
from scipy import stats

%matplotlib inline

In [2]:
SOTA2015_FIT_ALL = [9.0122372262846735,
 6.5882697680183346,
 -0.77727342693609758,
 -2.1477049943999713,
 0.67132336969927486,
 0.33209623740442562]

In [3]:
SOTA2015_FIT_NO_1P5 = [9.6887121629584954,
 9.1613039805725531,
 -0.41919346946809299,
 -2.3829996969445664,
 0.54998935209070199,
 0.47839261003413558,
 0.0060869131558326916]

In [4]:
SOTA2015_FIT_ONLY_1P5 = [8.5417092977479818,
 0.4448269654965103,
 -3.5137851315167579,
 -1.350542439869622,
 1.527806122455764,
 0.30973568451354011,
 0.005414847154007865]

In [5]:
with tables.openFile('/proj/sot/ska/data/acq_stats/acq_stats.h5', 'r') as h5:
    cols = h5.root.data.cols
    names = {'tstart': 'guide_tstart',
             'obsid': 'obsid',
             'obc_id': 'acqid',
             'warm_pix': 'n100_warm_frac',
             'mag': 'mag_aca',
             'known_bad': 'known_bad',
             'color': 'color1'}
    acqs = Table([getattr(cols, h5_name)[:] for h5_name in names.values()],
                 names=list(names.keys()))

    year_q0 = 1999.0 + 31. / 365.25  # Jan 31 approximately
acqs['year'] = Time(acqs['tstart'], format='cxcsec').decimalyear.astype('f4')
acqs['quarter'] = (np.trunc((acqs['year'] - year_q0) * 4)).astype('f4')
acqs['fail'] = 1.0 - acqs['obc_id'].astype(np.int)
acqs['color_1p5'] = np.where(acqs['color'] == 1.5, 1, 0)

In [6]:
data_all = acqs.group_by('quarter')
data_all['mag10'] = data_all['mag'] - 10.0
data_all.sort('year')
ok = (data_all['year'] > 2007) & (data_all['mag'] > 6.0) & (data_all['mag'] < 11.0)
data_all = data_all[ok]
data_all = data_all.group_by('quarter')
data_mean = data_all.groups.aggregate(np.mean)

ok = np.ones(len(data_all), dtype=bool)
print('Filtering known bad obsids, start len = {}'.format(len(data_all)))
bad_obsids = [
    # Venus
    2411,2414,6395,7306,7307,7308,7309,7311,7312,7313,7314,7315,7317,7318,7406,583,
    7310,9741,9742,9743,9744,9745,9746,9747,9749,9752,9753,9748,7316,15292,16499,
    16500,16501,16503,16504,16505,16506,16502,
    ]
for badid in bad_obsids:
    ok = ok & (data_all['obsid'] != badid)
data_all = data_all[ok]
print('Filtering known bad obsids, end len = {}'.format(len(data_all)))


Filtering known bad obsids, start len = 108646
Filtering known bad obsids, end len = 108486

In [7]:
# SET DATA FILTERING for this run (TO DO: refactor into a function)
data_all = data_all[data_all['color'] != 1.5]
data_all


Out[7]:
<Table masked=False length=88919>
obc_idknown_badtstartcolorwarm_pixmagobsidyearquarterfailcolor_1p5mag10
boolboolfloat64float64float64float64int64float32float32float64int64float64
TrueFalse284017659.6811.478999853130.0375362394058.92266273499586132007.031.00.00-1.07733726501
TrueFalse284017659.6811.043799757960.0375362394058.83009433746586132007.031.00.00-1.16990566254
TrueFalse284017659.6811.490899801250.0375362394059.53129386902586132007.031.00.00-0.468706130981
TrueFalse284017659.6810.5465500950810.0375362394057.90769767761586132007.031.00.00-2.09230232239
TrueFalse284017659.6810.598400413990.0375362394059.28877162933586132007.031.00.00-0.711228370667
TrueFalse284017659.6810.2312002629040.0375362394058.9228105545586132007.031.00.00-1.0771894455
TrueFalse284017659.6811.106700062750.0375362394059.7141456604586132007.031.00.00-0.2858543396
TrueFalse284019533.3810.9094997644420.03751133196338.96426963806586122007.031.00.00-1.03573036194
TrueFalse284019533.3810.4734501242640.03751133196339.33366012573586122007.031.00.00-0.666339874268
TrueFalse284019533.3811.256299734120.03751133196336.77489948273586122007.031.00.00-3.22510051727
....................................
TrueFalse570520664.9210.3926998972890.1902676091497.45233917236512932016.0867.00.00-2.54766082764
TrueFalse570520664.9210.5593000054360.1902676091499.44221115112512932016.0867.00.00-0.557788848877
TrueFalse570520664.9210.481099992990.1902676091499.88777065277512932016.0867.00.00-0.112229347229
TrueFalse570536864.0220.9647501707080.190275233028.15521717072512922016.0867.00.00-1.84478282928
TrueFalse570536864.022-0.09265017509460.190275233027.68175315857512922016.0867.00.00-2.31824684143
TrueFalse570536864.0221.370199561120.190275233028.42284297943512922016.0867.00.00-1.57715702057
TrueFalse570536864.0220.0331504344940.190275233029.02678585052512922016.0867.00.00-0.973214149475
TrueFalse570536864.0220.2192995101210.190275233029.23766040802512922016.0867.00.00-0.76233959198
TrueFalse570536864.0220.5958501696590.190275233028.5012140274512922016.0867.00.00-1.4987859726
TrueFalse570536864.022-0.04759979248050.190275233029.24546527863512922016.0867.00.00-0.754534721375

In [8]:
def p_acq_fail(pars, x, data=None, mag=None, wp=None):
    if data is None:
        data = data_all
        
    m10 = data['mag10'] if mag is None else mag - 10.0
    if wp is None:
        wp = data['warm_pix']
        
    scl0, scl1, scl2 = pars[0:3]
    off0, off1, off2 = pars[3:6]
    p_bright_fail = pars[6]
    
    scale = scl0 + scl1 * m10 + scl2 * m10**2
    offset = off0 + off1 * m10 + off2 * m10**2  # + off_color1p5 * acq_data['color_1p5']
    
    p_fail = offset + scale * wp
    p_fail = stats.norm.cdf(p_fail)  # probit transform
    p_fail[m10 < -1.5] = p_bright_fail  # For stars brighter than 8.5 mag use a constant
    
    return p_fail

In [9]:
def fit_sota_model():
    from sherpa import ui

    data_id = 1
    ui.set_method('simplex')
    ui.set_stat('cash')
    ui.load_user_model(p_acq_fail, 'model')
    ui.add_user_pars('model', ['scl0', 'scl1', 'scl2', 'off0', 'off1', 'off2', 'p_bright_fail'])
    ui.set_model(data_id, 'model')
    ui.load_arrays(data_id, np.array(data_all['year']), np.array(data_all['fail'], dtype=np.float))

    # Initial fit values from SOTA 2013 presentation (modulo typo there)
    start_vals = iter(SOTA2015_FIT_ALL)  # Offset
    fmod = ui.get_model_component('model')
    for name in ('scl', 'off'):
        for num in (0, 1, 2):
            comp_name = name + str(num)
            setattr(fmod, comp_name, start_vals.next())
            comp = getattr(fmod, comp_name)
            comp.min = -100000
            comp.max = 100000
            # ui.freeze(comp)
    
    fmod.p_bright_fail = 0.025
    fmod.p_bright_fail.min = 0.0
    fmod.p_bright_fail.max = 1.0
    # ui.freeze(fmod.p_bright_fail)
    
    ui.fit(data_id)
    # conf = ui.get_confidence_results()
    return ui.get_fit_results()

In [10]:
def plot_fit_grouped(pars, group_col, group_bin, mask=None, log=False, colors='br', label=None):
    if mask is None:
        mask = np.ones(len(data_all), dtype=bool)
    data = data_all[mask]
    data['model'] = p_acq_fail(pars, None, data)

    group = np.trunc(data[group_col] / group_bin)
    data = data.group_by(group)
    data_mean = data.groups.aggregate(np.mean)
    len_groups = np.diff(data.groups.indices)
    fail_sigmas = np.sqrt(data_mean['fail'] * len_groups) / len_groups
    
    plt.errorbar(data_mean[group_col], data_mean['fail'], yerr=fail_sigmas, fmt='.' + colors[0], label=label)
    plt.plot(data_mean[group_col], data_mean['model'], '-' + colors[1])
    if log:
        ax = plt.gca()
        ax.set_yscale('log')

In [11]:
def mag_filter(mag0, mag1):
    ok = (data_all['mag'] > mag0) & (data_all['mag'] < mag1)
    return ok

In [12]:
def wp_filter(wp0, wp1):
    ok = (data_all['warm_pix'] > wp0) & (data_all['warm_pix'] < wp1)
    return ok

In [13]:
print('Hang tight, this will take a few minutes')
fit = fit_sota_model()


Hang tight, this will take a few minutes
Dataset               = 1
Method                = neldermead
Statistic             = cash
Initial fit statistic = 21087.1
Final fit statistic   = 20364.6 at function evaluation 1263
Data points           = 88919
Degrees of freedom    = 88912
Change in statistic   = 722.528
   model.scl0     9.36664     
   model.scl1     8.60574     
   model.scl2     -0.902867   
   model.off0     -2.36035    
   model.off1     0.590629    
   model.off2     0.511768    
   model.p_bright_fail   0.0059554   
INFO:sherpa.ui.utils:Dataset               = 1
Method                = neldermead
Statistic             = cash
Initial fit statistic = 21087.1
Final fit statistic   = 20364.6 at function evaluation 1263
Data points           = 88919
Degrees of freedom    = 88912
Change in statistic   = 722.528
   model.scl0     9.36664     
   model.scl1     8.60574     
   model.scl2     -0.902867   
   model.off0     -2.36035    
   model.off1     0.590629    
   model.off2     0.511768    
   model.p_bright_fail   0.0059554   

In [14]:
print(fit)


datasets       = (1,)
itermethodname = none
methodname     = neldermead
statname       = cash
succeeded      = True
parnames       = ('model.scl0', 'model.scl1', 'model.scl2', 'model.off0', 'model.off1', 'model.off2', 'model.p_bright_fail')
parvals        = (9.3666362229120406, 8.605735251264182, -0.90286728615046496, -2.3603499957772285, 0.59062854628381856, 0.5117683329382009, 0.0059554037295721082)
statval        = 20364.5624894
istatval       = 21087.0900145
dstatval       = 722.527525164
numpoints      = 88919
dof            = 88912
qval           = None
rstat          = None
message        = Optimization terminated successfully
nfev           = 1263

In [15]:
parvals = [par.val for par in model.pars]
parvals


Out[15]:
[9.3666362229120406,
 8.605735251264182,
 -0.90286728615046496,
 -2.3603499957772285,
 0.59062854628381856,
 0.5117683329382009,
 0.0059554037295721082]

In [25]:
plot_fit_grouped(parvals, 'mag', 0.25, wp_filter(0.10, 0.20), log=False, colors='cm', label='0.10 < WP < 0.2')
plot_fit_grouped(parvals, 'mag', 0.25, wp_filter(0.0, 0.10), log=False, colors='br', label='0 < WP < 0.10')
plt.legend(loc='best');
plt.ylim(0.0, 1.0);
plt.xlim(9, 11)
plt.grid();



In [17]:
plot_fit_grouped(parvals, 'warm_pix', 0.02, mag_filter(10, 10.6), log=True, colors='cm', label='10 < mag < 10.6')
plot_fit_grouped(parvals, 'warm_pix', 0.02, mag_filter(9, 10), log=True, colors='br', label='9 < mag < 10')
plt.legend(loc='best');



In [18]:
plot_fit_grouped(parvals, 'year', 0.25, mag_filter(10, 10.6), colors='cm', label='10 < mag < 10.6')
plot_fit_grouped(parvals, 'year', 0.25, mag_filter(9.5, 10), colors='br', label='9.5 < mag < 10')
plot_fit_grouped(parvals, 'year', 0.25, mag_filter(9.0, 9.5),  colors='gk', label='9.0 < mag < 9.5')
plt.legend(loc='best');



In [19]:
plot_fit_grouped(parvals, 'year', 0.25, mag_filter(10, 10.6), colors='cm', label='10 < mag < 10.6', log=True)
plot_fit_grouped(parvals, 'year', 0.25, mag_filter(9.5, 10), colors='br', label='9.5 < mag < 10', log=True)
plot_fit_grouped(parvals, 'year', 0.25, mag_filter(9.0, 9.5),  colors='gk', label='9.0 < mag < 9.5', log=True)
plt.legend(loc='best');



In [20]:
ok = data_all['color'] == 1.5
if np.any(ok):
    plot_fit_grouped(parvals, 'year', 0.5, mag_filter(10, 10.6) & ok, colors='cm', label='10 < mag < 10.6')
    plot_fit_grouped(parvals, 'year', 0.5, mag_filter(9.5, 10) & ok, colors='br', label='9.5 < mag < 10')
    plot_fit_grouped(parvals, 'year', 0.5, mag_filter(9.0, 9.5) & ok,  colors='gk', label='9.0 < mag < 9.5')
    plt.legend(loc='best');

In [21]:
model.pars


Out[21]:
(<Parameter 'scl0' of model 'model'>,
 <Parameter 'scl1' of model 'model'>,
 <Parameter 'scl2' of model 'model'>,
 <Parameter 'off0' of model 'model'>,
 <Parameter 'off1' of model 'model'>,
 <Parameter 'off2' of model 'model'>,
 <Parameter 'p_bright_fail' of model 'model'>)

In [22]:
parvals


Out[22]:
[9.3666362229120406,
 8.605735251264182,
 -0.90286728615046496,
 -2.3603499957772285,
 0.59062854628381856,
 0.5117683329382009,
 0.0059554037295721082]

In [23]:
p_acq_fail(SOTA2015_FIT_ONLY_1P5, None, data=None, mag=np.array([9.0]), wp=0.18)


Out[23]:
array([ 0.0406096])

In [ ]: