This is a DEVELOPMENT model maintained for reference. It illustrates a monotonically increasing model that uses the integral of the probit transform of a quadratic as the model.
However, with the introduction of the floor component, there was not as much problem with monotonicity.
This is an intermediate model which collects the probabilities within narrow magnitude bins and fits a quadratic polynomial model to the data as a function of CCD temperature.
The fit and plot of polynomial coefficients in each mag bin are used as starting values
in the fit_acq_prob_model-2018-04-poly-spline-tccd
notebook.
In [1]:
import sys
import os
from itertools import count
from pathlib import Path
sys.path.insert(0, str(Path(os.environ['HOME'], 'git', 'skanb', 'pea-test-set')))
import utils as asvt_utils
import numpy as np
import matplotlib.pyplot as plt
from astropy.table import Table, vstack
from astropy.time import Time
import tables
from scipy import stats
from scipy.interpolate import CubicSpline
from Chandra.Time import DateTime
from astropy.table import Table
from chandra_aca.star_probs import get_box_delta
%matplotlib inline
In [2]:
SKA = Path(os.environ['SKA'])
In [3]:
# Make a map of AGASC_ID to AGACS 1.7 MAG_ACA. The acq_stats.h5 file has whatever MAG_ACA
# was in place at the time of planning the loads.
with tables.open_file(str(SKA / 'data' / 'agasc' / 'miniagasc_1p7.h5'), 'r') as h5:
agasc_mag_aca = h5.root.data.col('MAG_ACA')
agasc_id = h5.root.data.col('AGASC_ID')
has_color3 = h5.root.data.col('RSV3') != 0 #
red_star = np.isclose(h5.root.data.col('COLOR1'), 1.5)
mag_aca_err = h5.root.data.col('MAG_ACA_ERR') / 100
red_mag_err = red_star & ~has_color3 # MAG_ACA, MAG_ACA_ERR is potentially inaccurate
In [4]:
agasc1p7_idx = {id: idx for id, idx in zip(agasc_id, count())}
agasc1p7 = Table([agasc_mag_aca, mag_aca_err, red_mag_err],
names=['mag_aca', 'mag_aca_err', 'red_mag_err'], copy=False)
In [5]:
acq_file = str(SKA / 'data' / 'acq_stats' / 'acq_stats.h5')
with tables.open_file(str(acq_file), 'r') as h5:
cols = h5.root.data.cols
names = {'tstart': 'guide_tstart',
'obsid': 'obsid',
'obc_id': 'acqid',
'halfwidth': 'halfw',
'warm_pix': 'n100_warm_frac',
'mag_aca': 'mag_aca',
'mag_obs': 'mean_trak_mag',
'known_bad': 'known_bad',
'color': 'color1',
'img_func': 'img_func',
'ion_rad': 'ion_rad',
'sat_pix': 'sat_pix',
'agasc_id': 'agasc_id',
't_ccd': 'ccd_temp',
'slot': 'slot'}
acqs = Table([getattr(cols, h5_name)[:] for h5_name in names.values()],
names=list(names.keys()))
In [6]:
year_q0 = 1999.0 + 31. / 365.25 # Jan 31 approximately
acqs['year'] = Time(acqs['tstart'], format='cxcsec').decimalyear.astype('f4')
acqs['quarter'] = (np.trunc((acqs['year'] - year_q0) * 4)).astype('f4')
In [7]:
# Create 'fail' column, rewriting history as if the OBC always
# ignore the MS flag in ID'ing acq stars.
#
# UPDATE: is ion_rad being ignored on-board? (Not as of 2018-11)
#
obc_id = acqs['obc_id']
obc_id_no_ms = (acqs['img_func'] == 'star') & ~acqs['sat_pix'] & ~acqs['ion_rad']
acqs['fail'] = np.where(obc_id | obc_id_no_ms, 0.0, 1.0)
acqs['mag_aca'] = [agasc1p7['mag_aca'][agasc1p7_idx[agasc_id]] for agasc_id in acqs['agasc_id']]
acqs['red_mag_err'] = [agasc1p7['red_mag_err'][agasc1p7_idx[agasc_id]] for agasc_id in acqs['agasc_id']]
acqs['mag_aca_err'] = [agasc1p7['mag_aca_err'][agasc1p7_idx[agasc_id]] for agasc_id in acqs['agasc_id']]
acqs['asvt'] = False
In [8]:
# Filter for year and mag (previously used data through 2007:001)
#
# UPDATE this to be between 4 to 5 years from time of recalibration.
#
# The mag range is restricted to 8.5 < mag < 10.7 because the model
# is only calibrated in that range. Above 10.7 there is concern that
# stats are actually unreliable (fraction of imposters that happen to
# is high?) This upper limit is something to play with.
#
year_min = 2014.5
year_max = DateTime('2018-10-30').frac_year
ok = ((acqs['year'] > year_min) & (acqs['year'] < year_max) &
(acqs['mag_aca'] > 7.0) & (acqs['mag_aca'] < 11) &
(~np.isclose(acqs['color'], 0.7)))
In [9]:
# Filter known bad obsids
print('Filtering known bad obsids, start len = {}'.format(np.count_nonzero(ok)))
bad_obsids = [
# Venus
2411,2414,6395,7306,7307,7308,7309,7311,7312,7313,7314,7315,7317,7318,7406,583,
7310,9741,9742,9743,9744,9745,9746,9747,9749,9752,9753,9748,7316,15292,16499,
16500,16501,16503,16504,16505,16506,16502,
]
for badid in bad_obsids:
ok = ok & (acqs['obsid'] != badid)
print('Filtering known bad obsids, end len = {}'.format(np.count_nonzero(ok)))
In [10]:
peas = Table.read('pea_analysis_results_2018_299_CCD_temp_performance.csv', format='ascii.csv')
peas = asvt_utils.flatten_pea_test_data(peas)
In [11]:
# Fuzz mag and T_ccd by a bit for plotting and fitting.
fpeas = Table([peas['star_mag'], peas['ccd_temp'], peas['search_box_hw']],
names=['mag_aca', 't_ccd', 'halfwidth'])
fpeas['year'] = np.random.uniform(2019.0, 2019.5, size=len(peas))
fpeas['color'] = 1.0
fpeas['quarter'] = (np.trunc((fpeas['year'] - year_q0) * 4)).astype('f4')
fpeas['fail'] = 1.0 - peas['search_success']
fpeas['asvt'] = True
fpeas['red_mag_err'] = False
fpeas['mag_obs'] = 0.0
In [12]:
data_all = vstack([acqs[ok]['year', 'fail', 'mag_aca', 't_ccd', 'halfwidth', 'quarter',
'color', 'asvt', 'red_mag_err', 'mag_obs'],
fpeas])
data_all.sort('year')
In [13]:
# Adjust probability (in probit space) for box size. See:
# https://github.com/sot/skanb/blob/master/pea-test-set/fit_box_size_acq_prob.ipynb
data_all['box_delta'] = get_box_delta(data_all['halfwidth'])
In [14]:
data_all = data_all.group_by('quarter')
data_all0 = data_all.copy() # For later augmentation with simulated red_mag_err stars
data_mean = data_all.groups.aggregate(np.mean)
In [15]:
plt.figure(figsize=(13, 4))
ok = data_all['asvt'] == False
plt.subplot(1, 2, 1)
plt.hist(data_all['mag_aca'][ok], bins=50)
plt.title('Mag_aca distribution')
plt.grid()
plt.subplot(1, 2, 2)
plt.hist(data_all['t_ccd'][ok] + np.random.uniform(-0.3, 0.3, np.count_nonzero(ok)), bins=24)
plt.grid()
plt.title('ACA CCD temperature distribution');
In [16]:
def p_fail_binned(pars,
t_ccd,
box_delta=0):
"""
Acquisition probability model for binned data (no mag dependence)
:param pars: 7 parameters (3 x offset, 3 x scale, p_fail for bright stars)
:param tc, tc2: t_ccd, t_ccd ** 2
:param box_delta: search box half width (arcsec)
"""
p0, p1, p2 = pars
# Make sure box_delta has right dimensions
t_ccd, box_delta = np.broadcast_arrays(t_ccd, box_delta)
tc = (t_ccd + 16) / 10
tcx = np.linspace(-0.1, 2.0, 40)
tcy = np.cumsum(stats.norm.cdf(p1 + p2 * tcx))
probit_p_fail = p0 + np.interp(tc, tcx, tcy) + box_delta
p_fail = stats.norm.cdf(probit_p_fail) # transform from probit to linear probability
return p_fail
def p_acq_fail(data=None):
"""
Sherpa fit function wrapper to ensure proper use of data in fitting.
"""
if data is None:
data = data_all
def sherpa_func(pars, x=None):
return p_fail_binned(pars, data['t_ccd'], data['box_delta'])
return sherpa_func
In [17]:
def fit_poly_model(data):
from sherpa import ui
comp_names = ['p0', 'p1', 'p2']
data_id = 1
ui.set_method('simplex')
ui.set_stat('cash')
ui.load_user_model(p_acq_fail(data), 'model')
ui.add_user_pars('model', comp_names)
ui.set_model(data_id, 'model')
ui.load_arrays(data_id, np.array(data['year']), np.array(data['fail'], dtype=np.float))
# Initial fit values from fit of all data
fmod = ui.get_model_component('model')
for comp_name in comp_names:
setattr(fmod, comp_name, 0.0)
comp = getattr(fmod, comp_name)
comp.max = 10
fmod.p0.min = -10
# fmod.p1.min = 0
# fmod.p2.min = 0
# fmod.p2.max = 0.5
ui.fit(data_id)
return ui.get_fit_results()
In [18]:
def plot_fails_mag_aca_vs_t_ccd(mag_bins, year0=None):
if year0 is None:
year0 = data_all['year'].min()
ok = (data_all['year'] >= year0) & ~data_all['fail'].astype(bool)
da = data_all[ok]
fuzzx = np.random.uniform(-0.3, 0.3, len(da))
fuzzy = np.random.uniform(-0.125, 0.125, len(da))
plt.plot(da['t_ccd'] + fuzzx, da['mag_aca'] + fuzzy, '.C0', markersize=4)
ok = (data_all['year'] > year0) & data_all['fail'].astype(bool)
da = data_all[ok]
fuzzx = np.random.uniform(-0.3, 0.3, len(da))
fuzzy = np.random.uniform(-0.125, 0.125, len(da))
plt.plot(da['t_ccd'] + fuzzx, da['mag_aca'] + fuzzy, '.C1', markersize=4, alpha=0.8)
# plt.xlim(-18, -10)
# plt.ylim(7.0, 11.1)
x0, x1 = plt.xlim()
for y in mag_bins:
plt.plot([x0, x1], [y, y], '-', color='r', linewidth=2, alpha=0.8)
plt.xlabel('T_ccd (C)')
plt.ylabel('Mag_aca')
plt.title(f'Acq successes (blue) and failures (orange) since {year0:.1f}')
plt.grid()
In [19]:
def plot_fit_grouped(pars, group_col, group_bin, mask=None, log=False, colors='br', label=None, probit=False):
data = data_all if mask is None else data_all[mask]
data['model'] = p_acq_fail(data)(pars)
group = np.trunc(data[group_col] / group_bin)
data = data.group_by(group)
data_mean = data.groups.aggregate(np.mean)
len_groups = np.diff(data.groups.indices)
data_fail = data_mean['fail']
model_fail = np.array(data_mean['model'])
fail_sigmas = np.sqrt(data_fail * len_groups) / len_groups
# Possibly plot the data and model probabilities in probit space
if probit:
dp = stats.norm.ppf(np.clip(data_fail + fail_sigmas, 1e-6, 1-1e-6))
dm = stats.norm.ppf(np.clip(data_fail - fail_sigmas, 1e-6, 1-1e-6))
data_fail = stats.norm.ppf(data_fail)
model_fail = stats.norm.ppf(model_fail)
fail_sigmas = np.vstack([data_fail - dm, dp - data_fail])
plt.errorbar(data_mean[group_col], data_fail, yerr=fail_sigmas,
fmt='.' + colors[1:], label=label, markersize=8)
plt.plot(data_mean[group_col], model_fail, '-' + colors[0])
if log:
ax = plt.gca()
ax.set_yscale('log')
In [20]:
def plot_fit_all(parvals, mask=None, probit=False):
if mask is None:
mask = np.ones(len(data_all), dtype=bool)
mt = mag_filter(8.5, 10.8) & mask
plt.figure(figsize=(12, 4))
for probit in True, False:
plt.subplot(1, 2, int(probit) + 1)
for v0, v1, colors in ((-11, -10, 'br'),
(-12, -11, 'gk'),
(-13, -12, 'cm'),
(-14, -13, 'br'),
(-15, -14, 'gk')):
plot_fit_grouped(parvals, 'mag_aca', 0.25, t_ccd_filter(v0, v1) & mt,
colors=colors, label=f'{v0} < t_ccd < {v1}', probit=probit)
plt.legend(loc='upper left')
plt.ylim(-3, 3) if probit else plt.ylim(-0.1, 1.1)
plt.ylabel('p_fail')
plt.xlabel('year')
plt.tight_layout()
plt.grid()
mt = t_ccd_filter(-16, -2) & mask
plt.figure(figsize=(12, 4))
for probit in True, False:
plt.subplot(1, 2, int(probit) + 1)
for v0, v1, colors in ((10.3, 10.7, 'gk'),
(10, 10.3, 'cm'),
(9.5, 10, 'br'),
(9, 9.5, 'gk')):
plot_fit_grouped(parvals, 'year', 0.25, mag_filter(v0, v1) & mt,
colors=colors, label=f'{v0} < mag < {v1}', probit=probit)
plt.legend(loc='upper left')
plt.ylim(-3, 3) if probit else plt.ylim(-0.1, 1.1)
plt.ylabel('p_fail')
plt.xlabel('mag_aca')
plt.tight_layout()
plt.grid()
mt = t_ccd_filter(-16, -2) & mask
plt.figure(figsize=(12, 4))
for probit in True, False:
plt.subplot(1, 2, int(probit) + 1)
for v0, v1, colors in ((10.3, 10.7, 'gk'),
(10, 10.3, 'cm'),
(9.5, 10, 'br'),
(9, 9.5, 'gk')):
plot_fit_grouped(parvals, 't_ccd', 0.5, mag_filter(v0, v1) & mt,
colors=colors, label=f'{v0} < mag < {v1}', probit=probit)
plt.legend(loc='upper left')
plt.ylim(-3, 3) if probit else plt.ylim(-0.1, 1.1)
plt.xlabel('t_ccd')
plt.ylabel('p_fail')
plt.tight_layout()
plt.grid()
In [21]:
def mag_filter(mag0, mag1):
ok = (data_all['mag_aca'] > mag0) & (data_all['mag_aca'] < mag1)
return ok
In [22]:
def t_ccd_filter(t_ccd0, t_ccd1):
ok = (data_all['t_ccd'] > t_ccd0) & (data_all['t_ccd'] < t_ccd1)
return ok
In [23]:
def wp_filter(wp0, wp1):
ok = (data_all['warm_pix'] > wp0) & (data_all['warm_pix'] < wp1)
return ok
In [24]:
mag_centers = np.array([6.3, 8.1, 9.1, 9.55, 9.75, 10.0, 10.25, 10.55, 10.75, 11.0])
mag_bins = (mag_centers[1:] + mag_centers[:-1]) / 2
mag_means = np.array([8.0, 9.0, 9.5, 9.75, 10.0, 10.25, 10.5, 10.75])
In [25]:
for m0, m1, mm in zip(mag_bins[:-1], mag_bins[1:], mag_means):
ok = (data_all['asvt'] == False) & (data_all['mag_aca'] >= m0) & (data_all['mag_aca'] < m1)
print(f"m0={m0:.2f} m1={m1:.2f} mean_mag={data_all['mag_aca'][ok].mean():.2f} vs. {mm}")
In [26]:
plot_fails_mag_aca_vs_t_ccd(mag_bins)
In [27]:
mask_no_1p5 = ((data_all['red_mag_err'] == False) &
(data_all['t_ccd'] > -16))
In [28]:
mag0s, mag1s = mag_bins[:-1], mag_bins[1:]
fits = {}
for m0, m1 in zip(mag0s, mag1s):
print(m0, m1)
fits[m0, m1] = fit_poly_model(data_all[mask_no_1p5 & mag_filter(m0, m1)])
In [30]:
colors = [f'kC{i}' for i in range(9)]
plt.figure(figsize=(13, 4))
for subplot in (1, 2):
plt.subplot(1, 2, subplot)
probit = (subplot == 2)
for m0_m1, color, mag_mean in zip(list(fits), colors, mag_means):
fit = fits[m0_m1]
m0, m1 = m0_m1
plot_fit_grouped(fit.parvals, 't_ccd', 1.0,
mask=mask_no_1p5 & mag_filter(m0, m1),
probit=probit, colors=color, label=f'{mag_mean}')
plt.grid()
plt.ylabel(('Probit ' if probit else '') + 'p_fail')
plt.xlabel('T_ccd')
if probit:
plt.ylim(-3.5, 3.5)
plt.legend(fontsize='small', loc='upper left');
In [31]:
# This computes probabilities for 120 arcsec boxes, corresponding to raw data
t_ccds = np.linspace(-16, -0, 20)
plt.figure(figsize=(13, 4))
for subplot in (1, 2):
plt.subplot(1, 2, subplot)
for m0_m1, color, mag_mean in zip(list(fits), colors, mag_means):
fit = fits[m0_m1]
m0, m1 = m0_m1
probs = p_fail_binned(fit.parvals, t_ccds)
if subplot == 2:
probs = stats.norm.ppf(probs)
plt.plot(t_ccds, probs, label=f'{mag_mean}')
plt.legend()
plt.xlabel('T_ccd')
plt.ylabel('P_fail' if subplot == 1 else 'Probit(p_fail)')
plt.grid()
In [32]:
p0s = []
p1s = []
p2s = []
for fit in fits.values():
ps = fit.parvals
p0s.append(ps[0])
p1s.append(ps[1])
p2s.append(ps[2])
plt.plot(mag_means, p0s, '.-', label='p0')
plt.plot(mag_means, p1s, '.-', label='p1')
plt.plot(mag_means, p2s, '.-', label='p2')
plt.legend(fontsize='small')
plt.grid()
In [33]:
def print_pvals(ps, idx):
vals_str = ', '.join(f'{p:.3f}' for p in ps)
print(f'p{idx}s = np.array([{vals_str}])')
In [34]:
print_pvals(p0s, 0)
print_pvals(p1s, 1)
print_pvals(p2s, 2)
In [35]:
def calc_p_fail(t_ccds, mags, halfws=120, probit=False):
"""
Acquisition probability model
:param t_ccds: CCD temperature(s)
:param mags: Star mag(s)
:param box_deltas:
"""
mag_means = np.array([8.0, 9.0, 9.5, 9.75, 10.0, 10.25, 10.5, 10.75])
p0s = np.array([-2.801, -2.555, -1.956, -1.917, -1.971, -2.082, -1.919, -0.569])
p1s = np.array([-3.150, -2.104, -2.459, -1.942, -1.495, -1.041, -0.764, -0.932])
p2s = np.array([1.375, 0.573, 1.187, 0.933, 0.506, 0.066, -0.464, -0.722])
# Make sure box_delta has right dimensions
t_ccds, mags, halfws = np.broadcast_arrays(t_ccds, mags, halfws)
tcs = (t_ccds + 16) / 10
box_deltas = get_box_delta(halfws)
tcx = np.linspace(-0.1, 2.0, 40)
p_fails = []
for tc, mag, box_delta in zip(tcs.flat, mags.flat, box_deltas.flat):
p0 = np.interp(mag, mag_means, p0s)
p1 = np.interp(mag, mag_means, p1s)
p2 = np.interp(mag, mag_means, p2s)
tcy = np.cumsum(stats.norm.cdf(p1 + p2 * tcx))
p_fail = p0 + np.interp(tc, tcx, tcy) + box_delta
if not probit:
p_fail = stats.norm.cdf(p_fail) # transform from probit to linear probability
p_fails.append(p_fail)
return np.array(p_fails).reshape(t_ccds.shape)
In [36]:
mags, t_ccds = np.mgrid[8.75:10.75:30j, -16:-4:30j]
p_fails = calc_p_fail(t_ccds, mags, probit=False)
cs = plt.contour(t_ccds, mags, p_fails, levels=[0.05, 0.2, 0.5, 0.75, 0.9], colors=['g', 'b', 'c', 'm', 'r'])
plt.clabel(cs, inline=1, fontsize=10)
plt.grid()
plt.xlim(-17, -4)
plt.ylim(8.5, 11.0)
plt.xlabel('T_ccd (degC)')
plt.ylabel('Mag_ACA')
plt.title('Failure probability');
In [ ]: