In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from sklearn import linear_model
import astropy
import functions as f
import pystan
from numpy import genfromtxt
from astropy.time import Time
import warnings
warnings.filterwarnings('ignore')
In [2]:
#Setup
#Loading data from file
acq_data = np.load('data/acq_table.npy')
#Adding fields required for analysis
acq_data = f.add_column(acq_data, 'tstart_jyear' , np.zeros(len(acq_data)))
acq_data = f.add_column(acq_data, 'tstart_quarter' , np.zeros(len(acq_data)))
acq_data = f.add_column(acq_data, 'mag_floor' , np.zeros(len(acq_data)))
acq_data = f.add_column(acq_data, 'year' , np.zeros(len(acq_data)))
acq_data = f.add_column(acq_data, 'failure' , np.zeros(len(acq_data)))
acq_data['tstart_jyear'] = Time(acq_data['tstart'], format='cxcsec').jyear
acq_data['year'] = np.floor(acq_data.tstart_jyear)
acq_data['mag_floor'] = np.floor(acq_data['mag'])
acq_data['failure'][acq_data['obc_id']=="NOID"] = 1.
for acq in acq_data:
acq.tstart_quarter = f.quarter_bin(acq.tstart_jyear)
In [3]:
def subset_range_tstart_jyear(dset, start, end):
return dset[(dset['tstart_jyear']>=start) & (dset['tstart_jyear']<=end)]
In [4]:
def subset_range_warmpix(dset, wpf):
return dset[(dset['warm_pix']>=(wpf - 0.01)) & (dset['warm_pix']<=(wpf + 0.01))]
We also need a function that estimates error bars for each bin that we'll eventually put together. Normal statistics textbooks would set up the interval as $\hat p \pm \sqrt{\frac{\hat p(1-\hat p)}{n}}$ where $n$ is the total number of trials and $\hat p$ in our case would be the sum of the failures over the total number of observations. This represents the results from the Central Limit Theorem. Unfortunately this does not work well for small $n$ or extreme probabilities, which is a majority of dataset. Thus in order to calculate our intervals, we'll use Wilson's method with a continuity correction which is better for small $n$ and extreme probabilities.
http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
In [5]:
def wilsoninterval(failures, total_count):
p = failures.astype(float)/total_count
q = 1 - p
z = 1.96
n = total_count
L = (2*n*p + z**2 - 1 - z*np.sqrt(z**2 - 2 - (1/n) + 4*p*(n*q + 1))) / (2*(n + z**2))
U = (2*n*p + z**2 + 1 + z*np.sqrt(z**2 - 2 - (1/n) + 4*p*(n*q + 1))) / (2*(n + z**2))
return U, L
Below we set up the training data for the problem. Previously I had been using a subset by year and then using that year to test the next year. I've changed the code to be more variable. Right now a years worth of data takes approximately 5-10 mins to run for the model. The entire dataset takes approximately an hour to run.
Currently, I am using data from the last three quarters of 2013 and the first quarter of 2014 to estimate the newest observations in the dataset. Right now I don't believe that we need to use the entire dataset to create our estimates. We have a lot of data in the last year that is very "similar" to our present observations, while the data at the beginning of the mission is not "similar". I can show a few plots next week to describe what I'm talking about and give a little bit more of a visual justification.
In [6]:
#Subsetting up the training data
train_start = 2013.25
train_end = 2014.25
traindata = subset_range_tstart_jyear(acq_data, train_start, train_end)
train_success = f.smlset(traindata, 'obc_id', 'ID')
train_fail = f.smlset(traindata, 'obc_id', 'NOID')
Below you can chance the Warm Pixel Fraction that we're testing. I'm subsetting the data this far up in the notebook so that we can get a visualization of what the training data looks like for the samples we're trying to estimate. Right now I'm using the newest data as out of sample test data to try and see how we'll perform in the future.
In [7]:
#Subsetting the Test Data
wp_test = 0.15
test_start = 2014.25
test_end = 2014.5
test_data = subset_range_tstart_jyear(acq_data, test_start, test_end)
wp_range = subset_range_warmpix(test_data, wp_test)
Included are the following plots:
In [8]:
# Visualizing the Training Data
nbins = np.arange(6,11.75,0.25)
wp_train_range = subset_range_warmpix(traindata, wp_test)
F = plt.figure()
p1 = plt.hist(train_success['mag'], bins=nbins, color='b', alpha=0.5, label="Successes")
p2 = plt.hist(train_fail['mag'], bins=nbins, color='r', alpha=0.5, label="Failures")
lgd = plt.legend()
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Frequency')
plt.title('Star Acquisitions for Time Range: {} - {} (All Data)'.format(train_start, train_end))
plt.grid()
plt.show()
F = plt.figure()
p1 = plt.hist(traindata['warm_pix'])
F.set_size_inches(10,3)
plt.xlabel('Warm Pixel Fraction')
plt.ylabel('Frequency')
plt.title('Star Acquisitions for Time Range: {} - {} (All Data)'.format(train_start, train_end))
plt.grid()
plt.show()
failure_counts, bin_edges = np.histogram(train_fail['mag'], bins=nbins)
success_counts, bin_edges = np.histogram(train_success['mag'], bins=nbins)
total_counts, bin_edges = np.histogram(traindata['mag'], bins=nbins)
percent_fail = failure_counts.astype(float) / total_counts
pupper, plower = wilsoninterval(failure_counts, total_counts)
F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
p2, = plt.plot(bin_edges[1:], plower, linestyle='--', color='red')
p3, = plt.plot(bin_edges[1:], pupper, linestyle='--', color='red')
plt.legend([p1, p2],
["Observed Data", "95% Interval"], loc=2)
plt.ylim((0.,1.))
plt.xlim((6.,11.))
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Star Acquisitions for Time Range: {} - {} (All Data)'.format(train_start, train_end))
plt.grid()
plt.show()
failure_counts, bin_edges = np.histogram(wp_train_range[wp_train_range['obc_id']=="NOID"]['mag'], bins=nbins)
success_counts, bin_edges = np.histogram(wp_train_range[wp_train_range['obc_id']=="ID"]['mag'], bins=nbins)
total_counts, bin_edges = np.histogram(wp_train_range['mag'], bins=nbins)
percent_fail = failure_counts.astype(float) / total_counts
pupper, plower = wilsoninterval(failure_counts, total_counts)
F = plt.figure()
plt.hist(wp_train_range[wp_train_range['obc_id']=="ID"]['mag'], bins=nbins, color='b', alpha=0.5, label="Successes")
plt.hist(wp_train_range[wp_train_range['obc_id']=="NOID"]['mag'], bins=nbins, color='r', alpha=0.5, label="Failures")
lgd = plt.legend()
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Frequency')
plt.title('Star Acquisitions for Time Range: {} - {} (Warm Pix Subset)'.format(train_start, train_end))
plt.grid()
plt.show()
F = plt.figure()
plt.hist(wp_train_range['warm_pix'])
F.set_size_inches(10,3)
plt.xlabel('Warm Pixel Fraction')
plt.ylabel('Frequency')
plt.title('Star Acquisitions for Time Range: {} - {} '.format(train_start, train_end))
plt.grid()
plt.show()
print """Warm Pixel Fraction Range: {0:<3.2f} - {1:3.2f}""".format(wp_test - 0.01, wp_test + 0.01)
print """Average WPF : {}\n""".format(np.mean(wp_train_range['warm_pix']))
print """{0:^15} | {1:^15} | {2:^15} | {3:^15}""".format("Mag Bin", "Percent Failed", "Total Observed", "Failure Count")
print """{}""".format("-"*70)
for i in np.arange(0, len(total_counts)):
print """{0:<15} | {1:^15.3f} | {2:^15} | {3:^15}""".format("{0:<5.2f} - {1:>5.2f}".format(nbins[i+1] - 0.25, nbins[i+1]),
100*percent_fail[i],
total_counts[i],
failure_counts[i])
F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
p2, = plt.plot(bin_edges[1:], plower, linestyle='--', color='red')
p3, = plt.plot(bin_edges[1:], pupper, linestyle='--', color='red')
plt.xlim((8,11))
plt.ylim((0,1))
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Star Acquisitions for Time Range: {} - {}'.format(train_start, train_end))
plt.grid()
plt.show()
Included are the following plots:
In [9]:
# Test Data Visualization
nbins = np.arange(6,11.75,0.25)
test_data = subset_range_tstart_jyear(acq_data, test_start, test_end)
wp_range = subset_range_warmpix(test_data, wp_test)
failure_counts, bin_edges = np.histogram(test_data[test_data['obc_id']=="NOID"]['mag'], bins=nbins)
success_counts, bin_edges = np.histogram(test_data[test_data['obc_id']=="ID"]['mag'], bins=nbins)
total_counts, bin_edges = np.histogram(test_data['mag'], bins=nbins)
percent_fail = failure_counts.astype(float) / total_counts
pupper, plower = wilsoninterval(failure_counts, total_counts)
F = plt.figure()
plt.hist(test_data[test_data['obc_id']=="ID"]['mag'], bins=nbins, color='b', alpha=0.5, label="Successes")
plt.hist(test_data[test_data['obc_id']=="NOID"]['mag'], bins=nbins, color='r', alpha=0.5, label="Failures")
lgd = plt.legend()
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Frequency')
plt.title('Star Acquisitions for Time Range: {} - {} (All Data)'.format(test_start, test_end))
plt.grid()
plt.show()
F = plt.figure()
p1 = plt.hist(test_data['warm_pix'])
F.set_size_inches(10,3)
plt.xlabel('Warm Pixel Fraction')
plt.ylabel('Frequency')
plt.title('Star Acquisitions for Time Range: {} - {} (All Data)'.format(test_start, test_end))
plt.grid()
plt.show()
F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
p2, = plt.plot(bin_edges[1:], plower, linestyle='--', color='red')
p3, = plt.plot(bin_edges[1:], pupper, linestyle='--', color='red')
plt.xlim((8,11))
plt.ylim((0,1))
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Star Acquisitions for Time Range: {} - {}'.format(test_start, test_end))
plt.grid()
plt.show()
failure_counts, bin_edges = np.histogram(wp_range[wp_range['obc_id']=="NOID"]['mag'], bins=nbins)
success_counts, bin_edges = np.histogram(wp_range[wp_range['obc_id']=="ID"]['mag'], bins=nbins)
total_counts, bin_edges = np.histogram(wp_range['mag'], bins=nbins)
percent_fail = failure_counts.astype(float) / total_counts
pupper, plower = wilsoninterval(failure_counts, total_counts)
F = plt.figure()
plt.hist(wp_range[wp_range['obc_id']=="ID"]['mag'], bins=nbins, color='b', alpha=0.5, label="Successes")
plt.hist(wp_range[wp_range['obc_id']=="NOID"]['mag'], bins=nbins, color='r', alpha=0.5, label="Failures")
lgd = plt.legend()
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Frequency')
plt.title('Star Acquisitions for Time Range: {} - {}'.format(test_start, test_end))
plt.grid()
plt.show()
F = plt.figure()
plt.hist(wp_range['warm_pix'])
F.set_size_inches(10,3)
plt.xlabel('Warm Pixel Fraction')
plt.ylabel('Frequency')
plt.title('Star Acquisitions for Time Range: {} - {}'.format(test_start, test_end))
plt.grid()
plt.show()
print """Warm Pixel Fraction Range: {0:<3.2f} - {1:3.2f}""".format(wp_test - 0.01, wp_test + 0.01)
print """Average WPF : {}\n""".format(np.mean(wp_range['warm_pix']))
print """{0:^15} | {1:^15} | {2:^15} | {3:^15}""".format("Mag Bin", "Percent Failed", "Total Observed", "Failure Count")
print """{}""".format("-"*70)
for i in np.arange(0, len(total_counts)):
print """{0:<15} | {1:^15.3f} | {2:^15} | {3:^15}""".format("{0:<5.2f} - {1:>5.2f}".format(nbins[i+1] - 0.25, nbins[i+1]),
100*percent_fail[i],
total_counts[i],
failure_counts[i])
F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
p2, = plt.plot(bin_edges[1:], plower, linestyle='--', color='red')
p3, = plt.plot(bin_edges[1:], pupper, linestyle='--', color='red')
plt.xlim((8,11))
plt.ylim((0,1))
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Star Acquisitions for Time Range: {} - {}'.format(test_start, test_end))
plt.grid()
plt.show()
Below is the PyStan Model that we'll be using. Documentation can be found at the following locations:
To understand the "model" below, use the modeling documentation at the second link. The first link will allow you to understand how to access elements from the model fit as well as what's available.
NOTE: All variables are centered for computational purposes. Without centering a years worth of data goes from completing in 5 mins to completing in 70 minutes. Do NOT uncenter and rerun, you won't be happy.
Response:
Covariates:
Model:
\begin{eqnarray} y_i &\sim& Bernoulli(\Phi(\eta_i)) \\ \eta_i &=& \alpha + \beta_1 \times Mag + \beta_2 \times Mag^2 + \beta_3 \times WarmPix \\ & & + \beta_4 \times Mag \times WarmPix + \beta_5 \times Mag^2 \times WarmPix \\ \beta &\sim& N(0, 5I) \end{eqnarray}Coefficient Descriptions:
\begin{eqnarray} \alpha &:& \mbox{Intercept term (Included in beta vector)} \\ \beta_1 &:& \mbox{First Order Star Magnitude Coefficient} \\ \beta_2 &:& \mbox{Second Order Star Magnitude Coefficient} \\ \beta_3 &:& \mbox{Warm Pixel Fraction Coefficient} \\ \beta_4 &:& \mbox{First Order Interaction Coefficient} \\ \beta_5 &:& \mbox{Second Order Interaction Coefficient} \\ \end{eqnarray}Model Runtime:
Factor of number of observations and number of iterations. Currently 3000 iterations are used on 4 chains. The Stan modeling language guidelines suggest NOT using 1 chain as reference. After burn in, and merging we have about 6000 samples to play with.
In [10]:
chandra_model = """
data {
int N; // number of observations
int y[N]; // binary outcome for star acquisition event 'n'
real Mag[N]; // predictive feature - star magnitude
real WarmPix[N]; // predictive feature - warm pixel fraction
}
parameters {
real alpha; // intercept coefficient
real beta1; // Star Magnitude coefficient
real beta2;
real beta3; // Warm Pixel Fraction coefficient
real beta4; // Magnitude and WP interaction coefficient
real beta5;
}
model {
alpha ~ normal(0,5); // weakly informative
beta1 ~ normal(0,5); // Star Magnitude coefficient
beta2 ~ normal(0,5); // Star Magnitude Second Order
beta3 ~ normal(0,5); // Warm Pixel Fraction coefficient
beta4 ~ normal(0,5); // Magnitude and WP interaction coefficient
beta5 ~ normal(0,5); // M & WP Second Order Interaction
for (n in 1:N)
y[n] ~ bernoulli(Phi(alpha +
beta1 * Mag[n] +
beta2 * Mag[n] * Mag[n] +
beta3 * WarmPix[n] +
beta4 * Mag[n] * WarmPix[n] +
beta5 * Mag[n] * Mag[n] * WarmPix[n])); //
}
"""
chandra_data = {'N': int(len(traindata)),
'y':traindata["failure"].astype(int),
'Mag':traindata["mag"] - np.mean(traindata["mag"]),
'WarmPix':traindata["warm_pix"] - np.mean(traindata["warm_pix"])
}
chandra_fit = pystan.stan(model_code=chandra_model, data=chandra_data, iter=3000, chains=4)
In [11]:
chandra_fit
Out[11]:
In [12]:
chandra_fit.plot().set_size_inches(15,15)
In [13]:
beta_draws = chandra_fit.extract()
In [33]:
def plotdiagnostic(draws):
n_coefs = len(draws) - 1
nplots = n_coefs**2
names = []
for d in draws:
names.append(d)
F = plt.figure()
spot = 1
for n in names[:-1]:
for i, d in enumerate(draws):
if i == n_coefs:
break
else:
if n == d:
plt.subplot(n_coefs,n_coefs,spot)
plt.hist(draws[d], alpha=0.5)
plt.grid()
plt.xticks(rotation=45)
if spot <= n_coefs:
plt.title(d)
spot += 1
else:
plt.subplot(n_coefs,n_coefs,spot)
plt.plot(draws[n],draws[d],marker='.',linestyle='', alpha=0.3)
plt.grid()
plt.xticks(rotation=45)
if spot <= n_coefs:
plt.title(d)
spot += 1
F.set_size_inches(15,15)
plt.show()
plotdiagnostic(beta_draws)
In [15]:
a = np.mean(beta_draws['alpha'])
b1 = np.mean(beta_draws['beta1'])
b2 = np.mean(beta_draws['beta2'])
b3 = np.mean(beta_draws['beta3'])
b4 = np.mean(beta_draws['beta4'])
b5 = np.mean(beta_draws['beta5'])
betas = np.array([a, b1, b2, b3, b4, b5]) #, b3
m_center = np.mean(traindata['mag'])
wp_center = np.mean(traindata['warm_pix'])
In [16]:
def build_matrix_line(m, wp, m_center, wp_center):
mag = m - m_center
warm = wp - wp_center
return np.array([1, mag, mag**2, warm, mag*warm, mag*mag*warm]) #
In [17]:
def build_matrix_fixedWP(wp, m_center, wp_center):
mag = np.arange(5.5,13.25,0.25)
Xline = [build_matrix_line(mag[0], wp, m_center, wp_center)]
for i, m in enumerate(mag[1:],start=1):
Xline = np.vstack((Xline, build_matrix_line(mag[i], wp, m_center, wp_center)))
return Xline
In [34]:
def compare_obs_2_calc(wp_test, betas):
nbins = np.arange(6,11.75,0.25)
test_data = subset_range_tstart_jyear(acq_data, test_start, test_end)
wp_range = subset_range_warmpix(test_data, wp_test)
test_matrix = build_matrix_fixedWP(wp_test, m_center, wp_center)
y = stats.norm.cdf(np.dot(test_matrix, betas))
x = np.arange(5.5,13.25,0.25)
failure_counts, bin_edges = np.histogram(wp_range[wp_range['obc_id']=="NOID"]['mag'], bins=nbins)
success_counts, bin_edges = np.histogram(wp_range[wp_range['obc_id']=="ID"]['mag'], bins=nbins)
total_counts, bin_edges = np.histogram(wp_range['mag'], bins=nbins)
percent_fail = failure_counts.astype(float) / total_counts
pupper, plower = wilsoninterval(failure_counts, total_counts)
exp_fails = []
exp_sd = []
for i in np.arange(0, len(total_counts)):
mag = ((nbins[i+1] - 0.25) + nbins[i+1])/2.0
pi = stats.norm.cdf(np.dot(build_matrix_line(mag, wp_test, m_center, wp_center), betas))
exp_fail = total_counts[i] * pi
std_dev = np.sqrt(total_counts[i] * pi * (1 - pi))
exp_fails.append(exp_fail)
exp_sd.append(std_dev)
upper = np.array(exp_fails) + 2 * np.array(exp_sd)
lower = np.array(exp_fails) - 2 * np.array(exp_sd)
pltmax = np.max((np.max(upper), np.max(failure_counts)))
F = plt.figure()
plt.subplot(121)
p1, = plt.plot(x,y)
p2, = plt.plot(bin_edges[1:], percent_fail)
p3, = plt.plot(bin_edges[1:], plower, linestyle='--', color='red')
p4, = plt.plot(bin_edges[1:], pupper, linestyle='--', color='red')
plt.grid()
plt.legend([p2,p3,p1], ["Observed: WPF={}".format(wp_test), "Observed Interval", "Est. Probability Curve"], loc=2)
plt.xlabel("Star Magnitude")
plt.ylabel("Proportion Failed")
plt.xlim((8,11))
plt.subplot(122)
p1, = plt.plot(bin_edges[1:], failure_counts)
p2, = plt.plot(bin_edges[1:], exp_fails, color='r', alpha=0.5)
p3, = plt.plot(bin_edges[1:], upper, color='r', linestyle="--", alpha=0.5)
p4, = plt.plot(bin_edges[1:], lower, color='r', linestyle="--", alpha=0.5)
plt.ylim((0,pltmax))
plt.legend([p1,p2, p3], ["Observed Failures: WPF={}".format(wp_test), "Est. # Failures", "95% Interval"], loc=2)
plt.xlabel("Star Magnitude")
plt.ylabel("Number Failed")
F.set_size_inches(15,5)
plt.grid()
plt.show()
In [35]:
compare_obs_2_calc(wp_test, betas)
In [20]:
def compare_show_errors(wp_test, beta_draws, betas):
nbins = np.arange(6,11.75,0.25)
test_data = subset_range_tstart_jyear(acq_data, test_start, test_end)
wp_range = subset_range_warmpix(test_data, wp_test)
test_matrix = build_matrix_fixedWP(wp_test, m_center, wp_center)
failure_counts, bin_edges = np.histogram(wp_range[wp_range['obc_id']=="NOID"]['mag'], bins=nbins)
success_counts, bin_edges = np.histogram(wp_range[wp_range['obc_id']=="ID"]['mag'], bins=nbins)
total_counts, bin_edges = np.histogram(wp_range['mag'], bins=nbins)
percent_fail = failure_counts.astype(float) / total_counts
plower, pupper = wilsoninterval(failure_counts, total_counts)
F = plt.figure()
for n in np.arange(1,len(beta_draws['alpha']),1):
betas = beta_draws['alpha'][n], beta_draws['beta1'][n], beta_draws['beta2'][n], beta_draws['beta3'][n], beta_draws['beta4'][n], beta_draws['beta5'][n]
y = stats.norm.cdf(np.dot(test_matrix, betas))
x = np.arange(5.5,13.25,0.25)
plt.plot(x,y, alpha=0.5)
p1, = plt.plot(bin_edges[1:], percent_fail, linewidth=5)
p2, = plt.plot(bin_edges[1:], plower, linestyle='--', color='red', linewidth=3)
p3, = plt.plot(bin_edges[1:], pupper, linestyle='--', color='red', linewidth=3)
plt.legend([p1], ["Observed: WPF={}".format(wp_test)], loc=2)
plt.xlabel("Star Magnitude")
plt.ylabel("Proportion Failed")
plt.xlim((8,11))
F.set_size_inches(15,5)
plt.grid()
plt.show()
print """Warm Pixel Fraction Range: {0:<3.2f} - {1:3.2f}""".format(wp_test - 0.01, wp_test + 0.01)
print """Average WPF : {}\n""".format(np.mean(wp_range['warm_pix']))
print """{0:^15} | {1:^15} | {2:^15} | {3:^15} | {4:^15} | {5:^15}""".format("Mag Bin",
"Percent Failed",
"Total Observed",
"Failure Count",
"Expected Fails",
"1-Std Dev")
print """{}""".format("-"*105)
for i in np.arange(0, len(total_counts)):
mag = ((nbins[i+1] - 0.25) + nbins[i+1])/2.0
pi = stats.norm.cdf(np.dot(build_matrix_line(mag, wp_test, m_center, wp_center), betas))
exp_fail = total_counts[i] * pi
std_dev = np.sqrt(total_counts[i] * pi * (1 - pi))
print """{0:<15} | {1:^15.3f} | {2:^15} | {3:^15} | {4:^15.1f} | {5:^15}""".format("{0:<5.2f} - {1:>5.2f}".format(nbins[i+1] - 0.25, nbins[i+1]),
100*percent_fail[i],
total_counts[i],
failure_counts[i],
exp_fail,
"+/- {:<4.1f}".format(std_dev))
In [21]:
compare_show_errors(wp_test, beta_draws, betas)
In [22]:
allbetas = np.vstack((beta_draws['alpha'], beta_draws['beta1'], beta_draws['beta2'], beta_draws['beta3'], beta_draws['beta4'], beta_draws['beta5']))
F = plt.figure()
plt.hist(stats.norm.cdf(np.dot(build_matrix_line(10.6,wp_test, m_center, wp_center), allbetas)), bins=25, normed=True)
plt.xlim((0,1))
plt.grid()
plt.show()
In [37]:
compare_obs_2_calc(0.11, betas)
compare_show_errors(0.11, beta_draws, betas)
In [38]:
compare_obs_2_calc(0.12, betas)
compare_show_errors(0.12, beta_draws, betas)
In [39]:
compare_obs_2_calc(0.13, betas)
compare_show_errors(0.13, beta_draws, betas)
In [40]:
compare_obs_2_calc(0.14, betas)
compare_show_errors(0.14, beta_draws, betas)
In [41]:
compare_obs_2_calc(0.15, betas)
compare_show_errors(0.15, beta_draws, betas)
In [42]:
compare_obs_2_calc(0.16, betas)
compare_show_errors(0.16, beta_draws, betas)
In [43]:
compare_obs_2_calc(0.17, betas)
compare_show_errors(0.17, beta_draws, betas)
In [44]:
compare_obs_2_calc(0.18, betas)
compare_show_errors(0.18, beta_draws, betas)
In [45]:
beta_results = {'names':np.array(['alpha', 'beta1', 'beta2', 'beta3', 'beta4', 'beta5']),
'means':betas,
'draws':beta_draws,
'betamatrix':np.vstack((beta_draws['alpha'],
beta_draws['beta1'],
beta_draws['beta2'],
beta_draws['beta3'],
beta_draws['beta4'],
beta_draws['beta5'])),
'centers':{'mag':m_center,
'warmpix':wp_center}}
In [46]:
import pickle
with open('chandra_dump.pkl', 'wb') as f:
pickle.dump(beta_results, f)