Chandra ACA Analysis

Initial data setup....


In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import bayesRegress as bR
from sklearn import linear_model
import astropy
import functions as f

from numpy import genfromtxt
from astropy.time import Time

import warnings
warnings.filterwarnings('ignore')

In [ ]:
# Found using R's GLM function!

# bayes_fit = bR.probitRegression('success ~ mag + warm_pix + mag:warm_pix', yr2013, n_iter=1000, burnin=100, calcIRWLS=True, plots=True, identifier="acadata2013")
finney47 = np.genfromtxt('data/finney1947.csv', dtype=None, delimiter=',', names=True)
fin_fit = bR.probitRegression('Y ~ Volume + Rate', finney47, n_iter=5000, burnin=1000, calcIRWLS=False, plots=False, identifier='finney')

In [2]:
#Setup
#Loading data from file
acq_data = np.load('data/acq_table.npy')

#Adding fields required for analysis
acq_data = f.add_column(acq_data, 'tstart_jyear' , np.zeros(len(acq_data)))
acq_data = f.add_column(acq_data, 'tstart_quarter' , np.zeros(len(acq_data)))
acq_data = f.add_column(acq_data, 'mag_floor' , np.zeros(len(acq_data)))
acq_data = f.add_column(acq_data, 'year' , np.zeros(len(acq_data)))
acq_data = f.add_column(acq_data, 'success' , np.zeros(len(acq_data)))
acq_data['tstart_jyear'] = Time(acq_data['tstart'], format='cxcsec').jyear
acq_data['year'] = np.floor(acq_data.tstart_jyear)
acq_data['mag_floor'] = np.floor(acq_data['mag'])
acq_data['success'][acq_data['obc_id']=="NOID"] = 1.
# acq_data['tstart_quarter'] = f.quarter_bin(acq['tstart_jyear'])

for acq in acq_data:
    acq.tstart_quarter = f.quarter_bin(acq.tstart_jyear)

In [3]:
#Subsetting the data to a particular year
yr2013 = f.smlset(acq_data, 'year', 2012)
yr2013_s = f.smlset(yr2013, 'obc_id', 'ID')
yr2013_f = f.smlset(yr2013, 'obc_id', 'NOID')

In [4]:
F = plt.figure()
p1, = plt.plot(yr2013_s['mag'], yr2013_s['warm_pix'], marker='.', linestyle='')
p2, = plt.plot(yr2013_f['mag'], yr2013_f['warm_pix'], marker='x', linestyle='', color='red')
plt.legend([p1,p2],['Successes', "Failures"])
plt.title('Star Acquisitions for the Year 2013')
plt.xlabel('Star Magnitude')
plt.ylabel('Warm Pixel Fraction')
F.set_size_inches(10,6)
F.savefig('yr2013_warmpix_by_mag.pdf', type='pdf')
plt.show()



In [5]:
nbins = np.arange(6,11.75,0.25)

F = plt.figure()
p1 = plt.hist(yr2013_s['mag'], bins=nbins, color='b', alpha=0.5, label="Successes")
p2 = plt.hist(yr2013_f['mag'], bins=nbins, color='r', alpha=0.5, label="Failures")
lgd = plt.legend()
F.set_size_inches(10,3)
plt.xlim((6.,11.))
plt.xlabel('Star Magnitude')
plt.ylabel('Frequency')
plt.title('Star Acquisitions for the Year 2013')
F.savefig('2013_histogram.pdf',type='pdf', bbox_extra_artist=[lgd])
plt.show()

print(np.mean(yr2013['warm_pix']))

failure_counts, bin_edges = np.histogram(yr2013_f['mag'], bins=nbins)
success_counts, bin_edges = np.histogram(yr2013_s['mag'], bins=nbins)
total_counts, bin_edges = np.histogram(yr2013['mag'], bins=nbins)
percent_fail = failure_counts.astype(float) / total_counts

def curProb(mag, N100):
    mag = mag - 10.
    scale = 10**(0.18 + 0.99*mag - 0.49*mag*mag)
    offset = 10**(-1.49 + 0.89*mag + 0.28*mag*mag)
    return scale * N100 + offset

design = np.vstack((np.ones(len(yr2013['success'])), 
                    yr2013['mag'], 
                    yr2013['mag']**2)).transpose()

response = yr2013['success']
test = np.vstack((np.ones(len(bin_edges[1:])), 
                  bin_edges[1:], 
                  bin_edges[1:]**2)).transpose()

nlf2 = linear_model.LogisticRegression(C=1e10)
nlf2.fit(design,response)

nlf1 = linear_model.LogisticRegression(C=1e10)
nlf1.fit(design[:,:2],response)

F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
p2, = plt.plot(bin_edges[1:], curProb(bin_edges[1:], 0.18), linestyle="--")
p3, = plt.plot(bin_edges[1:], curProb(bin_edges[1:], 0.12), linestyle="--")
p4, = plt.plot(bin_edges[1:], curProb(bin_edges[1:], 0.06), linestyle="--")
p5, = plt.plot(bin_edges[1:], nlf1.predict_proba(test[:,:2])[:,1], linestyle="dotted")
p6, = plt.plot(bin_edges[1:], nlf2.predict_proba(test)[:,1], linestyle="dotted")
plt.legend([p1,p2,p3,p4,p5,p6],
           ["Observed Data", "Current Model: WP=0.18",
            "Current Model: WP=0.12", "Current Model: WP=0.06",
            "Logistic Regression:1st Order", "Logistic Regression:2nd Order"], 
           bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.ylim((0.,1.))
plt.xlim((6.,11.))
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Star Acquisitions for the Year 2013')
plt.show()

F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
plt.legend([p1],
           ["Observed Data"], loc=2)
plt.ylim((0.,1.))
plt.xlim((6.,11.))
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Star Acquisitions for the Year 2013')
F.savefig('2013_proportions_mag.pdf',type="pdf")
plt.show()

F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
# p2, = plt.plot(bin_edges[1:], curProb(bin_edges[1:], 0.18), linestyle="--")
p3, = plt.plot(bin_edges[1:], curProb(bin_edges[1:], 0.12), linestyle="--")
# p4, = plt.plot(bin_edges[1:], curProb(bin_edges[1:], 0.06), linestyle="--")
plt.legend([p1,p3],
           ["Observed Data",
            "Model: WP=0.12"], loc=2)
plt.ylim((0.,1.))
plt.xlim((8.,11.))
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Star Acquisitions for the Year 2013')
F.savefig('2013_current_model_fit_mag.pdf', type='pdf')
plt.show()


0.0863974281574

In [9]:
nbins = np.arange(0.05,0.18,0.01)

F = plt.figure()
p1 = plt.hist(yr2013_s['warm_pix'], bins=nbins, color='b', alpha=0.5, label="Successes")
p2 = plt.hist(yr2013_f['warm_pix'], bins=nbins, color='r', alpha=0.5, label="Failures")
plt.legend(loc=1)

plt.xlabel('Warm Pixel Fraction')
plt.ylabel('Frequency')
plt.title('Star Acquisitions for the Year 2013')
F.set_size_inches(10,3)
F.savefig('2013_histogram_wpf.pdf', type='pdf')
plt.show()

#All Stars
failure_counts, bin_edges = np.histogram(yr2013_f['warm_pix'], bins=nbins)
success_counts, bin_edges = np.histogram(yr2013_s['warm_pix'], bins=nbins)
total_counts, bin_edges = np.histogram(yr2013['warm_pix'], bins=nbins)
percent_fail = failure_counts.astype(float) / total_counts

#Only 9 - 9.5 Magnitude
failure_counts, bin_edges = np.histogram(yr2013_f['warm_pix'][(yr2013_f['mag']>=9.0) & (yr2013_f['mag']<=9.5)], bins=nbins)
success_counts, bin_edges = np.histogram(yr2013_s['warm_pix'][(yr2013_s['mag']>=9.0) & (yr2013_s['mag']<=9.5)], bins=nbins)
total_counts, bin_edges = np.histogram(yr2013['warm_pix'][(yr2013['mag']>=9.0) & (yr2013['mag']<=9.5)], bins=nbins)
percent_fail_9_5 = failure_counts.astype(float) / total_counts

print percent_fail_9_5

#Only 9.5 - 10 Magnitude
failure_counts, bin_edges = np.histogram(yr2013_f['warm_pix'][(yr2013_f['mag']>=9.5) & (yr2013_f['mag']<=10.)], bins=nbins)
success_counts, bin_edges = np.histogram(yr2013_s['warm_pix'][(yr2013_s['mag']>=9.5) & (yr2013_s['mag']<=10.)], bins=nbins)
total_counts, bin_edges = np.histogram(yr2013['warm_pix'][(yr2013['mag']>=9.5) & (yr2013['mag']<=10.)], bins=nbins)
percent_fail_10 = failure_counts.astype(float) / total_counts

print percent_fail_10

#Only 10 - 10.5 Magnitude
failure_counts, bin_edges = np.histogram(yr2013_f['warm_pix'][(yr2013_f['mag']>=10.) & (yr2013_f['mag']<=10.5)], bins=nbins)
success_counts, bin_edges = np.histogram(yr2013_s['warm_pix'][(yr2013_s['mag']>=10.) & (yr2013_s['mag']<=10.5)], bins=nbins)
total_counts, bin_edges = np.histogram(yr2013['warm_pix'][(yr2013['mag']>=10.) & (yr2013['mag']<=10.5)], bins=nbins)
percent_fail_10_5 = failure_counts.astype(float) / total_counts

F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
p2, = plt.plot(bin_edges[1:], percent_fail_9_5)
p3, = plt.plot(bin_edges[1:], percent_fail_10)
p4, = plt.plot(bin_edges[1:], percent_fail_10_5)
p5, = plt.plot(bin_edges[1:], curProb(9.25, bin_edges[1:]), linestyle="--")
p6, = plt.plot(bin_edges[1:], curProb(9.75,bin_edges[1:]), linestyle="--")
p7, = plt.plot(bin_edges[1:], curProb(10.25,bin_edges[1:]), linestyle="--")
plt.legend([p1,p2,p3,p4,p5,p6,p7],
           ["Observed (All Stars)", "Observed (Mag:9-9.5)",
             "Observed (Mag:9.5-10)",  "Observed (Mag:10-10.5)",
             "Model: Mag=9.25","Model: Mag=9.75","Model: Mag=10.25"], loc=2)
plt.ylim((0.,1.))
F.set_size_inches(10,3)
plt.xlabel('Warm Pixel Fraction')
plt.ylabel('Proportion Failed')
plt.title('Star Acquisitions for the Year 2013')
F.savefig('2013_current_model_fit_wpf.pdf', type='pdf')
plt.show()

F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
p2, = plt.plot(bin_edges[1:], percent_fail_9_5)
p3, = plt.plot(bin_edges[1:], percent_fail_10)
p4, = plt.plot(bin_edges[1:], percent_fail_10_5)
plt.legend([p1,p2,p3,p4],
           ["Observed (All Stars)", "Observed (Mag:9-9.5)",
             "Observed (Mag:9.5-10)",  "Observed (Mag:10-10.5)"],
            loc=2)
plt.ylim((0.,1.))
F.set_size_inches(10,3)
plt.xlabel('Warm Pixel Fraction')
plt.ylabel('Proportion Failed')
plt.title('Star Acquisitions for the Year 2013')
F.savefig('2013_proportions_wpf.pdf',type='pdf')
plt.show()


[        nan  0.01212121  0.02608696  0.01692866  0.01792574  0.01790281
  0.02247191  0.                 nan         nan         nan         nan]
[        nan  0.02332362  0.04745763  0.05240175  0.04661017  0.04892966
  0.09615385  0.                 nan         nan         nan         nan]

Adding other terms to the fit...


In [10]:
nbins = np.arange(6,11.75,0.25)

failure_counts, bin_edges = np.histogram(yr2013_f['mag'], bins=nbins)
success_counts, bin_edges = np.histogram(yr2013_s['mag'], bins=nbins)
total_counts, bin_edges = np.histogram(yr2013['mag'], bins=nbins)
percent_fail = failure_counts.astype(float) / total_counts

def getpercentfail_byWPF(low,upp):
    failure_counts, bin_edges = np.histogram(yr2013_f['mag'][(yr2013_f['warm_pix']>=low) & (yr2013_f['warm_pix']<=upp)], bins=nbins)
    success_counts, bin_edges = np.histogram(yr2013_s['mag'][(yr2013_s['warm_pix']>=low) & (yr2013_s['warm_pix']<=upp)], bins=nbins)
    total_counts, bin_edges = np.histogram(yr2013['mag'][(yr2013['warm_pix']>=low) & (yr2013['warm_pix']<=upp)], bins=nbins)
    percent_fail = failure_counts.astype(float) / total_counts
    return percent_fail, bin_edges

design = np.vstack((np.ones(len(yr2013['success'])), 
                    yr2013['mag'],
                    yr2013['warm_pix'],
                    yr2013['mag']*yr2013['warm_pix'])).transpose()

response = yr2013['success']

nlf1 = linear_model.LogisticRegression(C=1e10)
nlf1.fit(design[:,:2],response)

nlf2 = linear_model.LogisticRegression(C=1e10)
nlf2.fit(design,response)


def maketestpts(warm_pix):
    fixed_warm_pix = warm_pix*np.ones(len(bin_edges[1:]))
    test = np.vstack((np.ones(len(bin_edges[1:])), 
                      bin_edges[1:],
                      fixed_warm_pix,
                      bin_edges[1:]*fixed_warm_pix
                      )).transpose()
    return test

pf_0_15, be = getpercentfail_byWPF(0.14,0.16)
pf_0_12, be = getpercentfail_byWPF(0.11,0.13)
pf_0_09, be = getpercentfail_byWPF(0.08,0.10)

tst_0_09 = maketestpts(0.09)
tst_0_12 = maketestpts(0.12)
tst_0_15 = maketestpts(0.15)

print tst_0_12

print nlf2.coef_
print nlf2.get_params()
print nlf2.predict_proba(tst_0_12)[:,1]
print bin_edges[1:]

F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail, linestyle="dotted")
p2, = plt.plot(bin_edges[1:], pf_0_09, linestyle="dotted")
p3, = plt.plot(bin_edges[1:], pf_0_12, linestyle="dotted")
p4, = plt.plot(bin_edges[1:], pf_0_15, linestyle="dotted")
p5, = plt.plot(bin_edges[1:], curProb(bin_edges[1:], 0.12), linestyle="--")
p6, = plt.plot(bin_edges[1:], nlf2.predict_proba(tst_0_09)[:,1])
p7, = plt.plot(bin_edges[1:], nlf2.predict_proba(tst_0_12)[:,1])
p8, = plt.plot(bin_edges[1:], nlf2.predict_proba(tst_0_15)[:,1])
plt.legend([p1,p2,p3,p4,p5,p6,p7,p8],
           ["Observed Data (All)", "Observed Data (WP:0.08-0.10)",
            "Observed Data (WP:0.11-0.13)", "Observed Data (WP:0.14-0.16)",
            "Current Model: WP=0.12",
            "Logistic Regression: WP=0.09", "Logistic Regression: WP=0.12",
            "Logistic Regression: WP=0.15"], 
           bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.ylim((0.,1.))
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Fit using first order terms plus Interaction')
plt.show()

def getpercentfail_byMAG(low,upp):
    nbins = np.arange(0.05,0.18,0.01)
    failure_counts, bin_edges = np.histogram(yr2013_f['warm_pix'][(yr2013_f['mag']>=low) & (yr2013_f['mag']<=upp)], bins=nbins)
    success_counts, bin_edges = np.histogram(yr2013_s['warm_pix'][(yr2013_s['mag']>=low) & (yr2013_s['mag']<=upp)], bins=nbins)
    total_counts, bin_edges = np.histogram(yr2013['warm_pix'][(yr2013['mag']>=low) & (yr2013['mag']<=upp)], bins=nbins)
    percent_fail = failure_counts.astype(float) / total_counts
    return percent_fail, bin_edges

def maketestptsMAG(mag):
    fixed_mag = mag*np.ones(len(bin_edges[1:]))
    test = np.vstack((np.ones(len(bin_edges[1:])), 
                      fixed_mag,
                      bin_edges[1:],
                      bin_edges[1:]*fixed_mag
                      )).transpose()
    return test

percent_fail, bin_edges = getpercentfail_byMAG(0.0,20.)
ninefive, bin_edges = getpercentfail_byMAG(9.0,9.5)
tenoh, bin_edges = getpercentfail_byMAG(9.5,10.0)
tenfive, bin_edges = getpercentfail_byMAG(10.,10.5)
tst_9_5 = maketestptsMAG(9.5)
tst_10 = maketestptsMAG(10.)
tst_10_5 = maketestptsMAG(10.5)

F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail, linestyle="dotted")
p2, = plt.plot(bin_edges[1:], ninefive, linestyle="dotted")
p3, = plt.plot(bin_edges[1:], tenoh, linestyle="dotted")
p4, = plt.plot(bin_edges[1:], tenfive, linestyle="dotted")
p5, = plt.plot(bin_edges[1:], curProb(9.5, bin_edges[1:]), linestyle="--")
p6, = plt.plot(bin_edges[1:], curProb(10.,bin_edges[1:]), linestyle="--")
p7, = plt.plot(bin_edges[1:], curProb(10.5,bin_edges[1:]), linestyle="--")
p8, = plt.plot(bin_edges[1:], nlf2.predict_proba(tst_9_5)[:,1])
p9, = plt.plot(bin_edges[1:], nlf2.predict_proba(tst_10)[:,1])
p10, = plt.plot(bin_edges[1:], nlf2.predict_proba(tst_10_5)[:,1])
plt.legend([p1,p2,p3,p4,p5,p6,p7,p8,p9,p10],
           ["Observed Data (All Stars)", "Observed Data (Mag:9-9.5)",
             "Observed Data (Mag:9.5-10)",  "Observed Data (Mag:10-10.5)",
             "Current Model: Mag=9.5","Current Model: Mag=10.","Current Model: Mag=10.5",
             "Logistic Regression: Mag=9.5","Logistic Regression: Mag=10.","Logistic Regression: Mag=10.5"], 
           bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.ylim((0.,1.))
F.set_size_inches(10,3)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Fit using first order terms plus Interaction')
plt.show()


[[  1.     6.25   0.12   0.75]
 [  1.     6.5    0.12   0.78]
 [  1.     6.75   0.12   0.81]
 [  1.     7.     0.12   0.84]
 [  1.     7.25   0.12   0.87]
 [  1.     7.5    0.12   0.9 ]
 [  1.     7.75   0.12   0.93]
 [  1.     8.     0.12   0.96]
 [  1.     8.25   0.12   0.99]
 [  1.     8.5    0.12   1.02]
 [  1.     8.75   0.12   1.05]
 [  1.     9.     0.12   1.08]
 [  1.     9.25   0.12   1.11]
 [  1.     9.5    0.12   1.14]
 [  1.     9.75   0.12   1.17]
 [  1.    10.     0.12   1.2 ]
 [  1.    10.25   0.12   1.23]
 [  1.    10.5    0.12   1.26]
 [  1.    10.75   0.12   1.29]
 [  1.    11.     0.12   1.32]
 [  1.    11.25   0.12   1.35]
 [  1.    11.5    0.12   1.38]]
[[-11.92798584   2.0382872   -0.91494086   1.46756906]]
{'C': 10000000000.0, 'intercept_scaling': 1, 'fit_intercept': True, 'penalty': 'l2', 'random_state': None, 'dual': False, 'tol': 0.0001, 'class_weight': None}
[  4.00319443e-05   6.96335862e-05   1.21121526e-04   2.10672269e-04
   3.66407751e-04   6.37194367e-04   1.10787884e-03   1.92557989e-03
   3.34478721e-03   5.80390825e-03   1.00527636e-02   1.73577642e-02
   2.98112061e-02   5.07381087e-02   8.50672398e-02   1.39216864e-01
   2.19563682e-01   3.28580568e-01   4.59833468e-01   5.96905019e-01
   7.20347235e-01   8.17542188e-01]
[  6.25   6.5    6.75   7.     7.25   7.5    7.75   8.     8.25   8.5
   8.75   9.     9.25   9.5    9.75  10.    10.25  10.5   10.75  11.    11.25
  11.5 ]

Let's play with some Bayesian Estimates for comparison!


In [11]:
import statsmodels.api as sm

out = sm.formula.glm('success ~ mag + warm_pix + mag:warm_pix', yr2013, family=sm.families.Binomial(sm.families.links.logit)).fit()
print out.summary()


                 Generalized Linear Model Regression Results                  
==============================================================================
Dep. Variable:                success   No. Observations:                12897
Model:                            GLM   Df Residuals:                    12893
Model Family:                Binomial   Df Model:                            3
Link Function:                  logit   Scale:                             1.0
Method:                          IRLS   Log-Likelihood:                -1650.1
Date:                Wed, 13 Aug 2014   Deviance:                       3300.2
Time:                        10:00:17   Pearson chi2:                 2.34e+05
No. Iterations:                     9                                         
================================================================================
                   coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------
Intercept      -20.4555      6.801     -3.008      0.003       -33.786    -7.125
mag              1.6954      0.686      2.470      0.014         0.350     3.041
warm_pix       -39.9456     77.283     -0.517      0.605      -191.417   111.526
mag:warm_pix     5.4054      7.806      0.692      0.489        -9.894    20.704
================================================================================

In [12]:
# Found using R's GLM function!
bayes_fit = bR.probitRegression('success ~ mag + warm_pix + mag:warm_pix', yr2013, 
                                n_iter=3000, burnin=1000, calcIRWLS=False, plots=False, identifier="acadata2013")


Output for: acadata2013
Model     : success ~ mag + warm_pix + mag:warm_pix
            
100.0% of iterations complete
Simulation Results (3000 Iterations w/ 1000 Burn In):

Method: Bayesian Binary Probit Regression - Holmes and Held (2006)

Coefficient            Estimate     StdErr     z-Score      pValue
Intercept             -9.447e+00   9.144e-01  -1.03e+01    0.000e+00
mag                    7.586e-01   9.546e-02   7.95e+00    1.998e-15
warm_pix              -4.189e+00   9.460e+00  -4.43e-01    6.579e-01
mag:warm_pix           1.030e+00   9.959e-01   1.03e+00    3.011e-01


In [ ]:
nbins = np.arange(6,11.75,0.25)

failure_counts, bin_edges = np.histogram(yr2013_f['mag'], bins=nbins)
success_counts, bin_edges = np.histogram(yr2013_s['mag'], bins=nbins)
total_counts, bin_edges = np.histogram(yr2013['mag'], bins=nbins)
percent_fail = failure_counts.astype(float) / total_counts

mags = bin_edges[1:]
# print (test_0_12)
print mags
f = plt.figure()

plt.plot(mags,percent_fail)

# for i in np.arange(len(bayes_fit.betadraws[1000:,:])):
for i in np.arange(1,100,1.):
    betatest = bayes_fit.betadraws[1000+i,:]
    trace = stats.norm.cdf(np.dot(test_0_12, betatest))
    plt.plot(mags, trace, color='black', alpha=0.2)
    
plt.plot(mags, stats.norm.cdf(np.dot(test_0_12, bayes_fit.bayes_betas)), linewidth=2, color="red")
plt.xlim((8,10.5))
plt.ylim((0,0.4))
f.set_size_inches(10,6)
plt.show()

In [125]:
nlf2.coef_
bayes_fit.bayes_betas


Out[125]:
array([ -7.85244817,   0.62539174, -20.5380285 ,   2.51630751])

In [132]:
import scipy.stats as stats
print bayes_fit.bayes_betas

percent_fail, bin_edges = getpercentfail_byWPF(0.0,.2)
nlf2 = linear_model.LogisticRegression(C=1e5)
nlf2.fit(design,response)

b = np.array([0.4370, -0.2353, -91.4697, 9.8803])

def maketestpts(warm_pix):
    fixed_warm_pix = warm_pix*np.ones(len(bin_edges[1:]))
    test = np.vstack((np.ones(len(bin_edges[1:])), 
                      bin_edges[1:],
                      fixed_warm_pix,
                      bin_edges[1:]*fixed_warm_pix
                      )).transpose()
    print test
    return test

test_0_12 = maketestpts(0.12)
# print test_0_12
# print bR.invlogit(np.dot(test_0_12, bayes_fit.bayes_betas))

F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
p2, = plt.plot(bin_edges[1:], nlf2.predict_proba(test_0_12)[:,1], linestyle='--')
p3, = plt.plot(bin_edges[1:], stats.norm.cdf((np.dot(test_0_12, bayes_fit.bayes_betas))), linestyle='--')
p4, = plt.plot(bin_edges[1:], curProb(bin_edges[1:], 0.12), linestyle="--")
plt.legend([p1,p2,p3, p4],
           ["Observed", "IRLS Logistic Regression", "Bayes Probit Regression", "Model: WP=0.12"], loc=2)
plt.ylim((0.,1.))
plt.xlim((8.,11.))
F.set_size_inches(10,6)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Fit using first order terms plus Interaction')
F.savefig("bayes_logit_compare.pdf", type='pdf')
plt.show()

percent_fail = failure_counts.astype(float) / total_counts

def curProb(mag, N100):
    mag = mag - 10.
    scale = 10**(0.18 + 0.99*mag - 0.49*mag*mag)
    offset = 10**(-1.49 + 0.89*mag + 0.28*mag*mag)
    return scale * N100 + offset


F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
p2, = plt.plot(bin_edges[1:], nlf2.predict_proba(test_0_12)[:,1], linestyle='--')
p3, = plt.plot(bin_edges[1:], bR.invlogit(np.dot(test_0_12, out.params)), linestyle='--')
p4, = plt.plot(bin_edges[1:], curProb(bin_edges[1:], 0.12), linestyle="--")

plt.legend([p1,p2,p3,p4],
           ["Observed", "IRLS Logistic Regression (Sklearn)", "IRLS Logistic Regression (Statsmodels)", "Model: WP=0.12"], loc=2)
plt.ylim((0.,1.))
plt.xlim((8.,11.))
F.set_size_inches(10,5)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Fit using first order terms plus Interaction')
F.savefig('IRLS_fit.pdf',type='pdf')
plt.show()

F = plt.figure()
p1, = plt.plot(bin_edges[1:], percent_fail)
p2, = plt.plot(bin_edges[1:], stats.norm.cdf((np.dot(test_0_12, bayes_fit.bayes_betas))), linestyle='--')
p3, = plt.plot(bin_edges[1:], curProb(bin_edges[1:], 0.12), linestyle="--")

plt.legend([p1,p2,p3],
           ["Observed", "Bayes Probit Regression", "Model: WP=0.12"], loc=2)
plt.ylim((0.,1.))
plt.xlim((8.,11.))
F.set_size_inches(10,6)
plt.xlabel('Star Magnitude')
plt.ylabel('Proportion Failed')
plt.title('Fit using first order terms plus Interaction')
F.savefig('Bayes_fit.pdf',type='pdf')
plt.show()

print(percent_fail)


[ -7.85244817   0.62539174 -20.5380285    2.51630751]
[[  1.     6.25   0.12   0.75]
 [  1.     6.5    0.12   0.78]
 [  1.     6.75   0.12   0.81]
 [  1.     7.     0.12   0.84]
 [  1.     7.25   0.12   0.87]
 [  1.     7.5    0.12   0.9 ]
 [  1.     7.75   0.12   0.93]
 [  1.     8.     0.12   0.96]
 [  1.     8.25   0.12   0.99]
 [  1.     8.5    0.12   1.02]
 [  1.     8.75   0.12   1.05]
 [  1.     9.     0.12   1.08]
 [  1.     9.25   0.12   1.11]
 [  1.     9.5    0.12   1.14]
 [  1.     9.75   0.12   1.17]
 [  1.    10.     0.12   1.2 ]
 [  1.    10.25   0.12   1.23]
 [  1.    10.5    0.12   1.26]
 [  1.    10.75   0.12   1.29]
 [  1.    11.     0.12   1.32]
 [  1.    11.25   0.12   1.35]
 [  1.    11.5    0.12   1.38]]
[ 0.          0.01086957  0.01324503  0.00429185  0.00384615  0.00906344
  0.01886792  0.00378788  0.00742574  0.00218818  0.00837989  0.00998004
  0.01873112  0.04489338  0.06460876  0.10437452  0.17765363  0.36141304
  0.45977011  0.58974359  1.                 nan]

Let's get some confidence intervals


In [91]:
def randomprediction(mag, wpf):
    X = np.array([1, mag, wpf, mag*wpf])
    beta0 = np.random.choice(bayes_fit.betadraws[500:,0])
    beta1 = np.random.choice(bayes_fit.betadraws[500:,1])
    beta2 = np.random.choice(bayes_fit.betadraws[500:,2])
    beta3 = np.random.choice(bayes_fit.betadraws[500:,3])
    betatest = np.array([beta0, beta1, beta2, beta3])
    return stats.norm.cdf(np.dot(X,betatest.transpose()))


# for j in np.arange(6,11.5,0.5):
#     print j
#     outtest = []
#     for i in np.arange(1,100000):
#         outtest.append(randomprediction(j, 0.18))
#     f = plt.figure()
#     plt.hist(outtest,normed=True,bins=20)
# #     plt.show()

In [1]:
nbins = np.arange(6,11.75,0.25)

failure_counts, bin_edges = np.histogram(yr2013_f['mag'], bins=nbins)
success_counts, bin_edges = np.histogram(yr2013_s['mag'], bins=nbins)
total_counts, bin_edges = np.histogram(yr2013['mag'], bins=nbins)
percent_fail = failure_counts.astype(float) / total_counts

mags = bin_edges[1:]
# print (test_0_12)
print mags
f = plt.figure()

plt.plot(mags,percent_fail)

# for i in np.arange(len(bayes_fit.betadraws[1000:,:])):
for i in np.arange(1,100,1.):
    betatest = bayes_fit.betadraws[1000+i,:]
    trace = stats.norm.cdf(np.dot(test_0_12, betatest))
    plt.plot(mags, trace, color='black', alpha=0.2)
    
plt.plot(mags, stats.norm.cdf(np.dot(test_0_12, bayes_fit.bayes_betas)), linewidth=2, color="red")
plt.xlim((8,10.5))
plt.ylim((0,0.4))
f.set_size_inches(10,6)
plt.show()


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-4ed221321f59> in <module>()
----> 1 nbins = np.arange(6,11.75,0.25)
      2 
      3 failure_counts, bin_edges = np.histogram(yr2013_f['mag'], bins=nbins)
      4 success_counts, bin_edges = np.histogram(yr2013_s['mag'], bins=nbins)
      5 total_counts, bin_edges = np.histogram(yr2013['mag'], bins=nbins)

NameError: name 'np' is not defined