In [1]:

    
%load_ext autoreload
%autoreload #Use this to reload modules if they are changed on disk while the notebook is running
from __future__ import division
from snmachine import sndata, snfeatures, snclassifier, tsne_plot
import numpy as np
import matplotlib.pyplot as plt
import time, os, pywt,subprocess
from sklearn.decomposition import PCA
from astropy.table import Table,join,vstack
from astropy.io import fits
import sklearn.metrics 
import sncosmo
%matplotlib nbagg
from astropy.table import Column









    



---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-1-c24a719e15e6> in <module>()
      2 get_ipython().magic('autoreload #Use this to reload modules if they are changed on disk while the notebook is running')
      3 from __future__ import division
----> 4 from snmachine import sndata, snfeatures, snclassifier, tsne_plot
      5 import numpy as np
      6 import matplotlib.pyplot as plt

ImportError: No module named 'snmachine'

SN numbers with classification code

SN001695 - 2

2457 - 33

2542 - 1

5399 - 2

13481 - 2

13866 - 1

16742 - 21

17270 - 32

27266 -32

64968 - 23

149147 -21



In [2]:

    
dataset='spcc'



In [3]:

    
# WARNING...
#Multinest uses a hardcoded character limit for the output file names. I believe it's a limit of 100 characters
#so avoid making this file path to lengthy if using nested sampling or multinest output file names will be truncated

#Change outdir to somewhere on your computer if you like
outdir=os.path.join('output_%s_no_z' %dataset,'')
out_features=os.path.join(outdir,'features') #Where we save the extracted features to
out_class=os.path.join(outdir,'classifications') #Where we save the classification probabilities and ROC curves
out_int=os.path.join(outdir,'int') #Any intermediate files (such as multinest chains or GP fits)

subprocess.call(['mkdir',outdir])
subprocess.call(['mkdir',out_features])
subprocess.call(['mkdir',out_class])
subprocess.call(['mkdir',out_int])









    Out[3]:





1



In [4]:

    
#Data root
rt=os.path.join('SPCC_SUBSET','')



In [5]:

    
dat=sndata.Dataset(rt)









    



Reading data...
11 objects read into memory.



In [6]:

    
read_from_file=False #True #We can use this flag to quickly rerun from saved features
run_name=os.path.join(out_features,'%s_all' %dataset)



In [7]:

    
dat.object_names[0]
types=dat.get_types()
types['Type'][np.floor(types['Type']/10)==2]=2
types['Type'][np.floor(types['Type']/10)==3]=3



In [8]:

    
#test_table = dat.data['DES_SN001695.DAT'] #get_lightcurve

#dat.plot_all()
#dat.data[dat.object_names[0]]



In [ ]:



In [9]:

    
realspertype=667



In [10]:

    
type_II = []
for i in range(len(dat.data['DES_SN002457.DAT']['flux'])):
    type_II.append(np.random.normal(dat.data['DES_SN002457.DAT']['flux'][i], dat.data['DES_SN002457.DAT']['flux_error'][i], realspertype))



In [11]:

    
type_Ia = []
for i in range(len(dat.data['DES_SN002542.DAT']['flux'])):
    type_Ia.append(np.random.normal(dat.data['DES_SN002542.DAT']['flux'][i], dat.data['DES_SN002542.DAT']['flux_error'][i], realspertype))



In [12]:

    
type_Ibc = []
for i in range(len(dat.data['DES_SN005399.DAT']['flux'])):
    type_Ibc.append(np.random.normal(dat.data['DES_SN005399.DAT']['flux'][i], dat.data['DES_SN005399.DAT']['flux_error'][i], realspertype))



In [13]:

    
type_Ia = np.array(type_Ia)
type_Ibc = np.array(type_Ibc)
type_II = np.array(type_II)



In [14]:

    
#test_table.replace_column('flux', type_II[:,0])



In [15]:

    
#test_table.write('testing.dat',format = 'ascii')



In [16]:

    
#dat.data['DES_SN001695.DAT']
#print type_Ia



In [17]:

    
#isinstance(dat.object_names, np.ndarray)



In [18]:

    
#test_table_II = dat.data['DES_SN002457.DAT']



In [19]:

    
#dir(dat)



In [20]:

    
#print data[0:10]
#print data[5].split()
def make_new_data(filename, table, output, sntype):
    with open(filename) as f:
        data = f.read().split("\n")
    filters = data[5].split()
    survey = data[0].split()
    stuffRA = data[6].split()
    stuffDec = data[7].split()
    MWEBV = data[11].split()
    if sntype==1:
        typestring='SN Type = Ia , MODEL = mlcs2k2.SNchallenge'
    elif sntype==2:
        typestring='SN Type = II , MODEL = SDSS-017564'
    elif sntype==3:
        typestring='SN Type = Ic , MODEL = SDSS-014475'
    else:
        typestring='SOMETHING WENT HORRIBLY WRONG'
    table.meta = {survey[0][:-1]: survey[1], stuffRA[0][:-1]: stuffRA[1], stuffDec[0][:-1]: stuffDec[1],filters[0][:-1]: filters[1],
                 MWEBV[0][:-1]: MWEBV[1], 'SNTYPE': -9, 'SIM_COMMENT': typestring  }
    #table.rename_column('mjd', 'MJD')
    #table.rename_column('filter ', 'FLT')
    #table.rename_column('flux', 'FLUXCAL')
    #table.rename_column('flux_error', 'FLUXCALERR')
    sncosmo.write_lc(table, 'new_mocks/nodesz_%s.dat'%output,pedantic=True, format = 'snana')



In [21]:

    
filename_II = 'SPCC_SUBSET/DES_SN002457.DAT'
filename_Ia = 'SPCC_SUBSET/DES_SN002542.DAT'
filename_Ibc = 'SPCC_SUBSET/DES_SN005399.DAT'

for i in range(realspertype):
    test_table_II = dat.data['DES_SN002457.DAT'].copy()
    test_table_Ia = dat.data['DES_SN002542.DAT'].copy()
    test_table_Ibc = dat.data['DES_SN005399.DAT'].copy()
    #print test_table_II

    #test_table_Ia.replace_column('mjd', test_table_Ia['mjd']+56178)
    #test_table_II.replace_column('mjd', test_table_II['mjd']+56178)
    #test_table_Ibc.replace_column('mjd', test_table_Ibc['mjd']+56178)

    test_table_II.rename_column('flux', 'FLUXCAL')
    test_table_II.rename_column('flux_error', 'FLUXCALERR')
    
    test_table_Ibc.rename_column('flux', 'FLUXCAL')
    test_table_Ibc.rename_column('flux_error', 'FLUXCALERR')
        
    test_table_Ia.rename_column('flux', 'FLUXCAL')
    test_table_Ia.rename_column('flux_error', 'FLUXCALERR')

    objs=[]
    new_data={}

    col_Ia = Table.Column(name='field',data=np.zeros(len(test_table_Ia)) )
    test_table_Ia.add_column(col_Ia, index = 2)
    col_Ibc = Table.Column(name='field',data=np.zeros(len(test_table_Ibc)) )
    test_table_Ibc.add_column(col_Ibc, index = 2)
    col_II = Table.Column(name='field',data=np.zeros(len(test_table_II)) )
    test_table_II.add_column(col_II, index = 2)


    test_table_Ia.replace_column('FLUXCAL', type_Ia[:,i])
    test_table_Ibc.replace_column('FLUXCAL', type_Ibc[:,i])
    test_table_II.replace_column('FLUXCAL', type_II[:,i])
    
    #test_table_Ia.replace_column('FLUXCALERR', test_table_Ia['FLUXCALERR'] + test_table_Ia['FLUXCALERR']*0.02)
    #test_table_Ibc.replace_column('FLUXCALERR', test_table_Ibc['FLUXCALERR'] + test_table_Ibc['FLUXCALERR']*0.02)
    #test_table_II.replace_column('FLUXCALERR', test_table_II['FLUXCALERR'] + test_table_II['FLUXCALERR']*0.02)
    
    wh_desr_II = np.where(test_table_II['filter']=='desz')
    wh_desr_Ia = np.where(test_table_Ia['filter']=='desz')
    wh_desr_Ibc = np.where(test_table_Ibc['filter']=='desz')
    
    test_table_II.remove_rows(wh_desr_II)
    test_table_Ia.remove_rows(wh_desr_Ia)
    test_table_Ibc.remove_rows(wh_desr_Ibc)
    
    make_new_data(filename_Ibc, test_table_Ibc, 'Ibc_%s'%i, 3)
    make_new_data(filename_II, test_table_II, 'II_%s'%i, 2)
    make_new_data(filename_Ia, test_table_Ia, 'Ia_%s'%i, 1)
    
    
    #objs.append((str)(i))
    #new_data[(str)(i)]=test_table_II.copy()  #new_light_curve_Ia
    #new_data[(str)(i+realspertype)]=test_table_Ia.copy()#new_light_curve_II
    #new_data[(str)(i+realspertype)]=test_table_Ibc.copy()#new_light_curve_Ibc
    
#sncosmo.read_lc(new_data)
#new_data = Counter(new_data_Ia) + Counter(new_data_II) + Counter(new_data_Ibc)

#for i in range(3*realspertype):    
#    objs.append((str)(i))
    
#dat.object_names=np.asarray(objs)
#dat.data=new_data



In [22]:

    
#print test_table_Ia
#test_table_Ia.replace_column('FLUXCALERR', test_table_Ia['FLUXCALERR']*5.)
#print test_table_Ia
#test_table_II = dat.data['DES_SN002457.DAT']
#wh_desr = np.where(test_table_II['filter']=='desr')
#test_table_II.remove_rows(wh_desr)
#print test_table_II



In [24]:

    
rt1=os.path.join('new_mocks','')
dat1=sndata.Dataset(rt1)









    



Reading data...
2001 objects read into memory.



In [25]:

    
for obj in dat1.object_names:
    for i in range(len(dat1.data[obj])):
        dat1.data[obj]['filter'][i]=dat1.data[obj]['filter'][i][3:7]



In [25]:

    
#types



In [26]:

    
#For now we restrict ourselves to three supernova types: Ia (1), II (2) and Ibc (3)
types=dat1.get_types()
types['Type'][np.floor(types['Type']/10)==2]=2
types['Type'][np.floor(types['Type']/10)==3]=3



In [27]:

    
#dat1.plot_all()
#dat1.get_types()



In [ ]:



In [27]:

    
mod1Feats=snfeatures.ParametricFeatures('karpenka',sampler='leastsq')



In [28]:

    
%%capture --no-stdout
if read_from_file:
    mod1_features=Table.read('%s_karpenka.dat' %run_name, format='ascii')
    blah=mod1_features['Object'].astype(str)
    mod1_features.replace_column('Object', blah)
else:
    mod1_features=mod1Feats.extract_features(dat1,nprocesses=4,chain_directory=out_int)
    mod1_features.write('%s_karpenka.dat' %run_name, format='ascii')









    



2001 objects fitted
Time taken is 6.22 minutes



In [29]:

    
#Unfortunately, sometimes the fitting methods return NaN for some parameters for these models.
for c in mod1_features.colnames[1:]:
    mod1_features[c][np.isnan(mod1_features[c])]=0
    
#print mod1_features[0], len(mod1_features)



In [30]:

    
mod1Feats.fit_sn
dat1.set_model(mod1Feats.fit_sn,mod1_features)









    



Fitting supernova models...






    



/Users/karaponder/Projects/Basic_Codes/snmachine/snmachine/parametric_models.py:153: RuntimeWarning: overflow encountered in exp
  return A*(1+B*(t-t1)*(t-t1))*np.exp(-(t-t0)/T_fall)/(1+np.exp(-(t-t0)/T_rise))






    



Models fitted.



In [32]:

    
dat1.plot_all()



In [ ]:



In [31]:

    
plt.figure()
tsne_plot.plot(mod1_features,join(mod1_features,types)['Type'])



In [32]:

    
plt.figure()
clss=snclassifier.run_pipeline(mod1_features,types,output_name=os.path.join(out_class,'model2'),
                          classifiers=['nb','svm','boost_dt'], nprocesses=4)









    














    











    



Created classifier of type:
Created classifier of type:
Created classifier of type:
GaussianNB()

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None)



Optimised parameters: {}
Optimised parameters: {'C': 31.622776601683793, 'gamma': 0.0031622776601683794}
Optimised parameters: {'n_estimators': 25, 'base_estimator': DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=15,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')}
Classifier nb: AUC = 0.98743718593 FoM = 0.974874371859
Classifier svm: AUC = 1.0 FoM = 1.0
Classifier boost_dt: AUC = 1.0 FoM = 1.0

Time taken  1.1021290501 minutes






    



//anaconda/lib/python2.7/site-packages/matplotlib/cbook.py:137: MatplotlibDeprecationWarning: The set_color_cycle attribute was deprecated in version 1.5. Use set_prop_cycle instead.
  warnings.warn(message, mplDeprecation, stacklevel=1)

waveFeats=snfeatures.WaveletFeatures()

%%capture --no-stdout if read_from_file: wave_features=Table.read('%s_wavelets.dat' %run_name, format='ascii') #Crucial for this format of id's blah=wave_features['Object'].astype(str) wave_features.replace_column('Object', blah) PCA_vals=np.loadtxt('%s_wavelets_PCA_vals.dat' %run_name) PCA_vec=np.loadtxt('%s_wavelets_PCA_vec.dat' %run_name) PCA_mean=np.loadtxt('%s_wavelets_PCA_mean.dat' %run_name) else: wave_features=waveFeats.extract_features(dat,nprocesses=6,output_root=out_int,save_output='all') wave_features.write('%s_wavelets.dat' %run_name, format='ascii') np.savetxt('%s_wavelets_PCA_vals.dat' %run_name,waveFeats.PCA_eigenvals) np.savetxt('%s_wavelets_PCA_vec.dat' %run_name,waveFeats.PCA_eigenvectors) np.savetxt('%s_wavelets_PCA_mean.dat' %run_name,waveFeats.PCA_mean) PCA_vals=waveFeats.PCA_eigenvals PCA_vec=waveFeats.PCA_eigenvectors PCA_mean=waveFeats.PCA_mean



In [ ]:

    
np.sort(dat.object_names)



In [ ]:

    
dat.set_model(waveFeats.fit_sn,wave_features,PCA_vec,PCA_mean,0,dat.get_max_length(),dat.filter_set)



In [ ]:

    
#dat.plot_all()



In [ ]:

    
#wave_features



In [ ]:

    
#types



In [ ]:

    
#join(wave_features, types)



In [ ]:

    
#plt.figure()
#tsne_plot.plot(wave_features,join(wave_features,types)['Type'])

#This is a Features object, not yet the extracted features #We need to register the LSST bands with sncosmo (which is why lsst_bands=True) salt2Feats=snfeatures.TemplateFeatures(sampler='leastsq') #This performs the actual feature extraction. All feature extraction methods are parallelised with multiprocessing, #just set nprocesses>1 for parallelisation. if read_from_file: salt2_features=Table.read('%s_templates.dat' %run_name, format='ascii') blah=salt2_features['Object'].astype(str) salt2_features.replace_column('Object', blah) else: salt2_features=salt2Feats.extract_features(dat,use_redshift=True,nprocesses=6,chain_directory=out_int) salt2_features.write('%s_templates.dat' %run_name, format='ascii')



In [ ]:

    
#This code takes the fitted parameters and generates the model light curve for plotting purposes.
#dat.set_model(salt2Feats.fit_sn,salt2_features)



In [ ]:

    
#dat.plot_all()



In [ ]:

    
#dat.data[dat.object_names[5]]



In [ ]:



In [ ]: