In [2]:
import matplotlib.pyplot as plt
import numpy as np
from psm import generate_spectrum,coeff_array,reference_point,psminfo,new_reference
from spectralspace.analysis.empca_residuals import empca_residuals, maskFilter
import spectralspace.sample.access_spectrum as acs
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [14]:
psminfo.files


Out[14]:
['reference_point',
 'training_labels',
 'reference_flux',
 'indices',
 'wavelength',
 'coeff_array']

In [15]:
# red clump medians
labels=np.array([4809.3486328125/1000.,2.6270346641540527,1.50707360e+00,
                -0.13878285139799118,0.018412001430988312,-0.025364108383655548,
                -0.13141327,-0.12060748,0.036324639,0.094804168,
                 0.027691118,-0.23517463,-0.14277041,
                -0.14712991,-0.17961116,-0.093085885,-0.15384229,
                -0.094825908541679382,8.46284132e-01])

In [16]:
tl = psminfo['training_labels']
labelnames = ['Teff','logg','vturb','ch','nh','oh','nah','mgh','alh','sih',
              'sh','kh','cah','tih','vh','mnh','nih','feh','c12c13']
for i in range(tl.shape[0]):
    print labelnames[i],np.max(tl[i]), np.min(tl[i]),labels[i]


Teff 4.99855859375 4.00053344727 4.80934863281
logg 3.62158823013 0.819273948669 2.62703466415
vturb 2.99934199474 0.10013315509 1.5070736
ch 0.376749992371 -0.770902991295 -0.138782851398
nh 0.598280012608 -0.872979998589 0.018412001431
oh 0.36159324646 -0.517212986946 -0.0253641083837
nah 0.421224564314 -0.976258933544 -0.13141327
mgh 0.517879128456 -0.66106069088 -0.12060748
alh 0.439062386751 -0.831385850906 0.036324639
sih 0.530551731586 -0.805269837379 0.094804168
sh 0.509368598461 -0.54309284687 0.027691118
kh 0.204906135798 -0.923615634441 -0.23517463
cah 0.568487763405 -0.702370047569 -0.14277041
tih 0.550010204315 -0.85531270504 -0.14712991
vh 0.470044344664 -0.980342149734 -0.17961116
mnh 0.474761366844 -0.813144683838 -0.093085885
nih 0.395760565996 -0.810276627541 -0.15384229
feh 0.315886974335 -0.720110535622 -0.0948259085417
c12c13 1.99970266437 -0.898681529075 0.846284132

In [17]:
rp = reference_point

In [18]:
test1=new_reference(order=2,newref=rp)#labels)
test2=new_reference(order=1.5,newref=rp)#labels)
plt.plot(test1-test2)


Out[18]:
[<matplotlib.lines.Line2D at 0x9c13f90>]

In [20]:
k = 1.4826
def MAD(arr):
    return k**2*np.ma.median((arr-np.ma.median(arr))**2)

In [ ]:
Teffspr = np.arange(4.5,5.0,0.05)
loggspr = np.linspace(2.5,2.7,len(Teffspr))
chs = np.linspace(-0.3,0.1,len(Teffspr))
partspecs = np.zeros((len(Teffspr),7214))
linspecs = np.zeros((len(Teffspr),7214))
quadspecs = np.zeros((len(Teffspr),7214))
for i in range(len(Teffspr)):
    partspecs[i] = new_reference(Teff=Teffspr[i],logg=loggspr[i],feh=0.1,order=1.5,newref=labels)#,ch=chs[i])
    quadspecs[i] = new_reference(Teff=Teffspr[i],logg=loggspr[i],feh=0.1,order=2,newref=labels)#,ch=chs[i])
    linspecs[i] = new_reference(Teff=Teffspr[i],logg=loggspr[i],feh=0.1,order=1,newref=labels)#,ch=chs[i])
 
diff2 = quadspecs-partspecs
diff1 = quadspecs-linspecs
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.imshow(diff1,aspect=diff1.shape[1]/float(diff1.shape[0]))#,vmin=-0.1,vmax=0.1)
plt.colorbar()
plt.subplot(122)
plt.imshow(diff2,aspect=diff2.shape[1]/float(diff2.shape[0]))#,vmin=-0.1,vmax=0.1)
plt.colorbar()
print MAD(diff1), MAD(diff2)
print np.var(diff1), np.var(diff2)


4.24934276791e-06 2.4484689425e-06
9.01466781485e-06 3.14896994343e-06

In [ ]:
rcdist_sample = empca_residuals('apogee','red_clump',maskFilter,ask=True,
                                badcombpixmask=7935,
                                datadir='/geir_data/scr/price-jones/Data/apogee_dim_reduction')


Which data release? (Enter for 12): 
properties  ['DR', 'EMPCA_wrapper', 'R2compare', '__class__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattribute__', '__hash__', '__init__', '__module__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_basicStructure', '_dataSource', '_getProperties', '_match', '_sampleInfo', '_sampleType', 'applyMask', 'checkArrays', 'continuumNormalize', 'correctUncertainty', 'data', 'directoryClean', 'fibFit', 'filterCopy', 'findCorrection', 'findFit', 'findResiduals', 'fitStatistic', 'func_sort', 'getDirectory', 'imshow', 'initArrays', 'logplot', 'makeArrays', 'makeMatrix', 'multiFit', 'numberStars', 'pixelEMPCA', 'plotHistogram', 'plot_example_fit', 'resizePixelEigvec', 'sample_wrapper', 'samplesplit', 'setDeltaR2', 'setR2', 'setR2noise', 'show_sample_coverage', 'uncorrectUncertainty']
Type done at any prompt when finished
Data key: TEFF
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 4900
Lower limit (Enter for minimum): 4700
Found good limits
And/or? and
Data key: MEANFIB
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 300
Lower limit (Enter for minimum): 100
Found good limits
And/or? done

In [30]:
ch_cls = 5e-3
nh_cls = 0.01
oh_cls = 0.01
nah_cls = 0.038
mgh_cls = 7.6e-3
alh_cls = 0.02
sih_cls = 8.2e-3
sh_cls = 0.024
kh_cls = 0.044
cah_cls = 0.016
tih_cls = 0.018
vh_cls = 0.06
mnh_cls = 0.013
nih_cls = 0.01
feh_cls = 4.3e-4
c12c13_cls = 0

tingspr = np.array([ch_cls,nh_cls,oh_cls,nah_cls,mgh_cls,alh_cls,sih_cls,sh_cls,kh_cls,
                    cah_cls,tih_cls,vh_cls,mnh_cls,nih_cls,feh_cls,c12c13_cls])

In [35]:
tingspr[-4:-2]


Out[35]:
array([ 0.013,  0.01 ])

In [38]:
np.log10(0.05**13/(np.prod(tingspr[:-5])*np.prod(tingspr[-4:-2])))


Out[38]:
7.1151992568230655

In [11]:
Nbins = 10000
hist,binEdges = np.histogram(rcdist_sample.teff,bins=Nbins,density=True)
tempprobs = hist/np.sum(hist)
tempchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(tempchoices,tempprobs)
plt.xlabel('Teff')

hist,binEdges = np.histogram(rcdist_sample.logg,bins=Nbins,density=True)
loggprobs = hist/np.sum(hist)
loggchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(loggchoices,loggprobs)
plt.xlabel('logg')

hist,binEdges = np.histogram(rcdist_sample.fe_h,bins=Nbins,density=True)
fe_hprobs = hist/np.sum(hist)
fe_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(fe_hchoices,fe_hprobs)
plt.xlabel('Fe/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['C_H'][rcdist_sample.matchingData['C_H']!=-9999],bins=Nbins,density=True)
c_hprobs = hist/np.sum(hist)
c_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(c_hchoices,c_hprobs)
plt.xlabel('C/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['N_H'][rcdist_sample.matchingData['N_H']!=-9999],bins=Nbins,density=True)
n_hprobs = hist/np.sum(hist)
n_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(n_hchoices,n_hprobs)
plt.xlabel('N/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['O_H'][rcdist_sample.matchingData['O_H']!=-9999],bins=Nbins,density=True)
o_hprobs = hist/np.sum(hist)
o_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(o_hchoices,o_hprobs)
plt.xlabel('O/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['NA_H'][rcdist_sample.matchingData['NA_H']!=-9999],bins=Nbins,density=True)
na_hprobs = hist/np.sum(hist)
na_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(na_hchoices,na_hprobs)
plt.xlabel('Na/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['MG_H'][rcdist_sample.matchingData['MG_H']!=-9999],bins=Nbins,density=True)
mg_hprobs = hist/np.sum(hist)
mg_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(mg_hchoices,mg_hprobs)
plt.xlabel('Mg/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['AL_H'][rcdist_sample.matchingData['AL_H']!=-9999],bins=Nbins,density=True)
al_hprobs = hist/np.sum(hist)
al_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(al_hchoices,al_hprobs)
plt.xlabel('Al/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['SI_H'][rcdist_sample.matchingData['SI_H']!=-9999],bins=Nbins,density=True)
si_hprobs = hist/np.sum(hist)
si_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(si_hchoices,si_hprobs)
plt.xlabel('Si/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['S_H'][rcdist_sample.matchingData['S_H']!=-9999],bins=Nbins,density=True)
s_hprobs = hist/np.sum(hist)
s_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(s_hchoices,s_hprobs)
plt.xlabel('S/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['K_H'][rcdist_sample.matchingData['K_H']!=-9999],bins=Nbins,density=True)
k_hprobs = hist/np.sum(hist)
k_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(k_hchoices,k_hprobs)
plt.xlabel('K/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['CA_H'][rcdist_sample.matchingData['CA_H']!=-9999],bins=Nbins,density=True)
ca_hprobs = hist/np.sum(hist)
ca_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(ca_hchoices,ca_hprobs)
plt.xlabel('Ca/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['TI_H'][rcdist_sample.matchingData['TI_H']!=-9999],bins=Nbins,density=True)
ti_hprobs = hist/np.sum(hist)
ti_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(ti_hchoices,ti_hprobs)
plt.xlabel('Ti/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['V_H'][rcdist_sample.matchingData['V_H']!=-9999],bins=Nbins,density=True)
v_hprobs = hist/np.sum(hist)
v_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(v_hchoices,v_hprobs)
plt.xlabel('V/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['MN_H'][rcdist_sample.matchingData['MN_H']!=-9999],bins=Nbins,density=True)
mn_hprobs = hist/np.sum(hist)
mn_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(mn_hchoices,mn_hprobs)
plt.xlabel('Mn/H')

hist,binEdges = np.histogram(rcdist_sample.matchingData['NI_H'][rcdist_sample.matchingData['NI_H']!=-9999],bins=Nbins,density=True)
ni_hprobs = hist/np.sum(hist)
ni_hchoices =  (2*binEdges-np.roll(binEdges,1))[1:]
plt.figure()
plt.step(ni_hchoices,ni_hprobs)
plt.xlabel('Ni/H')


Out[11]:
<matplotlib.text.Text at 0xb37d3d0>

In [12]:
Nstars = 100
linspecs = np.zeros((Nstars,7214))
quadspecs = np.zeros((Nstars,7214))
teffs = np.random.choice(tempchoices,size=Nstars,p=tempprobs)
for i in range(Nstars):
    linspecs[i] = generate_spectrum(Teff=teffs[i]/1000.,order=1)
    quadspecs[i] = generate_spectrum(Teff=teffs[i]/1000.)
diffspec = linspecs-quadspecs
Tsort = np.argsort(teffs)
plt.imshow(diffspec[Tsort],aspect=diffspec.shape[1]/float(diffspec.shape[0]))
plt.colorbar()
MAD(diffspec), np.var(diffspec)


Out[12]:
(9.0916349606003626e-06, 4.6178326057580841e-05)

In [13]:
Nstars = 100
linspecs = np.zeros((Nstars,7214))
quadspecs = np.zeros((Nstars,7214))
teffs = np.random.choice(tempchoices,size=Nstars,p=tempprobs)
loggs = np.random.choice(loggchoices,size=Nstars,p=loggprobs)
for i in range(Nstars):
    linspecs[i] = generate_spectrum(Teff=teffs[i]/1000.,logg=loggs[i],order=1)
    quadspecs[i] = generate_spectrum(Teff=teffs[i]/1000.,logg=loggs[i])
diffspec = linspecs-quadspecs
Tsort = np.argsort(teffs)
plt.imshow(diffspec[Tsort],aspect=diffspec.shape[1]/float(diffspec.shape[0]))
plt.colorbar()
MAD(diffspec), np.var(diffspec)


Out[13]:
(9.3226431001294846e-06, 4.1324585372983491e-05)

In [14]:
Nstars = 100
linspecs = np.zeros((Nstars,7214))
quadspecs = np.zeros((Nstars,7214))
teffs = np.random.choice(tempchoices,size=Nstars,p=tempprobs)
loggs = np.random.choice(loggchoices,size=Nstars,p=loggprobs)
fe_hs = np.random.choice(fe_hchoices,size=Nstars,p=fe_hprobs)
for i in range(Nstars):
    linspecs[i] = generate_spectrum(Teff=teffs[i]/1000.,logg=loggs[i],feh=fe_hs[i],order=1)
    quadspecs[i] = generate_spectrum(Teff=teffs[i]/1000.,logg=loggs[i],feh=fe_hs[i])
diffspec = linspecs-quadspecs
Tsort = np.argsort(teffs)
plt.imshow(diffspec[Tsort],aspect=diffspec.shape[1]/float(diffspec.shape[0]))
plt.colorbar()
MAD(diffspec), np.var(diffspec)


Out[14]:
(1.0870783020569517e-05, 4.5686953820128636e-05)

In [96]:
Nstars = 1000
linspecs = np.zeros((Nstars,7214))
quadspecs = np.zeros((Nstars,7214))
teffs = np.random.choice(tempchoices,size=Nstars,p=tempprobs)
loggs = np.random.choice(loggchoices,size=Nstars,p=loggprobs)
fe_hs = np.random.choice(fe_hchoices,size=Nstars,p=fe_hprobs)
c_hs = np.random.choice(c_hchoices,size=Nstars,p=c_hprobs)
n_hs = np.random.choice(n_hchoices,size=Nstars,p=n_hprobs)
o_hs = np.random.choice(o_hchoices,size=Nstars,p=o_hprobs)
na_hs = np.random.choice(na_hchoices,size=Nstars,p=na_hprobs)
mg_hs = np.random.choice(mg_hchoices,size=Nstars,p=mg_hprobs)
al_hs = np.random.choice(al_hchoices,size=Nstars,p=al_hprobs)
si_hs = np.random.choice(si_hchoices,size=Nstars,p=si_hprobs)
s_hs = np.random.choice(s_hchoices,size=Nstars,p=s_hprobs)
k_hs = np.random.choice(k_hchoices,size=Nstars,p=k_hprobs)
ca_hs = np.random.choice(ca_hchoices,size=Nstars,p=ca_hprobs)
ti_hs = np.random.choice(ti_hchoices,size=Nstars,p=ti_hprobs)
v_hs = np.random.choice(v_hchoices,size=Nstars,p=v_hprobs)
mn_hs = np.random.choice(mn_hchoices,size=Nstars,p=mn_hprobs)
ni_hs = np.random.choice(ni_hchoices,size=Nstars,p=ni_hprobs)

for i in range(Nstars):
    linspecs[i] = generate_spectrum(Teff=teffs[i]/1000.,logg=loggs[i],feh=fe_hs[i],ch=c_hs[i],
                                    nh=n_hs[i],oh=o_hs[i],nah=na_hs[i],mgh=mg_hs[i],
                                    alh=al_hs[i],sih=si_hs[i],sh=s_hs[i],kh=k_hs[i],
                                    cah=ca_hs[i],tih=ti_hs[i],vh=v_hs[i],mnh=mn_hs[i],
                                    nih=ni_hs[i],order=1)
    quadspecs[i] = generate_spectrum(Teff=teffs[i]/1000.,logg=loggs[i],feh=fe_hs[i],ch=c_hs[i],
                                    nh=n_hs[i],oh=o_hs[i],nah=na_hs[i],mgh=mg_hs[i],
                                    alh=al_hs[i],sih=si_hs[i],sh=s_hs[i],kh=k_hs[i],
                                    cah=ca_hs[i],tih=ti_hs[i],vh=v_hs[i],mnh=mn_hs[i],
                                    nih=ni_hs[i],order=2)
    partspecs[i] = generate_spectrum(Teff=teffs[i]/1000.,logg=loggs[i],feh=fe_hs[i],ch=c_hs[i],
                                    nh=n_hs[i],oh=o_hs[i],nah=na_hs[i],mgh=mg_hs[i],
                                    alh=al_hs[i],sih=si_hs[i],sh=s_hs[i],kh=k_hs[i],
                                    cah=ca_hs[i],tih=ti_hs[i],vh=v_hs[i],mnh=mn_hs[i],
                                    nih=ni_hs[i],order=1.5)
diffspec1 = linspecs-quadspecs
diffspec2 = quadspecs-partspecs
Tsort = np.argsort(teffs)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.imshow(diffspec1[Tsort],aspect=diffspec1.shape[1]/float(diffspec1.shape[0]))
plt.colorbar()
plt.subplot(122)
plt.imshow(diffspec2[Tsort],aspect=diffspec2.shape[1]/float(diffspec2.shape[0]))
plt.colorbar()
print '\tLINEAR \t\t\t PARTIAL QUADRATIC'
print 'MEAN\t', np.mean(diffspec1),'\t',np.mean(diffspec2)
print 'MEDIAN\t', np.median(diffspec1),'\t', np.median(diffspec2)
print 'MAD\t', MAD(diffspec1),'\t', MAD(diffspec2)
print 'VAR\t', np.var(diffspec1),'\t', np.var(diffspec2)


	LINEAR 			 PARTIAL QUADRATIC
MEAN	0.00192718790846 	-0.000362288440787
MEDIAN	0.00031943075169 	0.000302342452016
MAD	4.36593157192e-05 	3.66497165987e-05
VAR	0.000152255701938 	0.000152667004873