In [1]:
import os, os.path
from scipy import stats, special, misc
import fitsio
import extreme_deconvolution
from apogee.spec.plot import apStarWavegrid
import apogee.tools.read as apread
%pylab inline
from galpy.util import bovy_plot
from matplotlib import colors
import seaborn as sns
sns.set_style({'xtick.direction': u'in',
'ytick.direction': u'in',
'axes.labelsize': 18.0,
'axes.titlesize': 18.0,
'figure.figsize': array([ 6.64, 4. ]),
'grid.linewidth': 2.0,
'legend.fontsize': 18.0,
'lines.linewidth': 2.0,
'lines.markeredgewidth': 0.0,
'lines.markersize': 14.0,
'patch.linewidth': 0.6,
'xtick.labelsize': 16.0,
'xtick.major.pad': 14.0,
'xtick.major.width': 2.0,
'xtick.minor.width': 1.0,
'ytick.labelsize': 16.0,
'ytick.major.pad': 14.0,
'ytick.major.width': 2.0,})
Some code to load the pre-calculated LSFs for all fibers:
In [2]:
def loadLSF(fiber):
return fitsio.read(os.path.join(os.getenv('DATADIR'),'spectral-tagging','lsf-dr13',
'apogee-lsf-dr13-%03i.fits' % fiber))
In [3]:
allLSFs= []
for fiber in range(1,301):
allLSFs.append(loadLSF(fiber))
In [61]:
lsfarr= numpy.array(allLSFs)
In [4]:
xlsf= numpy.linspace(-7.,7.,43)
def fwhm(lsf):
if len(lsf.shape) > 1:
return [(numpy.interp(0.5*tlsf[21],tlsf[::-1][:20],xlsf[::-1][:20])-
numpy.interp(0.5*tlsf[21],tlsf[:20],xlsf[:20]))
for tlsf in lsf]
else:
return (numpy.interp(0.5*lsf[21],lsf[::-1][:20],xlsf[::-1][:20])-
numpy.interp(0.5*lsf[21],lsf[:20],xlsf[:20]))
Let's first look at how much the LSF changes as a function of wavelength
In [5]:
figsize(12,6)
plot(apStarWavegrid(),fwhm(allLSFs[49][::3]))
plot(apStarWavegrid(),fwhm(allLSFs[149][::3]))
plot(apStarWavegrid(),fwhm(allLSFs[249][::3]))
xlabel(r'$\lambda/\AA$')
ylabel(r'$\mathrm{FWHM/pixel}$')
Out[5]:
They vary quite a bit (the increase at the blue end is probably not real and is improved in the r6 reduction), but they vary similarly, so we'll just use one wavelength / detector. Those used in Figure 16 in Nidever et al. (2015) are
In [6]:
pix_blue= int(round(numpy.interp(15450.,apStarWavegrid(),numpy.arange(8575))))
pix_green= int(round(numpy.interp(16130.,apStarWavegrid(),numpy.arange(8575))))
pix_red= int(round(numpy.interp(16740.,apStarWavegrid(),numpy.arange(8575))))
print pix_blue, pix_green, pix_red
In [7]:
fwhm_blue= numpy.array([fwhm(allLSFs[ll][::3][pix_blue]) for ll in range(300)])
fwhm_green= numpy.array([fwhm(allLSFs[ll][::3][pix_green]) for ll in range(300)])
fwhm_red= numpy.array([fwhm(allLSFs[ll][::3][pix_red]) for ll in range(300)])
In [8]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(numpy.arange(1,301),fwhm_blue,'.',
gcf=True,
yrange=[2.5,3.5],
xrange=[0.,301.],
xlabel=r'$\mathrm{Fiber\ number}$',
ylabel=r'$\mathrm{FWHM\ (blue)}$')
subplot(1,3,2)
bovy_plot.bovy_plot(numpy.arange(1,301),fwhm_green,'.',
gcf=True,
yrange=[2.5,3.5],
xrange=[0.,301.],
xlabel=r'$\mathrm{Fiber\ number}$',
ylabel=r'$\mathrm{FWHM\ (green)}$')
subplot(1,3,3)
bovy_plot.bovy_plot(numpy.arange(1,301),fwhm_red,'.',
gcf=True,
yrange=[2.5,3.5],
xrange=[0.,301.],
xlabel=r'$\mathrm{Fiber\ number}$',
ylabel=r'$\mathrm{FWHM\ (red)}$')
tight_layout()
Is the bifurcation in the red real?
In [68]:
bovy_plot.bovy_dens2d(lsfarr[:,pix_red*3,21-10:21+10].T,origin='lower',cmap='afmhot',
interpolation='nearest')
Out[68]:
That has some jumps it seems. Let's look at the LSF of two neighboring fibers with very different FWHM:
In [85]:
bovy_plot.bovy_plot(xlsf,lsfarr[206,3*pix_red])
bovy_plot.bovy_plot(xlsf,lsfarr[205,3*pix_red],overplot=True)
Out[85]:
Which are the bad fibers?
In [90]:
print numpy.arange(1,301)[(fwhm_red < 3.)+(numpy.arange(1,301) > 160)*(fwhm_red < 3.07)]
In [9]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(fwhm_blue,fwhm_green,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (blue)}$',
ylabel=r'$\mathrm{FWHM\ (green)}$')
subplot(1,3,2)
bovy_plot.bovy_plot(fwhm_blue,fwhm_red,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (blue)}$',
ylabel=r'$\mathrm{FWHM\ (red)}$')
subplot(1,3,3)
bovy_plot.bovy_plot(fwhm_green,fwhm_red,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
colorbar=True,cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (green)}$',
ylabel=r'$\mathrm{FWHM\ (red)}$',
clabel=r'$\mathrm{Fiber\ number}$')
Out[9]:
Let's calculate the typical difference in FWHM for the visits going into a combined spectrum, for a random subset of allStar
In [10]:
allStar= apread.allStar(raw=True)
random_subindx= numpy.random.permutation(len(allStar))[:10000]
allStar= allStar[random_subindx]
In [11]:
allVisit= apread.allVisit(raw=True)
In [12]:
contributing_LSFs= []
for ii in range(len(allStar)):
this_contributing_LSFs= []
for jj in range(allStar['NVISITS'][ii]):
this_contributing_LSFs.append(allVisit[allStar['VISIT_PK'][ii,jj]]['FIBERID'])
contributing_LSFs.append(this_contributing_LSFs)
In [13]:
fwhm_dispersion_in_combined_blue= [numpy.std([fwhm(allLSFs[fib-1][::3][pix_blue]) for fib in contributing_LSF])
for contributing_LSF in contributing_LSFs]
fwhm_dispersion_in_combined_green= [numpy.std([fwhm(allLSFs[fib-1][::3][pix_green]) for fib in contributing_LSF])
for contributing_LSF in contributing_LSFs]
fwhm_dispersion_in_combined_red= [numpy.std([fwhm(allLSFs[fib-1][::3][pix_red]) for fib in contributing_LSF])
for contributing_LSF in contributing_LSFs]
Now we show the distribution of dispersion in FWHM in the blue, green, and red compared to the total dispersion in FWHM for all fibers
In [14]:
figsize(16,4)
subplot(1,3,1)
dum= hist(fwhm_dispersion_in_combined_blue/numpy.std(fwhm_blue),range=[0.,2.],
bins=101,normed=True)
bovy_plot.bovy_text(r'$\mathrm{Typical:}\ = %.2f$' % (numpy.median(fwhm_dispersion_in_combined_blue/numpy.std(fwhm_blue))),
top_right=True,size=18.)
ylim(0.,2.)
subplot(1,3,2)
dum= hist(fwhm_dispersion_in_combined_green/numpy.std(fwhm_green),range=[0.,2.],
bins=101,normed=True)
xlabel(r'$\sigma_\mathrm{FWHM,going\ into\ 1\ combined} / \sigma_\mathrm{FWHM, all\ fibers}$')
ylim(0.,2.)
bovy_plot.bovy_text(r'$\mathrm{Typical:}\ = %.2f$' % (numpy.median(fwhm_dispersion_in_combined_green/numpy.std(fwhm_green))),
top_right=True,size=18.)
subplot(1,3,3)
dum= hist(fwhm_dispersion_in_combined_red/numpy.std(fwhm_red),range=[0.,2.],
bins=101,normed=True)
ylim(0.,2.)
bovy_plot.bovy_text(r'$\mathrm{Typical:}\ = %.2f$' % (numpy.median(fwhm_dispersion_in_combined_red/numpy.std(fwhm_red))),
top_right=True,size=18.)
Thus, the typical dispersion in FWHM of the visits going into a combined spectrum is <~ 0.2 of the total dispersion in FWHM. The actual typical dispersion is
In [15]:
print numpy.median(fwhm_dispersion_in_combined_blue), \
numpy.median(fwhm_dispersion_in_combined_green), \
numpy.median(fwhm_dispersion_in_combined_red)
Let's now run a clustering analysis on the LSFs to determine good groupings. We'll use extreme deconvolution to perform Gaussian clustering. Setup the LSF data for the clustering, using as the random error the typical dispersion in FWHM of a combined visit, and write a function to run the clustering algorithm:
In [16]:
ydata= numpy.vstack([fwhm_blue,fwhm_green,fwhm_red]).T
ycovar= numpy.ones_like(ydata)
ycovar[:,0]= numpy.median(fwhm_dispersion_in_combined_blue)**2.
ycovar[:,1]= numpy.median(fwhm_dispersion_in_combined_green)**2.
ycovar[:,2]= numpy.median(fwhm_dispersion_in_combined_red)**2.
In [17]:
def run_xd(ngauss,ydata,ycovar):
# Setup the fit
xamp= numpy.ones(ngauss)/float(ngauss)
xmean= numpy.zeros((ngauss,ydata.shape[1]))
xcovar= numpy.zeros((ngauss,ydata.shape[1],ydata.shape[1]))
for ii in range(ngauss):
xmean[ii]= numpy.median(ydata,axis=0)+numpy.random.normal(size=ydata.shape[1])*numpy.std(ydata,axis=0)/10.
xcovar[ii]= numpy.cov(ydata,rowvar=0)
lnl= extreme_deconvolution.extreme_deconvolution(ydata,ycovar,xamp,xmean,xcovar)
return (lnl,xamp,xmean,xcovar)
We can determine a good number of groupings using leave-a-subset-out-cross-validation. We write a function to compute the cross-validation likelihood and set a part of the fibers aside as a test set:
In [18]:
def crossval_lnl(ngauss,ydata_train,ycovar_train,ydata_test,ycovar_test):
# Function to fit the Gaussian mixture to a subset (_train) and evaluate the likelihood of _test
xdlnl, xamp, xmean, xcovar= run_xd(ngauss,ydata_train,ycovar_train)
return extreme_deconvolution.extreme_deconvolution(ydata,ycovar,xamp,xmean,xcovar,likeonly=True)
In [19]:
permIndx= numpy.random.permutation(300)
ydata_train= ydata[permIndx[:240]]
ycovar_train= ycovar[permIndx[:240]]
ydata_test= ydata[permIndx[240:]]
ycovar_test= ycovar[permIndx[240:]]
In [20]:
cval_lnl= [crossval_lnl(ng,ydata_train,ycovar_train,ydata_test,ycovar_test) for ng in range(1,16)]
In [21]:
figsize(12,6)
bovy_plot.bovy_plot(range(1,len(cval_lnl)+1),cval_lnl,'o',
xrange=[0,len(cval_lnl)+2],
yrange=[0.,5.],
xlabel=r'$\mathrm{Number\ of\ clusters}$',
ylabel=r'$\mathrm{Avg.\ likelihood\ of\ test\ set}$')
Out[21]:
It seems like with 5 clusters most of the variation in the LSF can be explained. Let's look at that solution:
In [47]:
lnl, xamp, xmean, xcovar= run_xd(5,ydata,ycovar)
print lnl
In [48]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(fwhm_blue,fwhm_green,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (blue)}$',
ylabel=r'$\mathrm{FWHM\ (green)}$',
zorder=10)
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,:2],cov=xcovar[gg,:2,:2])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
subplot(1,3,2)
bovy_plot.bovy_plot(fwhm_blue,fwhm_red,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (blue)}$',
ylabel=r'$\mathrm{FWHM\ (red)}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,::2],cov=xcovar[gg,::2,::2])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
subplot(1,3,3)
bovy_plot.bovy_plot(fwhm_green,fwhm_red,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
colorbar=True,cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (green)}$',
ylabel=r'$\mathrm{FWHM\ (red)}$',
clabel=r'$\mathrm{Fiber\ number}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,1:],cov=xcovar[gg,1:,1:])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
tight_layout()
Now we compute assignments of fibers to clusters:
In [49]:
prob= numpy.empty((300,len(xmean)))
for gg in range(len(xmean)):
for ii in range(300):
nm= stats.multivariate_normal(mean=xmean[gg],cov=xcovar[gg]+ycovar[ii])
prob[ii,gg]= numpy.log(xamp[gg])+nm.logpdf(ydata[ii])
for ii in range(300): prob[ii]-= misc.logsumexp(prob[ii])
prob= numpy.exp(prob)
assigned= [numpy.arange(len(xmean))[numpy.argmax(p)] for p in prob]
In [50]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(fwhm_blue,fwhm_green,c=assigned,
edgecolor='none',scatter=True,
gcf=True,
cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (blue)}$',
ylabel=r'$\mathrm{FWHM\ (green)}$',
zorder=10)
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,:2],cov=xcovar[gg,:2,:2])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
subplot(1,3,2)
bovy_plot.bovy_plot(fwhm_blue,fwhm_red,c=assigned,
edgecolor='none',scatter=True,
gcf=True,
cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (blue)}$',
ylabel=r'$\mathrm{FWHM\ (red)}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,::2],cov=xcovar[gg,::2,::2])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
subplot(1,3,3)
bovy_plot.bovy_plot(fwhm_green,fwhm_red,c=assigned,
edgecolor='none',scatter=True,
gcf=True,
colorbar=True,cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (green)}$',
ylabel=r'$\mathrm{FWHM\ (red)}$',
clabel=r'$\mathrm{Assigned\ cluster}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,1:],cov=xcovar[gg,1:,1:])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
tight_layout()
In [51]:
figsize(12,6)
bovy_plot.bovy_plot(numpy.arange(1,301),assigned,'.',
yrange=[-1.,5.],
xrange=[0.,301.],
xlabel=r'$\mathrm{Fiber\ number}$',
ylabel=r'$\mathrm{Assigned\ cluster}$')
Out[51]:
In [52]:
print numpy.arange(1,301)[numpy.array(assigned) == 1]
In [53]:
for gg in range(len(xmean)):
indx= numpy.argmin((ydata[:,0]-xmean[gg,0])**2.+(ydata[:,1]-xmean[gg,1])**2.+(ydata[:,2]-xmean[gg,2])**2.)
print gg, numpy.arange(1,301)[indx]
So 4 good groupings appear to be: 1-38, 39-150,151-250,251-300, with good representative fibers for each group: 19, 83, 216, and 298 This was for the DR12 LSFs, which don't have the bifurcation in the red FWHM issue of these DR13 ones.
In [56]:
lnl, xamp, xmean, xcovar= run_xd(4,ydata,ycovar)
print lnl
In [57]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(fwhm_blue,fwhm_green,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (blue)}$',
ylabel=r'$\mathrm{FWHM\ (green)}$',
zorder=10)
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,:2],cov=xcovar[gg,:2,:2])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
subplot(1,3,2)
bovy_plot.bovy_plot(fwhm_blue,fwhm_red,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (blue)}$',
ylabel=r'$\mathrm{FWHM\ (red)}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,::2],cov=xcovar[gg,::2,::2])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
subplot(1,3,3)
bovy_plot.bovy_plot(fwhm_green,fwhm_red,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
colorbar=True,cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (green)}$',
ylabel=r'$\mathrm{FWHM\ (red)}$',
clabel=r'$\mathrm{Fiber\ number}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,1:],cov=xcovar[gg,1:,1:])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
tight_layout()
In [58]:
prob= numpy.empty((300,len(xmean)))
for gg in range(len(xmean)):
for ii in range(300):
nm= stats.multivariate_normal(mean=xmean[gg],cov=xcovar[gg]+ycovar[ii])
prob[ii,gg]= numpy.log(xamp[gg])+nm.logpdf(ydata[ii])
for ii in range(300): prob[ii]-= misc.logsumexp(prob[ii])
prob= numpy.exp(prob)
assigned= [numpy.arange(len(xmean))[numpy.argmax(p)] for p in prob]
In [59]:
figsize(12,6)
bovy_plot.bovy_plot(numpy.arange(1,301),assigned,'.',
yrange=[-1.,4.],
xrange=[0.,301.],
xlabel=r'$\mathrm{Fiber\ number}$',
ylabel=r'$\mathrm{Assigned\ cluster}$')
Out[59]:
In [33]:
lnl, xamp, xmean, xcovar= run_xd(3,ydata,ycovar)
In [34]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(fwhm_blue,fwhm_green,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (blue)}$',
ylabel=r'$\mathrm{FWHM\ (green)}$',
zorder=10)
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,:2],cov=xcovar[gg,:2,:2])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
subplot(1,3,2)
bovy_plot.bovy_plot(fwhm_blue,fwhm_red,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (blue)}$',
ylabel=r'$\mathrm{FWHM\ (red)}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,::2],cov=xcovar[gg,::2,::2])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
subplot(1,3,3)
bovy_plot.bovy_plot(fwhm_green,fwhm_red,c=numpy.arange(1,301),
edgecolor='none',scatter=True,
gcf=True,
colorbar=True,cmap='afmhot',
xlabel=r'$\mathrm{FWHM\ (green)}$',
ylabel=r'$\mathrm{FWHM\ (red)}$',
clabel=r'$\mathrm{Fiber\ number}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
nm= stats.multivariate_normal(mean=xmean[gg,1:],cov=xcovar[gg,1:,1:])
bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
xrange=[numpy.amin(x),numpy.amax(x)],
yrange=[numpy.amin(y),numpy.amax(y)],
justcontours=True,overplot=True,
cntrmass=True,
levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
zorder=gg)
tight_layout()
In [35]:
prob= numpy.empty((300,len(xmean)))
for gg in range(len(xmean)):
for ii in range(300):
nm= stats.multivariate_normal(mean=xmean[gg],cov=xcovar[gg]+ycovar[ii])
prob[ii,gg]= numpy.log(xamp[gg])+nm.logpdf(ydata[ii])
for ii in range(300): prob[ii]-= misc.logsumexp(prob[ii])
prob= numpy.exp(prob)
assigned= [numpy.arange(len(xmean))[numpy.argmax(p)] for p in prob]
In [36]:
figsize(12,6)
bovy_plot.bovy_plot(numpy.arange(1,301),assigned,'.',
yrange=[-1.,3.],
xrange=[0.,301.],
xlabel=r'$\mathrm{Fiber\ number}$',
ylabel=r'$\mathrm{Assigned\ cluster}$')
Out[36]:
In [ ]: