In [1]:
import os, os.path
from scipy import stats, special, misc
import fitsio
import extreme_deconvolution
from apogee.spec.plot import apStarWavegrid
import apogee.tools.read as apread
%pylab inline
from galpy.util import bovy_plot
from matplotlib import colors
import seaborn as sns
sns.set_style({'xtick.direction': u'in',
               'ytick.direction': u'in',
               'axes.labelsize': 18.0,
               'axes.titlesize': 18.0,
               'figure.figsize': array([ 6.64,  4.  ]),
               'grid.linewidth': 2.0,
               'legend.fontsize': 18.0,
               'lines.linewidth': 2.0,
               'lines.markeredgewidth': 0.0,
               'lines.markersize': 14.0,
               'patch.linewidth': 0.6,
               'xtick.labelsize': 16.0,
               'xtick.major.pad': 14.0,
               'xtick.major.width': 2.0,
               'xtick.minor.width': 1.0,
               'ytick.labelsize': 16.0,
               'ytick.major.pad': 14.0,
               'ytick.major.width': 2.0,})


Populating the interactive namespace from numpy and matplotlib

What are good groupings of LSFs to use for the APOGEE analysis?

How much do the LSFs vary?

Some code to load the pre-calculated LSFs for all fibers:


In [2]:
def loadLSF(fiber):
    return fitsio.read(os.path.join(os.getenv('DATADIR'),'spectral-tagging','lsf-dr13',
                                    'apogee-lsf-dr13-%03i.fits' % fiber))

In [3]:
allLSFs= []
for fiber in range(1,301):
    allLSFs.append(loadLSF(fiber))

In [61]:
lsfarr= numpy.array(allLSFs)

In [4]:
xlsf= numpy.linspace(-7.,7.,43) 
def fwhm(lsf):
    if len(lsf.shape) > 1:
        return [(numpy.interp(0.5*tlsf[21],tlsf[::-1][:20],xlsf[::-1][:20])-
                 numpy.interp(0.5*tlsf[21],tlsf[:20],xlsf[:20]))
                for tlsf in lsf]
    else:
        return (numpy.interp(0.5*lsf[21],lsf[::-1][:20],xlsf[::-1][:20])-
                numpy.interp(0.5*lsf[21],lsf[:20],xlsf[:20]))

Let's first look at how much the LSF changes as a function of wavelength


In [5]:
figsize(12,6)
plot(apStarWavegrid(),fwhm(allLSFs[49][::3]))
plot(apStarWavegrid(),fwhm(allLSFs[149][::3]))
plot(apStarWavegrid(),fwhm(allLSFs[249][::3]))
xlabel(r'$\lambda/\AA$')
ylabel(r'$\mathrm{FWHM/pixel}$')


Out[5]:
<matplotlib.text.Text at 0x11371ef50>

They vary quite a bit (the increase at the blue end is probably not real and is improved in the r6 reduction), but they vary similarly, so we'll just use one wavelength / detector. Those used in Figure 16 in Nidever et al. (2015) are


In [6]:
pix_blue= int(round(numpy.interp(15450.,apStarWavegrid(),numpy.arange(8575))))
pix_green= int(round(numpy.interp(16130.,apStarWavegrid(),numpy.arange(8575))))
pix_red= int(round(numpy.interp(16740.,apStarWavegrid(),numpy.arange(8575))))
print pix_blue, pix_green, pix_red


1655 4772 7459

In [7]:
fwhm_blue= numpy.array([fwhm(allLSFs[ll][::3][pix_blue]) for ll in range(300)])
fwhm_green= numpy.array([fwhm(allLSFs[ll][::3][pix_green]) for ll in range(300)])
fwhm_red= numpy.array([fwhm(allLSFs[ll][::3][pix_red]) for ll in range(300)])

In [8]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(numpy.arange(1,301),fwhm_blue,'.',
                    gcf=True,
                   yrange=[2.5,3.5],
                   xrange=[0.,301.],
                   xlabel=r'$\mathrm{Fiber\ number}$',
                   ylabel=r'$\mathrm{FWHM\ (blue)}$')
subplot(1,3,2)
bovy_plot.bovy_plot(numpy.arange(1,301),fwhm_green,'.',
                    gcf=True,
                   yrange=[2.5,3.5],
                   xrange=[0.,301.],
                   xlabel=r'$\mathrm{Fiber\ number}$',
                   ylabel=r'$\mathrm{FWHM\ (green)}$')
subplot(1,3,3)
bovy_plot.bovy_plot(numpy.arange(1,301),fwhm_red,'.',
                    gcf=True,
                   yrange=[2.5,3.5],
                   xrange=[0.,301.],
                   xlabel=r'$\mathrm{Fiber\ number}$',
                   ylabel=r'$\mathrm{FWHM\ (red)}$')
tight_layout()


Is the bifurcation in the red real?


In [68]:
bovy_plot.bovy_dens2d(lsfarr[:,pix_red*3,21-10:21+10].T,origin='lower',cmap='afmhot',
                     interpolation='nearest')


Out[68]:
<matplotlib.image.AxesImage at 0x1d58703d0>

That has some jumps it seems. Let's look at the LSF of two neighboring fibers with very different FWHM:


In [85]:
bovy_plot.bovy_plot(xlsf,lsfarr[206,3*pix_red])
bovy_plot.bovy_plot(xlsf,lsfarr[205,3*pix_red],overplot=True)


Out[85]:
[<matplotlib.lines.Line2D at 0x1d98fa490>]

Which are the bad fibers?


In [90]:
print numpy.arange(1,301)[(fwhm_red < 3.)+(numpy.arange(1,301) > 160)*(fwhm_red < 3.07)]


[ 99 104 105 110 111 113 115 120 121 123 124 131 132 138 139 141 143 145
 146 147 155 157 159 161 163 164 165 166 169 173 185 187 190 191 193 196
 197 198 199 200 207 216 217 226 232 233 234 235 238 242 259 291]

In [9]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(fwhm_blue,fwhm_green,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (blue)}$',
                    ylabel=r'$\mathrm{FWHM\ (green)}$')
subplot(1,3,2)
bovy_plot.bovy_plot(fwhm_blue,fwhm_red,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (blue)}$',
                    ylabel=r'$\mathrm{FWHM\ (red)}$')
subplot(1,3,3)
bovy_plot.bovy_plot(fwhm_green,fwhm_red,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    colorbar=True,cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (green)}$',
                    ylabel=r'$\mathrm{FWHM\ (red)}$',
                    clabel=r'$\mathrm{Fiber\ number}$')


Out[9]:
<matplotlib.collections.PathCollection at 0x1b53fdf10>

How much does the LSF between different visits typically vary?

Let's calculate the typical difference in FWHM for the visits going into a combined spectrum, for a random subset of allStar


In [10]:
allStar= apread.allStar(raw=True)
random_subindx= numpy.random.permutation(len(allStar))[:10000]
allStar= allStar[random_subindx]

In [11]:
allVisit= apread.allVisit(raw=True)

In [12]:
contributing_LSFs= []
for ii in range(len(allStar)):
    this_contributing_LSFs= []
    for jj in range(allStar['NVISITS'][ii]):
        this_contributing_LSFs.append(allVisit[allStar['VISIT_PK'][ii,jj]]['FIBERID'])
    contributing_LSFs.append(this_contributing_LSFs)

In [13]:
fwhm_dispersion_in_combined_blue= [numpy.std([fwhm(allLSFs[fib-1][::3][pix_blue]) for fib in contributing_LSF]) 
                                   for contributing_LSF in contributing_LSFs]
fwhm_dispersion_in_combined_green= [numpy.std([fwhm(allLSFs[fib-1][::3][pix_green]) for fib in contributing_LSF]) 
                                   for contributing_LSF in contributing_LSFs]
fwhm_dispersion_in_combined_red= [numpy.std([fwhm(allLSFs[fib-1][::3][pix_red]) for fib in contributing_LSF]) 
                                   for contributing_LSF in contributing_LSFs]

Now we show the distribution of dispersion in FWHM in the blue, green, and red compared to the total dispersion in FWHM for all fibers


In [14]:
figsize(16,4)
subplot(1,3,1)
dum= hist(fwhm_dispersion_in_combined_blue/numpy.std(fwhm_blue),range=[0.,2.],
    bins=101,normed=True)
bovy_plot.bovy_text(r'$\mathrm{Typical:}\ = %.2f$' % (numpy.median(fwhm_dispersion_in_combined_blue/numpy.std(fwhm_blue))),
                   top_right=True,size=18.)
ylim(0.,2.)
subplot(1,3,2)
dum= hist(fwhm_dispersion_in_combined_green/numpy.std(fwhm_green),range=[0.,2.],
    bins=101,normed=True)
xlabel(r'$\sigma_\mathrm{FWHM,going\ into\ 1\ combined} / \sigma_\mathrm{FWHM, all\ fibers}$')
ylim(0.,2.)
bovy_plot.bovy_text(r'$\mathrm{Typical:}\ = %.2f$' % (numpy.median(fwhm_dispersion_in_combined_green/numpy.std(fwhm_green))),
                   top_right=True,size=18.)
subplot(1,3,3)
dum= hist(fwhm_dispersion_in_combined_red/numpy.std(fwhm_red),range=[0.,2.],
    bins=101,normed=True)
ylim(0.,2.)
bovy_plot.bovy_text(r'$\mathrm{Typical:}\ = %.2f$' % (numpy.median(fwhm_dispersion_in_combined_red/numpy.std(fwhm_red))),
                   top_right=True,size=18.)


Thus, the typical dispersion in FWHM of the visits going into a combined spectrum is <~ 0.2 of the total dispersion in FWHM. The actual typical dispersion is


In [15]:
print numpy.median(fwhm_dispersion_in_combined_blue), \
    numpy.median(fwhm_dispersion_in_combined_green), \
    numpy.median(fwhm_dispersion_in_combined_red)


0.0187452738985 0.0219278901173 0.033095969596

Determining good groupings

Let's now run a clustering analysis on the LSFs to determine good groupings. We'll use extreme deconvolution to perform Gaussian clustering. Setup the LSF data for the clustering, using as the random error the typical dispersion in FWHM of a combined visit, and write a function to run the clustering algorithm:


In [16]:
ydata= numpy.vstack([fwhm_blue,fwhm_green,fwhm_red]).T
ycovar= numpy.ones_like(ydata)
ycovar[:,0]= numpy.median(fwhm_dispersion_in_combined_blue)**2.
ycovar[:,1]= numpy.median(fwhm_dispersion_in_combined_green)**2.
ycovar[:,2]= numpy.median(fwhm_dispersion_in_combined_red)**2.

In [17]:
def run_xd(ngauss,ydata,ycovar):
    # Setup the fit
    xamp= numpy.ones(ngauss)/float(ngauss)
    xmean= numpy.zeros((ngauss,ydata.shape[1]))
    xcovar= numpy.zeros((ngauss,ydata.shape[1],ydata.shape[1]))
    for ii in range(ngauss):
        xmean[ii]= numpy.median(ydata,axis=0)+numpy.random.normal(size=ydata.shape[1])*numpy.std(ydata,axis=0)/10.
        xcovar[ii]= numpy.cov(ydata,rowvar=0)
    lnl= extreme_deconvolution.extreme_deconvolution(ydata,ycovar,xamp,xmean,xcovar)
    return (lnl,xamp,xmean,xcovar)

We can determine a good number of groupings using leave-a-subset-out-cross-validation. We write a function to compute the cross-validation likelihood and set a part of the fibers aside as a test set:


In [18]:
def crossval_lnl(ngauss,ydata_train,ycovar_train,ydata_test,ycovar_test):
    # Function to fit the Gaussian mixture to a subset (_train) and evaluate the likelihood of _test
    xdlnl, xamp, xmean, xcovar= run_xd(ngauss,ydata_train,ycovar_train)
    return extreme_deconvolution.extreme_deconvolution(ydata,ycovar,xamp,xmean,xcovar,likeonly=True)

In [19]:
permIndx= numpy.random.permutation(300)
ydata_train= ydata[permIndx[:240]]
ycovar_train= ycovar[permIndx[:240]]
ydata_test= ydata[permIndx[240:]]
ycovar_test= ycovar[permIndx[240:]]

In [20]:
cval_lnl= [crossval_lnl(ng,ydata_train,ycovar_train,ydata_test,ycovar_test) for ng in range(1,16)]

In [21]:
figsize(12,6)
bovy_plot.bovy_plot(range(1,len(cval_lnl)+1),cval_lnl,'o',
                    xrange=[0,len(cval_lnl)+2],
                    yrange=[0.,5.],
                   xlabel=r'$\mathrm{Number\ of\ clusters}$',
                   ylabel=r'$\mathrm{Avg.\ likelihood\ of\ test\ set}$')


Out[21]:
[<matplotlib.lines.Line2D at 0x1d25efd50>]

It seems like with 5 clusters most of the variation in the LSF can be explained. Let's look at that solution:

Using 5 groups


In [47]:
lnl, xamp, xmean, xcovar= run_xd(5,ydata,ycovar)
print lnl


3.78101647647

In [48]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(fwhm_blue,fwhm_green,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (blue)}$',
                    ylabel=r'$\mathrm{FWHM\ (green)}$',
                   zorder=10)
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,:2],cov=xcovar[gg,:2,:2])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
subplot(1,3,2)
bovy_plot.bovy_plot(fwhm_blue,fwhm_red,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (blue)}$',
                    ylabel=r'$\mathrm{FWHM\ (red)}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,::2],cov=xcovar[gg,::2,::2])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
subplot(1,3,3)
bovy_plot.bovy_plot(fwhm_green,fwhm_red,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    colorbar=True,cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (green)}$',
                    ylabel=r'$\mathrm{FWHM\ (red)}$',
                    clabel=r'$\mathrm{Fiber\ number}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,1:],cov=xcovar[gg,1:,1:])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
tight_layout()


Now we compute assignments of fibers to clusters:


In [49]:
prob= numpy.empty((300,len(xmean)))
for gg in range(len(xmean)):
    for ii in range(300):
        nm= stats.multivariate_normal(mean=xmean[gg],cov=xcovar[gg]+ycovar[ii])
        prob[ii,gg]= numpy.log(xamp[gg])+nm.logpdf(ydata[ii])
for ii in range(300): prob[ii]-= misc.logsumexp(prob[ii])
prob= numpy.exp(prob)
assigned= [numpy.arange(len(xmean))[numpy.argmax(p)] for p in prob]

In [50]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(fwhm_blue,fwhm_green,c=assigned,
                    edgecolor='none',scatter=True,
                    gcf=True,
                    cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (blue)}$',
                    ylabel=r'$\mathrm{FWHM\ (green)}$',
                   zorder=10)
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,:2],cov=xcovar[gg,:2,:2])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
subplot(1,3,2)
bovy_plot.bovy_plot(fwhm_blue,fwhm_red,c=assigned,
                    edgecolor='none',scatter=True,
                    gcf=True,
                    cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (blue)}$',
                    ylabel=r'$\mathrm{FWHM\ (red)}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,::2],cov=xcovar[gg,::2,::2])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
subplot(1,3,3)
bovy_plot.bovy_plot(fwhm_green,fwhm_red,c=assigned,
                    edgecolor='none',scatter=True,
                    gcf=True,
                    colorbar=True,cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (green)}$',
                    ylabel=r'$\mathrm{FWHM\ (red)}$',
                    clabel=r'$\mathrm{Assigned\ cluster}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,1:],cov=xcovar[gg,1:,1:])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
tight_layout()



In [51]:
figsize(12,6)
bovy_plot.bovy_plot(numpy.arange(1,301),assigned,'.',
                   yrange=[-1.,5.],
                   xrange=[0.,301.],
                   xlabel=r'$\mathrm{Fiber\ number}$',
                   ylabel=r'$\mathrm{Assigned\ cluster}$')


Out[51]:
[<matplotlib.lines.Line2D at 0x1d5860b90>]

In [52]:
print numpy.arange(1,301)[numpy.array(assigned) == 1]


[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]

In [53]:
for gg in range(len(xmean)):
    indx= numpy.argmin((ydata[:,0]-xmean[gg,0])**2.+(ydata[:,1]-xmean[gg,1])**2.+(ydata[:,2]-xmean[gg,2])**2.)
    print gg, numpy.arange(1,301)[indx]


0 164
1 4
2 262
3 48
4 91

So 4 good groupings appear to be: 1-38, 39-150,151-250,251-300, with good representative fibers for each group: 19, 83, 216, and 298 This was for the DR12 LSFs, which don't have the bifurcation in the red FWHM issue of these DR13 ones.

Using 4 groups


In [56]:
lnl, xamp, xmean, xcovar= run_xd(4,ydata,ycovar)
print lnl


3.72456395281

In [57]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(fwhm_blue,fwhm_green,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (blue)}$',
                    ylabel=r'$\mathrm{FWHM\ (green)}$',
                   zorder=10)
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,:2],cov=xcovar[gg,:2,:2])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
subplot(1,3,2)
bovy_plot.bovy_plot(fwhm_blue,fwhm_red,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (blue)}$',
                    ylabel=r'$\mathrm{FWHM\ (red)}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,::2],cov=xcovar[gg,::2,::2])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
subplot(1,3,3)
bovy_plot.bovy_plot(fwhm_green,fwhm_red,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    colorbar=True,cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (green)}$',
                    ylabel=r'$\mathrm{FWHM\ (red)}$',
                    clabel=r'$\mathrm{Fiber\ number}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,1:],cov=xcovar[gg,1:,1:])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
tight_layout()



In [58]:
prob= numpy.empty((300,len(xmean)))
for gg in range(len(xmean)):
    for ii in range(300):
        nm= stats.multivariate_normal(mean=xmean[gg],cov=xcovar[gg]+ycovar[ii])
        prob[ii,gg]= numpy.log(xamp[gg])+nm.logpdf(ydata[ii])
for ii in range(300): prob[ii]-= misc.logsumexp(prob[ii])
prob= numpy.exp(prob)
assigned= [numpy.arange(len(xmean))[numpy.argmax(p)] for p in prob]

In [59]:
figsize(12,6)
bovy_plot.bovy_plot(numpy.arange(1,301),assigned,'.',
                   yrange=[-1.,4.],
                   xrange=[0.,301.],
                   xlabel=r'$\mathrm{Fiber\ number}$',
                   ylabel=r'$\mathrm{Assigned\ cluster}$')


Out[59]:
[<matplotlib.lines.Line2D at 0x1d553fd10>]

Using 3 groups


In [33]:
lnl, xamp, xmean, xcovar= run_xd(3,ydata,ycovar)

In [34]:
figsize(16,4)
subplot(1,3,1)
bovy_plot.bovy_plot(fwhm_blue,fwhm_green,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (blue)}$',
                    ylabel=r'$\mathrm{FWHM\ (green)}$',
                   zorder=10)
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,:2],cov=xcovar[gg,:2,:2])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
subplot(1,3,2)
bovy_plot.bovy_plot(fwhm_blue,fwhm_red,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (blue)}$',
                    ylabel=r'$\mathrm{FWHM\ (red)}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,::2],cov=xcovar[gg,::2,::2])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
subplot(1,3,3)
bovy_plot.bovy_plot(fwhm_green,fwhm_red,c=numpy.arange(1,301),
                    edgecolor='none',scatter=True,
                    gcf=True,
                    colorbar=True,cmap='afmhot',
                    xlabel=r'$\mathrm{FWHM\ (green)}$',
                    ylabel=r'$\mathrm{FWHM\ (red)}$',
                    clabel=r'$\mathrm{Fiber\ number}$')
x, y = np.mgrid[xlim()[0]:xlim()[1]:.01,ylim()[0]:ylim()[1]:.01]
pos = np.dstack((x, y))
for gg in range(len(xmean)):
    nm= stats.multivariate_normal(mean=xmean[gg,1:],cov=xcovar[gg,1:,1:])
    bovy_plot.bovy_dens2d(nm.pdf(pos).T,origin='lower',
                          xrange=[numpy.amin(x),numpy.amax(x)],
                          yrange=[numpy.amin(y),numpy.amax(y)],
                          justcontours=True,overplot=True,
                          cntrmass=True,
                          levels=special.erf(numpy.arange(1,4)/numpy.sqrt(2.)),
                          cntrcolors=colors.rgb2hex(sns.color_palette()[gg]),
                          zorder=gg)
tight_layout()



In [35]:
prob= numpy.empty((300,len(xmean)))
for gg in range(len(xmean)):
    for ii in range(300):
        nm= stats.multivariate_normal(mean=xmean[gg],cov=xcovar[gg]+ycovar[ii])
        prob[ii,gg]= numpy.log(xamp[gg])+nm.logpdf(ydata[ii])
for ii in range(300): prob[ii]-= misc.logsumexp(prob[ii])
prob= numpy.exp(prob)
assigned= [numpy.arange(len(xmean))[numpy.argmax(p)] for p in prob]

In [36]:
figsize(12,6)
bovy_plot.bovy_plot(numpy.arange(1,301),assigned,'.',
                   yrange=[-1.,3.],
                   xrange=[0.,301.],
                   xlabel=r'$\mathrm{Fiber\ number}$',
                   ylabel=r'$\mathrm{Assigned\ cluster}$')


Out[36]:
[<matplotlib.lines.Line2D at 0x1d4095890>]

In [ ]: