In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator,AutoMinorLocator
import spectralspace.sample.access_spectrum as acs
from spectralspace.analysis.empca_residuals import *
from scipy.interpolate import interp1d
import os, glob
from spectralspace.examples.ncells_calculation import *
from spectralspace.examples.comparative_plots import *
from spectralspace.examples.pc_plotter import *
from apogee.tools import wv2pix,pix2wv,toApStarGrid
%pylab inline
In [2]:
import matplotlib
font = {'family': 'serif',
'weight': 'normal',
'size' : 20
}
matplotlib.rc('font',**font)
default_cmap = 'plasma'
datadir = './data'
datadir = '/geir_data/scr/price-jones/Data/apogee_dim_reduction'
figdir = './fig/'
if not os.path.exists(datadir):
os.system('mkdir -p {0}'.format(datadir))
if not os.path.exists(figdir):
os.system('mkdir -p {0}'.format(figdir))
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: TEFF
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): <press Enter>
Lower limit (Enter for minimum): <press Enter>
Found good limits
And/or? done
In [69]:
redclump = empca_residuals('apogee','red_clump',maskFilter,ask=True,degree=2,datadir=datadir)
In [206]:
fig = plt.figure(figsize=(15,10))
# Create red giant subplot
ax = fig.add_subplot(111)
key = 'MEANFIB'
histogram2d(fig,ax,redclump.data['SIGFIB'][redclump.data[key]!=-9999],redclump.data[key][redclump.data[key]!=-9999],norm='log',vmin=1,vmax=5e2)
plt.axvline(15,color='k',lw=2)
plt.xlabel('sigma fiber')
plt.ylabel('mean fiber number')
Out[206]:
In [155]:
redclump.data.dtype
Out[155]:
In [186]:
numvisit = np.zeros(len(redclump.data))
for i in range(len(redclump.data)):
numvisit[i] = len(redclump.data['VISITS'][i].split(','))
In [187]:
fig = plt.figure(figsize=(15,10))
# Create red giant subplot
ax = fig.add_subplot(111)
key = 'TEFF'
histogram2d(fig,ax,redclump.data['SIGFIB'][redclump.data[key]>-9999],redclump.data[key][redclump.data[key]>-9999],norm='log',vmin=1,vmax=5e2)
In [189]:
fig = plt.figure(figsize=(15,10))
# Create red giant subplot
ax = fig.add_subplot(111)
histogram2d(fig,ax,redclump.data['SIGFIB'],numvisit,norm='log',vmin=1,vmax=5e2)
In [53]:
redclump.plotHistogram(redclump.data['SIGFIB'],norm=False,bins=100)
plt.axvline(15)
Out[53]:
In [104]:
fwhminfo = np.load(datadir+'/apogee_dr12_fiberfwhm_atpixel.npy')
fwhms_sample = fwhminfo[(np.round(redclump.matchingData['MEANFIB']-1).astype(int),)]
fwhms = np.median(fwhms_sample,axis=1)
In [133]:
redclump.imshow(fwhminfo)
In [114]:
default_cmap='plasma_r'
fig = plt.figure(figsize=(15,6))
# Create red giant subplot
ax = fig.add_subplot(121)
histogram2d(fig,ax,fwhms_sample.flatten(),np.repeat(redclump.teff,7214),bins=200,norm='log',vmin=1,vmax=1e6)
In [115]:
default_cmap='plasma_r'
fig = plt.figure(figsize=(15,6))
# Create red giant subplot
ax = fig.add_subplot(121)
histogram2d(fig,ax,fwhms_sample.flatten(),np.repeat(redclump.logg,7214),bins=200,norm='log',vmin=1,vmax=1e6)
In [117]:
default_cmap='plasma_r'
fig = plt.figure(figsize=(15,6))
# Create red giant subplot
ax = fig.add_subplot(121)
histogram2d(fig,ax,fwhms_sample.flatten()[np.repeat(redclump.fe_h,7214)!=-9999],np.repeat(redclump.fe_h,7214)[np.repeat(redclump.fe_h,7214)!=-9999],bins=200,norm='log',vmin=1,vmax=1e6)
In [105]:
default_cmap='plasma_r'
fig = plt.figure(figsize=(15,6))
# Create red giant subplot
ax = fig.add_subplot(121)
histogram2d(fig,ax,fwhms,redclump.teff,bins=200,vmax=20)
In [106]:
fig = plt.figure(figsize=(15,6))
# Create red giant subplot
ax = fig.add_subplot(121)
histogram2d(fig,ax,fwhms,redclump.logg,bins=200,vmax=20)
In [107]:
fig = plt.figure(figsize=(15,6))
# Create red giant subplot
ax = fig.add_subplot(121)
histogram2d(fig,ax,fwhms[redclump.fe_h!=-9999],redclump.fe_h[redclump.fe_h!=-9999],bins=200,vmax=15)
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: TEFF
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): <press Enter>
Lower limit (Enter for minimum): <press Enter>
Found good limits
And/or? done
In [151]:
redgiant = empca_residuals('apogee','red_giant',maskFilter,ask=True,degree=2,datadir=datadir)
In [101]:
from matplotlib.colors import LogNorm
def histogram2d(fig,ax,x,y,bins=100,clabel=False,vmin=0,vmax=110,norm='lin'):
"""
Create a 2D histogram of data represented by the two dimensions x and y
fig: Figure to plot in
ax: Subplot object to plot in
x: Array of data values in 'x'
y: Array of data values in 'y'
bins: Number of bins in which to divide each axis
clabel: Label for the colourbar - no colourbar is plotted if this is not given
vmin: Minimum value of the histogram
vmax: Maximum value of the histogram
"""
# Create histogram
H,xedges,yedges = np.histogram2d(x,y,bins=bins)
# Reorient appropriately
H = np.rot90(H)
H = np.flipud(H)
# Mask where bins are empty
Hmasked = np.ma.masked_where(H==0,H)
# Plot histogram
if norm == 'lin':
im = ax.pcolormesh(xedges,yedges,Hmasked,
cmap = plt.get_cmap(default_cmap),
vmin=vmin,vmax=vmax)
elif norm == 'log':
im = ax.pcolormesh(xedges,yedges,Hmasked,
cmap = plt.get_cmap(default_cmap),
norm=LogNorm(vmin=vmin,vmax=vmax))
# If colourbar is desired, plot and label it
if clabel:
cbar=fig.colorbar(im,pad = 0)
cbar.set_label(label=clabel,fontsize=20)
cbar.ax.tick_params(labelsize=20)
elif not clabel:
cbar=fig.colorbar(im,pad = 0)
cbar.ax.tick_params(labelsize=20)
In [153]:
# Choose density colormap
default_cmap = 'viridis'
# Create figure of appropriate size
fig = plt.figure(figsize=(15,6))
# Create red giant subplot
ax = fig.add_subplot(121)
histogram2d(fig,ax,redgiant.teff,redgiant.logg,bins=120,vmax=35)
plt.ylim(4,0)
plt.xlim(6000,3500)
# Don't plot on extremes of axis
plt.xticks(np.arange(4000,6000,500)[::-1],fontsize=20)
# Add minor ticks on both axes
xminorlocator = MultipleLocator(250./2)
ax.xaxis.set_minor_locator(xminorlocator)
yminorlocator = MultipleLocator(0.25)
ax.yaxis.set_minor_locator(yminorlocator)
plt.xticks(np.arange(4000,6000,500)[::-1],fontsize=20)
plt.yticks(fontsize=20)
# Adjust tick thickness and length
plt.tick_params(which='both', width=2)
plt.tick_params(which='major',length=7)
plt.tick_params(which='minor',length=4)
# Add axis labels
plt.ylabel(r'$\log g$',fontsize=26)
plt.xlabel(r'$T_{\mathrm{eff}}\,\,(\mathrm{K})$',fontsize=26)
# Shade sample area
plt.axhline(2,color='k',ls='--',lw=2)
plt.axhline(4,color='k',ls='--',lw=2)
plt.fill_between(np.arange(3500,6100,100),4,2,alpha=0.1,color='k')
# Add sample description
plt.text(5800,0.5,'red giant\nstars',va='top')
# Create red clump subplot
ax = fig.add_subplot(122)
histogram2d(fig,ax,redclump.teff,redclump.logg,clabel='number of stars',bins=80,vmax=125)
plt.ylim(4,0)
plt.xlim(6000,3500)
# Don't plot on extremes of x axis
plt.xticks(np.arange(4000,6000,500)[::-1],fontsize=20)
# Add minor ticks to both axes
xminorlocator = MultipleLocator(250./2)
ax.xaxis.set_minor_locator(xminorlocator)
yminorlocator = MultipleLocator(0.25)
ax.yaxis.set_minor_locator(yminorlocator)
plt.yticks(fontsize=20)
# Adjust tick thickness and length
plt.tick_params(which='both', width=2)
plt.tick_params(which='major',length=7)
plt.tick_params(which='minor',length=4)
# Add axis labels
plt.ylabel(r'$\log g$',fontsize=26)
plt.xlabel(r'$T_{\mathrm{eff}}\,\,(\mathrm{K})$',fontsize=26)
# Shade sample region
plt.axvline(4700,color='k',ls='--',lw=2)
plt.axvline(4900,color='k',ls='--',lw=2)
plt.fill_between(np.arange(4700,5000,100),4,0,alpha=0.1,color='k')
# Label sample
plt.text(5800,0.5,'red clump\nstars',va='top')
# Reduce space between plots and save
plt.subplots_adjust(wspace=0.2)
plt.savefig('{0}/{1}'.format(figdir,'HRdiagram.pdf'))
In [154]:
oc = empca_residuals('apogee','clusters',maskFilter,ask=True,degree=2,datadir=datadir)
oc.findResiduals(minStarNum=5,gen=True)
In [8]:
oc.plot_example_fit(indep=1,pixel=4313,xlabel='$T_{\mathrm{eff}}$ - med($T_{\mathrm{eff}}$) (K)',figsize=(10,8))
plt.subplots_adjust(left=0.2)
plt.savefig('{0}/example_fit.pdf'.format(figdir))
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: TEFF
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 4900
Lower limit (Enter for minimum): 4700
Found good limits
And/or? and
Data key: MEANFIB
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 300
Lower limit (Enter for minimum): 100
Found good limits
And/or? done
In [6]:
subrc = empca_residuals('apogee','red_clump',maskFilter,ask=True,degree=2,badcombpixmask=7935,datadir = datadir)
subrc.findResiduals(gen=False)
In [ ]:
subrc.pixelEMPCA(nvecs=20,varfunc=meanMed,savename='eig20_minSNR50_corrNone_meanMed.pkl')
In [3]:
subrcmodel = acs.pklread('{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935/eig20_minSNR50_corrNone_meanMed.pkl'.format(datadir))
# Retrieve data
subrcmodel=getarrays(subrcmodel)
# Reconstruct unsaved data from model
subrcmodel=reconstruct_EMPCA_data('{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),subrcmodel,minStarNum=5)
# Start with baseline as
totalapprox = np.tile(np.ma.mean(subrcmodel.residuals,axis=0),(subrcmodel.residuals.shape[0],1))
nvec = 8
for i in range(nvec):
totalapprox += np.outer(subrcmodel.coeff[:, i], subrcmodel.eigvec[i])
In [4]:
# Choose star
indx=312
# Retrieve plot colours
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,2))
# Set bounds on the apStarGrid
pixup = 8575
pixdown = 0
# Convert spectra to apStarGrid and reapply mask
fitspec = toApStarGrid(subrcmodel.fitspec[indx])
fitspec = np.ma.masked_array(fitspec,mask=fitspec==0)
modelspec = toApStarGrid(totalapprox[indx])
modelspec = np.ma.masked_array(modelspec,mask=modelspec==0)
compspec = toApStarGrid(subrcmodel.spectra[indx])
compspec = np.ma.masked_array(compspec,mask=(modelspec.mask|fitspec.mask))
resspec = toApStarGrid(subrcmodel.residuals[indx])
resspec = np.ma.masked_array(resspec,mask=compspec.mask)
errspec = toApStarGrid(subrcmodel.spectra_errs[indx])
errspec = np.ma.masked_array(errspec,mask=compspec.mask)
# Create array of wavelength values
wvs = pix2wv(np.arange(pixdown,pixup),apStarWavegrid = True)/1e4
# Specify xtick location
stepsize = 0.05
xticks = np.linspace(wvs[0],wvs[-1]+stepsize,stepsize)
# Initialize figure
plt.figure(figsize=(15,5))
# Begin model subplot
fit = plt.subplot2grid((3,1), (0, 0), rowspan=2)
# Plot spectrum and model
fit.plot(wvs,compspec[pixdown:pixup],lw=0.5,color=colors[0],label='original spectrum')
fit.plot(wvs,(modelspec[pixdown:pixup]+fitspec[pixdown:pixup]),lw=0.5,color=colors[-1],label='model spectrum:\n{0} principal components'.format(nvec))
fit.set_xlim(wvs[0],wvs[-1])
# Add minor ticks and labels
xminorlocator = AutoMinorLocator()
fit.xaxis.set_minor_locator(xminorlocator)
yminorlocator = AutoMinorLocator()
fit.yaxis.set_minor_locator(yminorlocator)
fit.set_xticklabels(['']*len(xticks))
fit.set_ylabel('normalized flux')
ymax=1.1
ymin=0.6
yticks = np.linspace(ymin,ymax,6)
fit.set_yticks(yticks)
fit.set_yticklabels(yticks)
# Create legend
legend = fit.legend(loc='best',fontsize=15)
legend.get_frame().set_linewidth(0.0)
for legobj in legend.legendHandles:
legobj.set_linewidth(4.0)
# Begin residual subplot
res = plt.subplot2grid((3,1), (2, 0))
# Plot model residuals
res.plot(wvs,(compspec[pixdown:pixup] - (modelspec[pixdown:pixup]+fitspec[pixdown:pixup])),lw=0.3,color=colors[-1])
# Mark median measurement uncertainty
res.axhline(-np.ma.median(errspec[pixdown:pixup]),color='k')
res.axhline(np.ma.median(errspec[pixdown:pixup]),color='k')
res.set_xlim(wvs[0],wvs[-1])
# Add minor ticks and labels
ymax=0.02
ymin=-0.02
yticks = np.linspace(ymin,ymax,5)
res.set_ylim(ymin,ymax)
res.set_yticks(yticks)
res.set_yticklabels(yticks)
xminorlocator = AutoMinorLocator()
res.xaxis.set_minor_locator(xminorlocator)
yminorlocator = AutoMinorLocator()
res.yaxis.set_minor_locator(yminorlocator)
res.set_ylabel('residuals')
res.set_xlabel('$\lambda\,\,(\mu m)$',fontsize=26)
# Adjust subplot positions and save figure
plt.subplots_adjust(bottom=0.2)
plt.savefig('{0}/model_comp_star{1}.pdf'.format(figdir,indx))
In [ ]:
subrcmodel = acs.pklread('{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935/eig20_minSNR50_corrNone_meanMed.pkl'.format(datadir))
# Retrieve data
subrcmodel=getarrays(subrcmodel)
# Reconstruct unsaved data from model
subrcmodel=reconstruct_EMPCA_data('{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),subrcmodel,minStarNum=5)
# Start with baseline as
totalapprox = np.tile(np.ma.mean(subrcmodel.residuals,axis=0),(subrcmodel.residuals.shape[0],1))
nvec = 4
for i in range(nvec):
totalapprox += np.outer(subrcmodel.coeff[:, i], subrcmodel.eigvec[i])
In [ ]:
# Choose star
indx=312
# Retrieve plot colours
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,2))
# Set bounds on the apStarGrid
pixup = 8575
pixdown = 0
# Convert spectra to apStarGrid and reapply mask
fitspec = toApStarGrid(subrcmodel.fitspec[indx])
fitspec = np.ma.masked_array(fitspec,mask=fitspec==0)
modelspec = toApStarGrid(totalapprox[indx])
modelspec = np.ma.masked_array(modelspec,mask=modelspec==0)
compspec = toApStarGrid(subrcmodel.spectra[indx])
compspec = np.ma.masked_array(compspec,mask=(modelspec.mask|fitspec.mask))
resspec = toApStarGrid(subrcmodel.residuals[indx])
resspec = np.ma.masked_array(resspec,mask=compspec.mask)
errspec = toApStarGrid(subrcmodel.spectra_errs[indx])
errspec = np.ma.masked_array(errspec,mask=compspec.mask)
# Create array of wavelength values
wvs = pix2wv(np.arange(pixdown,pixup),apStarWavegrid = True)/1e4
# Specify xtick location
stepsize = 0.05
xticks = np.linspace(wvs[0],wvs[-1]+stepsize,stepsize)
# Initialize figure
plt.figure(figsize=(15,5))
# Begin model subplot
fit = plt.subplot2grid((3,1), (0, 0), rowspan=2)
# Plot spectrum and model
fit.plot(wvs,compspec[pixdown:pixup],lw=0.5,color=colors[0],label='original spectrum')
fit.plot(wvs,(modelspec[pixdown:pixup]+fitspec[pixdown:pixup]),lw=0.5,color=colors[-1],label='model spectrum:\n{0} principal components'.format(nvec))
fit.set_xlim(wvs[0],wvs[-1])
# Add minor ticks and labels
xminorlocator = AutoMinorLocator()
fit.xaxis.set_minor_locator(xminorlocator)
yminorlocator = AutoMinorLocator()
fit.yaxis.set_minor_locator(yminorlocator)
fit.set_xticklabels(['']*len(xticks))
fit.set_ylabel('normalized flux')
ymax=1.1
ymin=0.6
yticks = np.linspace(ymin,ymax,6)
fit.set_yticks(yticks)
fit.set_yticklabels(yticks)
# Create legend
legend = fit.legend(loc='best',fontsize=15)
legend.get_frame().set_linewidth(0.0)
for legobj in legend.legendHandles:
legobj.set_linewidth(4.0)
# Begin residual subplot
res = plt.subplot2grid((3,1), (2, 0))
# Plot model residuals
res.plot(wvs,(compspec[pixdown:pixup] - (modelspec[pixdown:pixup]+fitspec[pixdown:pixup])),lw=0.3,color=colors[-1])
# Mark median measurement uncertainty
res.axhline(-np.ma.median(errspec[pixdown:pixup]),color='k')
res.axhline(np.ma.median(errspec[pixdown:pixup]),color='k')
res.set_xlim(wvs[0],wvs[-1])
# Add minor ticks and labels
ymax=0.02
ymin=-0.02
yticks = np.linspace(ymin,ymax,5)
res.set_ylim(ymin,ymax)
res.set_yticks(yticks)
res.set_yticklabels(yticks)
xminorlocator = AutoMinorLocator()
res.xaxis.set_minor_locator(xminorlocator)
yminorlocator = AutoMinorLocator()
res.yaxis.set_minor_locator(yminorlocator)
res.set_ylabel('residuals')
res.set_xlabel('$\lambda\,\,(\mu m)$',fontsize=26)
# Adjust subplot positions and save figure
plt.subplots_adjust(bottom=0.2)
plt.savefig('{0}/model_comp_star{1}_{2}PC.pdf'.format(figdir,indx,nvec))
In [29]:
subrcmodel = acs.pklread('{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935/eig20_minSNR50_corrNone_meanMed.pkl'.format(datadir))
# Retrieve data
subrcmodel=getarrays(subrcmodel)
# Reconstruct unsaved data from model
subrcmodel=reconstruct_EMPCA_data('{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),subrcmodel,minStarNum=5)
# Choose star
indx=312
# Set bounds on the apStarGrid
pixup = 8575
pixdown = 0
# Convert spectra to apStarGrid and reapply mask
fitspec = toApStarGrid(subrcmodel.fitspec[indx])
fitspec = np.ma.masked_array(fitspec,mask=fitspec==0)
compspec = toApStarGrid(subrcmodel.spectra[indx])
compspec = np.ma.masked_array(compspec,mask=(modelspec.mask|fitspec.mask))
resspec = toApStarGrid(subrcmodel.residuals[indx])
resspec = np.ma.masked_array(resspec,mask=compspec.mask)
errspec = toApStarGrid(subrcmodel.spectra_errs[indx])
errspec = np.ma.masked_array(errspec,mask=compspec.mask)
# Create array of wavelength values
wvs = pix2wv(np.arange(pixdown,pixup),apStarWavegrid = True)/1e4
# Reconstruct unsaved data from model
subrcmodel=reconstruct_EMPCA_data('{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),subrcmodel,minStarNum=5)
# Start with baseline as
plt.figure(figsize=(15,5))
res = plt.subplot(111)
nvecs = np.arange(0,21)
# Retrieve plot colours
respecs = np.ma.masked_array(np.zeros((len(nvecs),8575)))
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(nvecs)))
for nvec in range(len(nvecs)):
totalapprox = np.tile(np.ma.mean(subrcmodel.residuals,axis=0),(subrcmodel.residuals.shape[0],1))
for i in range(nvecs[nvec]):
totalapprox += np.outer(subrcmodel.coeff[:, i], subrcmodel.eigvec[i])
modelspec = toApStarGrid(totalapprox[indx])
modelspec = np.ma.masked_array(modelspec,mask=modelspec==0)
respecs[nvec] = compspec[pixdown:pixup] - (modelspec[pixdown:pixup]+fitspec[pixdown:pixup])
res.plot(wvs,(compspec[pixdown:pixup] - (modelspec[pixdown:pixup]+fitspec[pixdown:pixup])),lw=0.5,color=colors[nvec],alpha=0.1)
# Mark median measurement uncertainty
res.axhline(-np.ma.median(errspec[pixdown:pixup]),color='k')
res.axhline(np.ma.median(errspec[pixdown:pixup]),color='k')
res.set_xlim(wvs[0],wvs[-1])
xminorlocator = AutoMinorLocator()
res.xaxis.set_minor_locator(xminorlocator)
res.set_ylabel('residuals')
res.set_xlabel('$\lambda\,\,(\mu m)$',fontsize=26)
Out[29]:
In [30]:
plt.figure(figsize=(10,8))
plt.axes(yscale='log')
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,11))
errbound = np.ma.median(errspec)
xmin=-0.03
xmax=0.03
for n in [0,4,8]:#,15,20]:
edges = np.linspace(xmin,xmax,100)
hist,bin_edges = np.histogram(respecs[n][respecs[n].mask==False],bins=edges)
xcoords = (bin_edges + ((np.roll(bin_edges,1)-bin_edges)/2.))[1:]
widths = (np.roll(bin_edges,1)-bin_edges)[1:]
plt.step(xcoords,hist,color=colors[n],linewidth=3,label='{0} PCs'.format(nvecs[n]))
plt.ylabel('number of pixels',fontsize=16)
plt.xlabel('model residuals',fontsize=16)
legend = plt.legend(loc='best')
legend.get_frame().set_linewidth(0.0)
plt.savefig('{0}/model_residual_histogram.pdf'.format(figdir))
In [5]:
subrcmodel = acs.pklread('{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935/eig20_minSNR50_corrNone_meanMed.pkl'.format(datadir))
# Retrieve data
subrcmodel=getarrays(subrcmodel)
# Reconstruct unsaved data from model
subrcmodel=reconstruct_EMPCA_data('{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),subrcmodel,minStarNum=5)
# Choose star
#indx=np.random.randint(0,len(subrcmodel.fitspec))
# Set bounds on the apStarGrid
pixup = 8575
pixdown = 0
nstar = len(subrcmodel.spectra)
# Convert spectra to apStarGrid and reapply mask
fitspecs = np.ma.masked_array(np.zeros((nstar,8575)))
compspecs = np.ma.masked_array(np.zeros((nstar,8575)))
resspecs = np.ma.masked_array(np.zeros((nstar,8575)))
errspecs = np.ma.masked_array(np.zeros((nstar,8575)))
for indx in tqdm(range(nstar)):
fitspec = toApStarGrid(subrcmodel.fitspec[indx])
fitspecs[indx] = np.ma.masked_array(fitspec,mask=fitspec==0)
compspec = toApStarGrid(subrcmodel.spectra[indx])
compspecs[indx] = np.ma.masked_array(compspec,mask=(modelspec.mask|fitspecs[indx].mask))
resspec = toApStarGrid(subrcmodel.residuals[indx])
resspecs[indx] = np.ma.masked_array(resspec,mask=compspecs[indx].mask)
errspec = toApStarGrid(subrcmodel.spectra_errs[indx])
errspecs[indx] = np.ma.masked_array(errspec,mask=compspecs[indx].mask)
# Create array of wavelength values
wvs = pix2wv(np.arange(pixdown,pixup),apStarWavegrid = True)/1e4
In [23]:
nvecs = np.arange(0,21)
# Retrieve plot colours
fig=plt.figure(figsize=(10,8))
ax=plt.subplot(111)
ax.set_yscale('log')
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,9))
modelspecs = np.ma.masked_array(np.zeros((nstar,8575)))
xmin=-0.05
xmax=0.05
s = 0
respecs = np.ma.masked_array(np.zeros((len(nvecs)*nstar,8575)))
for nvec in tqdm([0,4,8]):#,15,20]):
totalapprox = np.tile(np.ma.mean(subrcmodel.residuals,axis=0),(subrcmodel.residuals.shape[0],1))
for i in range(nvecs[nvec]):
totalapprox += np.outer(subrcmodel.coeff[:, i], subrcmodel.eigvec[i])
for indx in range(nstar):
modelspec = toApStarGrid(totalapprox[indx])
modelspecs[indx] = np.ma.masked_array(modelspec,mask=modelspec==0)
specs = compspecs - (modelspecs+fitspecs)
respecs[s:s+nstar] = specs
s+=nstar
step = 0.0006
edges = np.arange(xmin,xmax,step)
hist,bin_edges = np.histogram(specs[specs.mask==False],bins=edges)
xcoords = (bin_edges + ((np.roll(bin_edges,1)-bin_edges)/2.))[1:]
widths = (np.roll(bin_edges,1)-bin_edges)[1:]
ax.step(xcoords,hist,color=colors[nvec],linewidth=3,label='{0} PCs'.format(nvecs[nvec]),zorder=1)
ax.set_ylabel('number of pixels',fontsize=18)
ax.set_xlabel('model residuals',fontsize=18)
xminorlocator = AutoMinorLocator()
ax.xaxis.set_minor_locator(xminorlocator)
legend = plt.legend(loc='best')
legend.get_frame().set_linewidth(0.0)
ax.tick_params(which='major',direction='in',length=5,width=2,bottom=True,top=True,left=True,right=True)
ax.tick_params(which='minor',direction='in',length=4,width=1.5,bottom=True,top=True,left=True,right=True)
plt.savefig('{0}/model_fullresidual_histogram.pdf'.format(figdir))
In [195]:
k = np.where(respecs<-0.2)[1]
hist,bin_edges = np.histogram(k,bins=8575)
xcoords = (bin_edges + ((np.roll(bin_edges,1)-bin_edges)/2.))[1:]
#widths = (np.roll(bin_edges,1)-bin_edges)[1:]
#plt.bar(xcoords,hist,widths,color='blue',linewidth=2,alpha=0.8,edgecolor='k')
plt.plot(xcoords,hist)
Out[195]:
In [11]:
oc = empca_residuals('apogee','clusters',maskFilter,ask=True,degree=2,datadir = datadir)
oc.findResiduals(gen=True)
oc.pixelEMPCA(nvecs=5,varfunc=np.ma.var,savename='eig5_minSNR50_corrNone_var.pkl')
oc.pixelEMPCA(nvecs=5,varfunc=meanMed,savename='eig5_minSNR50_corrNone_meanMed.pkl')
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: CLUSTER
Default is full range. Match or slice? match
Match value: N6819
And/or? and
Data key: MEANFIB
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 300
Lower limit (Enter for minimum): 100
Found good limits
And/or? done
In [12]:
oc = empca_residuals('apogee','clusters',maskFilter,ask=True,degree=2,datadir = datadir)
oc.findResiduals(gen=True)
oc.pixelEMPCA(nvecs=5,varfunc=np.ma.var,savename='eig5_minSNR50_corrNone_var.pkl')
oc.pixelEMPCA(nvecs=5,varfunc=meanMed,savename='eig5_minSNR50_corrNone_meanMed.pkl')
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: CLUSTER
Default is full range. Match or slice? match
Match value: N6819
And/or? and
Data key: MEANFIB
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 300
Lower limit (Enter for minimum): 100
Found good limits
And/or? done
In [13]:
oc = empca_residuals('apogee','clusters',maskFilter,ask=True,degree=2,badcombpixmask=7935,datadir = datadir)
oc.findResiduals(gen=True)
oc.pixelEMPCA(nvecs=5,varfunc=np.ma.var,savename='eig5_minSNR50_corrNone_var.pkl')
oc.pixelEMPCA(nvecs=5,varfunc=meanMed,savename='eig5_minSNR50_corrNone_meanMed.pkl')
In [14]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/clusters_12_CLUSTER_matchN6819/bm4351'.format(datadir),
'{0}/clusters_12_CLUSTER_matchN6819_MEANFIB_up300.0_lo100.0/bm4351'.format(datadir),
'{0}/clusters_12_CLUSTER_matchN6819_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
titles = ['Standard mask',
'Fiber cut',
'Fiber cut and \npersistence\nmask']
models = ['eig5_minSNR50_corrNone_var.pkl',
'eig5_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{mean}}$',r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))
In [100]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/clusters_12_CLUSTER_matchN6819/bm4351'.format(datadir),
'{0}/clusters_12_CLUSTER_matchN6819/bm4351/fibfit'.format(datadir),
'{0}/clusters_12_CLUSTER_matchN6819_MEANFIB_up300.0_lo100.0/bm4351'.format(datadir),
'{0}/clusters_12_CLUSTER_matchN6819_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/clusters_12_CLUSTER_matchN6819_MEANFIB_up300.0_lo100.0/bm7935/fibfit'.format(datadir)]
titles = ['Standard mask',
'Fiber fit',
'Fiber cut',
'Fiber cut and \npersistence\nmask',
'Fiber fit']
models = [#'eig5_minSNR50_corrNone_var.pkl',
'eig5_minSNR50_corrNone_meanMed.pkl']
labels = [#r'$R^2_{\mathrm{mean}}$',
r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))
In [107]:
m13 = empca_residuals('apogee','clusters',maskFilter,ask=True,degree=2,datadir=datadir,badcombpixmask=7935,fibfit=True)
In [119]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/clusters_12_CLUSTER_matchM13/bm4351'.format(datadir),
'{0}/clusters_12_CLUSTER_matchM13_MEANFIB_up300.0_lo100.0/bm4351'.format(datadir),
'{0}/clusters_12_CLUSTER_matchM13_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/clusters_12_CLUSTER_matchM13_MEANFIB_up300.0_lo0.0/bm7935/fibfit'.format(datadir)]
titles = ['Standard mask',
'Fiber cut',
'Fiber cut and \npersistence\nmask',
'Fiber fit']
models = [#'eig5_minSNR50_corrNone_var.pkl',
'eig5_minSNR50_corrNone_meanMed.pkl']
labels = [#r'$R^2_{\mathrm{mean}}$',
r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))
In [14]:
plt.plot(1,1,'o',color='r',markeredgecolor='k')
Out[14]:
In [14]:
import spectralspace.sample.access_spectrum as acs
import sys
import spectralspace.analysis.empca_residuals
import comparative_plots
reload(comparative_plots)
from comparative_plots import *
sys.modules['empca_residuals'] = spectralspace.analysis.empca_residuals
direcs = ['{0}/clusters_12_CLUSTER_matchN6819/bm4351'.format(datadir),
'{0}/clusters_12_CLUSTER_matchN6819_MEANFIB_up300.0_lo100.0/bm4351'.format(datadir),
'{0}/clusters_12_CLUSTER_matchN6819_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
titles = ['Standard mask',
'Fiber cut',
'Fiber cut and \npersistence\nmask']
models = ['eig5_minSNR50_corrNone_var.pkl',
'eig5_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{mean}}$',r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))
Run rc_example_method_comp.py 3 times after modifying the datadir
variable to match the one defined at the top of this notebook (local directory by default). Each run takes of order a day to complete, so consider running them in parallel and use a tool like screen to allow them to run in the background.
The script will prompt for responses for each of the three runs. Each will create subsamples of the full sample to use in jackknifing - the number of subsamples run in parallel at a given time is controlled by the maxsamp
parameter at the top of the file. Fill out the prompts for each run with the responses given below.
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: TEFF
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 4900
Lower limit (Enter for minimum): 4700
Found good limits
And/or? done
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: TEFF
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 4900
Lower limit (Enter for minimum): 4700
Found good limits
And/or? and
Data key: MEANFIB
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 300
Lower limit (Enter for minimum): 100
Found good limits
And/or? done
bmask
variable to 7935Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: TEFF
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 4900
Lower limit (Enter for minimum): 4700
Found good limits
And/or? and
Data key: MEANFIB
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 300
Lower limit (Enter for minimum): 100
Found good limits
And/or? done
In [15]:
from spectralspace.analysis.empca_residuals import empca_residuals
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4700.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
titles = ['Standard\nmask',
'Fiber cut and\nstandard\nmask',
'Fiber cut and\npersistence\nmask']
models = ['eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{mean}}$',r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6),subsamples=25)
In [14]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4700.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/ctmnorm/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/fibrefit/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
titles = ['Standard\nmask',
'Fiber cut and\nstandard\nmask',
'Fiber cut and\npersistence\nmask',
'Continuum \nrenormalized',
'Fiber fit']
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [17]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4800.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4800.0_MEANFIB_up300.0_lo100.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4800.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/fibrefit/red_clump_12_TEFF_up4900.0_lo4800.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
titles = ['Standard\nmask',
'Fiber cut and\nstandard\nmask',
'Fiber cut and\npersistence\nmask',
'Fiber fit']
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [7]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_clump_12_TEFF_up4800.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4800.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935/fibfit'.format(datadir)]
titles = ['Fiber cut and\npersistence\nmask',
'Fiber fit']
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [9]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4800.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4800.0_MEANFIB_up300.0_lo100.0/bm7935/fibfit'.format(datadir)]
titles = ['Fiber cut and\npersistence\nmask',
'Fiber fit']
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [3]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0_SIGFIB_up15.0_lo0.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0_SIGFIB_up5.0_lo0.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935/fibfit/'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_harshcut/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_bigcut/bm7935'.format(datadir),
#'{0}/fibfit/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0_SIGFIB_up15.0_lo0.0/bm7935/fibfit'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0_SIGFIB_up5.0_lo0.0/bm7935/fibfit'.format(datadir)]
titles = ['No fit',
'Sigma cut\nno fit',
'Sigma cut\nsmall, no fit',
'Fiber fit',
'Harsh fiber cut',
'Big fiber cut',
'Sigma cut',
'small sigma cut']
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [215]:
print '{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_harshcut/bm7935'.format(datadir)
In [137]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_clump_13_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_13_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0_SIGFIB_up15.0_lo0.0/bm7935'.format(datadir)]
titles = ['No fit',
'Sigma cut\nno fit',
'Sigma cut\nsmall, no fit',
'Fiber fit',
'Sigma cut',
'Big sigma cut']
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [62]:
a = acs.pklread('{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935/eig20_minSNR50_corrNone_meanMed.pkl'.format(datadir))
b = acs.pklread('{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0_SIGFIB_up15.0_lo0.0/bm7935/eig20_minSNR50_corrNone_meanMed.pkl'.format(datadir))
In [64]:
a.Vdata, b.Vdata, a.Vnoise, b.Vnoise
Out[64]:
In [50]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4800.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4800.0_MEANFIB_up300.0_lo100.0_SIGFIB_up15.0_lo0.0/bm7935'.format(datadir),
'{0}/fibfit/red_clump_12_TEFF_up4900.0_lo4800.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
titles = ['No fit',
'Sigma cut\n no fit',
'Fiber fit',
'Sigma cut',
'Big sigma cut']
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [117]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_giant_12_LOGG_up4.0_lo3.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_giant_12_LOGG_up4.0_lo3.0_MEANFIB_up300.0_lo100.0/bm7935/fibfit'.format(datadir)]
titles = ['No fit',
'Fiber fit',]
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [118]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_giant_12_LOGG_up3.0_lo2.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_giant_12_LOGG_up3.0_lo2.0_MEANFIB_up300.0_lo100.0/bm7935/fibfit'.format(datadir)]
titles = ['No fit',
'Fiber fit',]
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [205]:
data1 = np.load('{0}/meanfibdata.npy'.format(datadir))
data2 = np.load('{0}/meanfibdata2.npy'.format(datadir))
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title('Sigma cut')
plt.hist(data1,bins=300)
plt.ylim(0,60)
plt.xlim(100,300)
plt.xlabel('Fibre number')
plt.ylabel('Number of stars')
plt.axvline(119,color='k')
plt.axvline(123,color='k')
plt.axvline(148,color='k')
plt.axvline(152,color='k')
plt.axvline(178,color='k')
plt.axvline(182,color='k')
plt.axvline(208,color='k')
plt.axvline(212,color='k')
plt.axvline(238,color='k')
plt.axvline(242,color='k')
plt.axvline(268,color='k')
plt.axvline(272,color='k')
plt.subplot(122)
plt.title('No sigma cut')
plt.hist(data2,bins=200)
plt.ylim(0,60)
plt.xlabel('Fibre number')
plt.ylabel('Number of stars')
Out[205]:
In [15]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up150.0_lo100.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up200.0_lo150.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up250.0_lo200.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo250.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm4351'.format(datadir)]
titles = ['100-150',
'150-200',
'200-250',
'250-300',
'100-300']
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [22]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up150.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up200.0_lo150.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up250.0_lo200.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo250.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up200.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo200.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
titles = ['100-150',
'150-200',
'200-250',
'250-300',
'100-200',
'200-300',
'100-300']
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [26]:
import spectralspace.analysis.empca_residuals as empcares
import sys
sys.modules['empca_residuals'] = empcares
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up125.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up150.0_lo125.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up175.0_lo150.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up200.0_lo175.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up225.0_lo200.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up250.0_lo225.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up275.0_lo250.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo275.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up200.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo200.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
titles = ['100-125',
'125-150',
'150-175',
'175-200',
'200-225',
'225-250',
'250-275',
'275-300',
'100-200',
'200-300',
'100-300']
models = [#'eig20_minSNR50_corrNone_var.pkl',
'eig20_minSNR50_corrNone_meanMed.pkl']
labels = [r'$R^2_{\mathrm{MAD}}$']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrastR2_methods(direcs,models,labels,colors,titles=titles,figsize=(15,6))#,subsamples=25)
In [23]:
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4700.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm4351'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/ctmnorm/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
titles = ['Standard mask',
'Fiber cut',
'Fiber cut and \npersistence\nmask',
'Continuum \nrenormalized']
models = ['eig20_minSNR50_corrNone_meanMed.pkl']
labels = ['']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
contrast_Ncells(direcs,models,labels,colors,titles=titles,figsize=(15,6),generate = True,makemodel=True)
Run sample_jackknife.py 5 times after modifying the datadir
variable to match the one defined at the top of this notebook (local directory by default). Each run takes of order a day to complete, so consider running them in parallel and use a tool like screen to allow them to run in the background.
The script will prompt for responses for each of the five runs. Each will create subsamples of the full sample to use in jackknifing - the number of subsamples run in parallel at a given time is controlled by the maxsamp
parameter at the top of the file. Fill out the prompts for each run with the responses given below.
For strict reproduction, one also needs to fix the random seed to be consistent. This will only ensure reproducability on the same machine, not between users, but I've put the random seeds used by the authors for each run, which can be set by changing the seed
variable.
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: TEFF
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 4800
Lower limit (Enter for minimum): 4700
Found good limits
And/or? and
Data key: MEANFIB
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 300
Lower limit (Enter for minimum): 100
Found good limits
And/or? done
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: TEFF
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 4900
Lower limit (Enter for minimum): 4700
Found good limits
And/or? and
Data key: MEANFIB
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 300
Lower limit (Enter for minimum): 100
Found good limits
And/or? done
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: TEFF
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 4900
Lower limit (Enter for minimum): 4800
Found good limits
And/or? and
Data key: MEANFIB
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 300
Lower limit (Enter for minimum): 100
Found good limits
And/or? done
Change the sample_type
variable from 'red_clump'
to 'red_giant'
for subsequent runs.
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: LOGG
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 4
Lower limit (Enter for minimum): 3
Found good limits
And/or? and
Data key: MEANFIB
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 300
Lower limit (Enter for minimum): 100
Found good limits
And/or? done
Which data release? (Enter for 12): <press Enter>
Type done at any prompt when finished
Data key: LOGG
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 3
Lower limit (Enter for minimum): 2
Found good limits
And/or? and
Data key: MEANFIB
Default is full range. Match or slice? slice
Upper limit (Enter for maximum): 300
Lower limit (Enter for minimum): 100
Found good limits
And/or? done
In [17]:
direcs = ['{0}/red_clump_12_TEFF_up4800.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4800.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_giant_12_LOGG_up4.0_lo3.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_giant_12_LOGG_up3.0_lo2.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
labels = ['RC $4700\,\mathrm{K} < T_{\mathrm{eff}} < 4800\,\mathrm{K}$',
'RC $4700\,\mathrm{K} < T_{\mathrm{eff}} < 4900\,\mathrm{K}$',
'RC $4800\,\mathrm{K} < T_{\mathrm{eff}} < 4900\,\mathrm{K}$',
'RG $3.0 < \log g < 4.0$',
'RG $2.0 < \log g < 3.0$']
models = ['eig20_minSNR50_corrNone_meanMed.pkl']
# Set to empty list to use all available data
seeds = [44,26,36,95,62]
colours = plt.get_cmap('inferno')(np.linspace(0,0.8,len(models)*len(direcs)))
sample_compare_nvec(direcs,models,labels,subsamples=25,figsize=(8,8),seeds=seeds,colours=colours,
savename='nvec_comp.pdf',rotation=45,ha='right',bottom_margin=0.35)
Out[17]:
In [18]:
direcs = ['{0}/red_clump_12_TEFF_up4800.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_clump_12_TEFF_up4900.0_lo4800.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_giant_12_LOGG_up4.0_lo3.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir),
'{0}/red_giant_12_LOGG_up3.0_lo2.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
labels = ['RC $4700\,\mathrm{K} < T_{\mathrm{eff}} < 4800\,\mathrm{K}$',
'RC $4700\,\mathrm{K} < T_{\mathrm{eff}} < 4900\,\mathrm{K}$',
'RC $4800\,\mathrm{K} < T_{\mathrm{eff}} < 4900\,\mathrm{K}$',
'RG $3.0 < \log g < 4.0$',
'RG $2.0 < \log g < 3.0$']
models = ['eig20_minSNR50_corrNone_meanMed.pkl']
# Set to empty list to use all available data
seeds = [44,26,36,95,62]
colours = plt.get_cmap('inferno')(np.linspace(0,0.8,len(models)*len(direcs)))
sample_compare_ncells(direcs,models,labels,subsamples=25,generate=True,seeds=seeds,figsize=(8,8),colours=colours,
savename='ncells_comp.pdf',rotation=45,ha='right',bottom_margin=0.35)
Out[18]:
In [19]:
direcs = ['/geir_data/scr/price-jones/Data/apogee_dim_reduction/red_clump_12_TEFF_up4800.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935',
'/geir_data/scr/price-jones/Data/apogee_dim_reduction/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935',
'/geir_data/scr/price-jones/Data/apogee_dim_reduction/red_clump_12_TEFF_up4900.0_lo4800.0_MEANFIB_up300.0_lo100.0/bm7935',
'/geir_data/scr/price-jones/Data/apogee_dim_reduction/red_giant_12_LOGG_up4.0_lo3.0_MEANFIB_up300.0_lo100.0/bm7935',
'/geir_data/scr/price-jones/Data/apogee_dim_reduction/red_giant_12_LOGG_up3.0_lo2.0_MEANFIB_up300.0_lo100.0/bm7935']
titles = ['RC $4700\,\mathrm{K} < T_{\mathrm{eff}} < 4800\,\mathrm{K}$',
'RC $4700\,\mathrm{K} < T_{\mathrm{eff}} < 4900\,\mathrm{K}$',
'RC $4800\,\mathrm{K} < T_{\mathrm{eff}} < 4900\,\mathrm{K}$',
'RG $3.0 < \log g < 4.0$',
'RG $2.0 < \log g < 3.0$']
models = ['eig20_minSNR50_corrNone_meanMed.pkl']
labels = ['']
colors = plt.get_cmap('plasma')(np.linspace(0,0.85,len(models)))
# Set to empty list to use all available data
seeds = [44,26,25,95,62]
p = contrast_Ncells(direcs,models,labels,colors,titles=titles,seeds=seeds,figsize=(15,6),ybounds=(1,1e20),generate=True,subsamples=25,denom=consth,ncent=10)
Ncells_expo = np.median(p[:,0])
delta_Ncells_expo = np.sqrt(meanMed(p[:,0]))
Ncells_fact = np.median(p[:,1])
delta_Ncells_fact = np.sqrt(meanMed(p[:,1]))
print Ncells_expo, delta_Ncells_expo, Ncells_fact, delta_Ncells_fact
In [104]:
direcs = ['{0}/clusters_12_CLUSTER_matchN6819_MEANFIB_up300.0_lo100.0/bm7935/fibfit'.format(datadir)]
# Number of eigenvectors to plot
n=5
# Pixel range
pixup = 8575
pixdown = 0
# Load and assign model data
subrcmodel = np.load('{0}/eig5_minSNR50_corrNone_meanMed.pkl_data.npz'.format(direcs[0]))
eigvec = subrcmodel['eigvec']
coeff = subrcmodel['coeff']
# Scale components by median coefficient and offset each component
medcoeff = np.tile(np.median(np.sqrt(coeff**2),axis=0),(7214,1)).T
oset = 0.01
plt.figure(figsize=(15,8))
plot_fullvec((eigvec*medcoeff)[:n],n=n,pixup=pixup,pixdown=pixdown,oset=oset,ncol = 4,cmap='plasma',
maxval=0.67)
plt.savefig('{0}/median_scaled_eigvec.pdf'.format(figdir))
In [3]:
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
# Number of eigenvectors to plot
n=10
# Pixel range
pixup = 8575
pixdown = 0
# Load and assign model data
subrcmodel = np.load('{0}/eig20_minSNR50_corrNone_meanMed.pkl_data.npz'.format(direcs[0]))
eigvec = subrcmodel['eigvec']
coeff = subrcmodel['coeff']
# Scale components by median coefficient and offset each component
medcoeff = np.tile(np.median(np.sqrt(coeff**2),axis=0),(7214,1)).T
oset = 0.007
plt.figure(figsize=(15,8))
plot_fullvec((eigvec*medcoeff)[:n],n=n,pixup=pixup,pixdown=pixdown,oset=oset,ncol = 4,cmap='plasma',
maxval=0.67)
plt.savefig('{0}/median_scaled_eigvec.pdf'.format(figdir))
plt.axvline(1.527)
Out[3]:
In [94]:
direcs = ['{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935/fibfit'.format(datadir)]
# Number of eigenvectors to plot
n=10
# Pixel range
pixup = 8575
pixdown = 0
# Load and assign model data
subrcmodel = np.load('{0}/eig20_minSNR50_corrNone_meanMed.pkl_data.npz'.format(direcs[0]))
eigvec4 = subrcmodel['eigvec']
coeff4 = subrcmodel['coeff']
# Scale components by median coefficient and offset each component
medcoeff4 = np.tile(np.median(np.sqrt(coeff4**2),axis=0),(7214,1)).T
oset = 0.007
plt.figure(figsize=(15,8))
plot_fullvec((eigvec4*medcoeff4)[:n],n=n,pixup=pixup,pixdown=pixdown,oset=oset,ncol = 4,cmap='plasma',
maxval=0.67)
#plt.savefig('{0}/median_scaled_eigvec.pdf'.format(figdir))
In [31]:
direcs = ['{0}/red_clump_12_TEFF_up4750.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
# Number of eigenvectors to plot
n=10
# Pixel range
pixup = 8575
pixdown = 0
# Load and assign model data
subrcmodel = np.load('{0}/eig20_minSNR50_corrNone_meanMed.pkl_data.npz'.format(direcs[0]))
eigvec = subrcmodel['eigvec']
coeff = subrcmodel['coeff']
# Scale components by median coefficient and offset each component
medcoeff = np.tile(np.median(np.sqrt(coeff**2),axis=0),(7214,1)).T
oset = 0.007
plt.figure(figsize=(15,8))
plot_fullvec((eigvec*medcoeff)[:n],n=n,pixup=pixup,pixdown=pixdown,oset=oset,ncol = 4,cmap='plasma',
maxval=0.67)
plt.savefig('{0}/median_scaled_eigvec.pdf'.format(figdir))
In [95]:
plt.figure(figsize=(15,8))
fibvecs = eigvec4*medcoeff4
origvecs = eigvec*medcoeff
plot_fullvec((fibvecs-origvecs)[:n],n=n,pixup=pixup,pixdown=pixdown,oset=oset,ncol = 4,cmap='plasma',maxval=0.67)
In [47]:
corrcoefs = []
plt.figure(1)
for i in range(8):
plt.figure(i+2)
elem = 'NI_H'
x = rcdist_sample.matchingData[elem][rcdist_sample.matchingData[elem]!=-9999]
y = coeff[:,i][rcdist_sample.matchingData[elem]!=-9999]
plt.plot(x,y,'.',alpha=0.1)
plt.axhline(0,color='k')
plt.ylim(-1,1)
plt.ylabel('coefficient')
corr = np.corrcoef(x,y)[0,1]
plt.title('PC {0} - r = {1}'.format(i+1,corr))
corrcoefs.append(corr)
corrcoefs = np.fabs(np.array(corrcoefs))
plt.figure(1)
plt.plot(corrcoefs,'o-')
print 'max {0}'.format(np.max(corrcoefs))
In [8]:
direcs=['{0}/ctmnorm/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
n=10
# Pixel range
pixup = 8575
pixdown = 0
# Load and assign model data
subrcmodel = np.load('{0}/eig20_minSNR50_corrNone_meanMed.pkl_data.npz'.format(direcs[0]))
eigvec1 = subrcmodel['eigvec']
coeff1 = subrcmodel['coeff']
# Scale components by median coefficient and offset each component
medcoeff1 = np.tile(np.median(np.sqrt(coeff1**2),axis=0),(7214,1)).T
oset = 0.007
plt.figure(figsize=(15,8))
plot_fullvec((eigvec1*medcoeff1)[:n],n=n,pixup=pixup,pixdown=pixdown,oset=oset,ncol = 4,cmap='plasma',maxval=0.67)
In [122]:
direcs=['{0}/fibrefit/red_clump_12_TEFF_up4800.0_lo4700.0_MEANFIB_up300.0_lo100.0/bm7935'.format(datadir)]
n=10
# Pixel range
pixup = 8575
pixdown = 0
# Load and assign model data
subrcmodel = np.load('{0}/eig20_minSNR50_corrNone_meanMed.pkl_data.npz'.format(direcs[0]))
eigvec2 = subrcmodel['eigvec']
coeff2 = subrcmodel['coeff']
# Scale components by median coefficient and offset each component
medcoeff2 = np.tile(np.median(np.sqrt(coeff2**2),axis=0),(7214,1)).T
oset = 0.007
plt.figure(figsize=(15,8))
plot_fullvec((eigvec2*medcoeff2)[:n],n=n,pixup=pixup,pixdown=pixdown,oset=oset,ncol = 4,cmap='plasma',maxval=0.67)
In [123]:
direcs=['{0}/red_clump_12_TEFF_up4900.0_lo4700.0_MEANFIB_up300.0_lo100.0_SIGFIB_up15.0_lo0.0/bm7935'.format(datadir)]
n=10
# Pixel range
pixup = 8575
pixdown = 0
# Load and assign model data
subrcmodel = np.load('{0}/eig20_minSNR50_corrNone_meanMed.pkl_data.npz'.format(direcs[0]))
eigvec3 = subrcmodel['eigvec']
coeff3 = subrcmodel['coeff']
# Scale components by median coefficient and offset each component
medcoeff3 = np.tile(np.median(np.sqrt(coeff3**2),axis=0),(7214,1)).T
oset = 0.007
plt.figure(figsize=(15,8))
plot_fullvec((eigvec3*medcoeff3)[:n],n=n,pixup=pixup,pixdown=pixdown,oset=oset,ncol = 4,cmap='plasma',maxval=0.67)
In [108]:
plt.figure(figsize=(15,8))
ctmvecs = eigvec1*medcoeff1
origvecs = eigvec*medcoeff
plot_fullvec((ctmvecs-origvecs)[:n],n=n,pixup=pixup,pixdown=pixdown,oset=oset,ncol = 4,cmap='plasma',maxval=0.67)
In [124]:
plt.figure(figsize=(15,8))
fibvecs = eigvec2*medcoeff2
origvecs = eigvec*medcoeff
plot_fullvec((fibvecs-origvecs)[:n],n=n,pixup=pixup,pixdown=pixdown,oset=oset,ncol = 4,cmap='plasma',maxval=0.67)
In [129]:
plt.figure(figsize=(15,8))
fibvecs = eigvec3*medcoeff3
origvecs = eigvec*medcoeff
plot_fullvec((fibvecs-origvecs)[:n],n=n,pixup=pixup,pixdown=pixdown,oset=oset,ncol = 4,cmap='plasma',maxval=0.67)
In [4]:
import apogee.modelspec.turbospec
from apogee.modelatm import atlas9
#atm= atlas9.Atlas9Atmosphere(teff=4800.,logg=2.6,metals=0.5,am=0.25,cm=0.25)
# The following takes a while ...
#synspeca= apogee.modelspec.turbospec.synth([26,-0.25],modelatm=atm,
# linelist='201404080919',lsf='all',cont='cannon',
# vmacro=6.,isotopes='solar')
In [5]:
from apogee.tools import toAspcapGrid,toApStarGrid
#synspec = toAspcapGrid((synspeca[0]-1)/4.)
In [6]:
from apogee.tools import wv2pix,pix2wv
from apogee.spec.plot import _SII_lines,_CAI_lines,_FEI_lines,_MGI_lines,_ALI_lines,_SI_lines,_KI_lines,_TII_lines,_VI_lines,_CRI_lines,_MNI_lines,_COI_lines,_NII_lines,_CUI_lines,_NAI_lines,_OH_lines,_CO_lines,_CN_lines
linelist = [_SII_lines,_CAI_lines,_FEI_lines,_MGI_lines]#,_ALI_lines,_SI_lines,_KI_lines,_TII_lines,_VI_lines,_CRI_lines,_MNI_lines,_COI_lines,_NII_lines,_CUI_lines,_NAI_lines,_OH_lines,_CO_lines,_CN_lines]
labels = ['Si','Ca','Fe','Mg']
colors = plt.get_cmap('viridis')(np.linspace(0,0.8,len(linelist)))
pixel_centres = wv2pix([(1.5+0.115623)*1e4,(1.5+0.168522)*1e4],apStarWavegrid=False)
spr = 100
plt.figure(figsize=(15,10))
i = 1
num = 3
for e in range(num):
for p in range(len(pixel_centres)):
for l in range(len(linelist)):
for ll in range(len(linelist[l])):
plt.axvline(linelist[l][ll]/1e4,color=colors[l],alpha=0.6,lw=3,label=labels[l])
c = pixel_centres[p]
plt.subplot(num,len(pixel_centres),i)
wv = pix2wv(np.arange(c-spr,c+spr),apStarWavegrid=False)
plt.plot(wv/1e4,eigvec[e][c-spr:c+spr],color='k')
plt.xlim(min(wv)/1e4,max(wv)/1e4)
plt.ylim(-0.03,0.03)
if i==0:
legend = plt.legend(loc='best')
legend.get_frame().set_linewidth(0.0)
i+=1
plt.tight_layout()
In [7]:
from apogee.tools import wv2pix,pix2wv
from apogee.spec.plot import _SII_lines,_CAI_lines,_FEI_lines,_MGI_lines,_ALI_lines,_SI_lines,_KI_lines,_TII_lines,_VI_lines,_CRI_lines,_MNI_lines,_COI_lines,_NII_lines,_CUI_lines,_NAI_lines,_OH_lines,_CO_lines,_CN_lines
linelist = [_SII_lines,_CAI_lines,_FEI_lines,_MGI_lines,_ALI_lines,_SI_lines,_KI_lines,_TII_lines,_VI_lines,_CRI_lines,_MNI_lines,_COI_lines,_NII_lines,_CUI_lines,_NAI_lines,_OH_lines,_CO_lines,_CN_lines]
linelist = [_KI_lines]
labels = ['Si','Ca','Fe','Mg']
colors = plt.get_cmap('viridis')(np.linspace(0,0.8,len(linelist)))
e = 2
k = np.arange(1.516,1.70,0.01)
from apogee.spec.window import tophat
elems = ['C','N','O','Na','Mg','Al','Si','S','K','Ca','Ti','V','Mn','Fe','Ni']
for elem in elems:
plt.figure(figsize=(15,35))
hat = tophat(elem,dr=12,apStarWavegrid=True)
inds = np.where(hat==True)[0]
diff = np.gradient(inds)
trans = np.where(diff>1)[0]
windows = []
ymin = -0.07
ymax = 0.07
r = 0
while r < len(trans):
if r == 0:
windows.append((inds[0],inds[trans[r]]))
r+=1
elif r > 0 and r < len(trans)-1:
windows.append((inds[trans[r]],inds[trans[r+1]]))
r+=2
elif r == len(trans)-1:
windows.append((inds[trans[r]],inds[-1]))
r+=1
for i in range(len(k)-1):
plt.subplot(len(k),1,i+1)
plt.text(k[i]+0.0005,0.05-0.01,elem)
wvrange = np.array([k[i]*1e4,k[i+1]*1e4])
pixmin,pixmax = wv2pix(wvrange,apStarWavegrid=True)
if pixmin < 0:
pixmin = 0
if pixmax > 8575:
pixmax = 8575
wv = pix2wv(np.arange(pixmin,pixmax),apStarWavegrid=True)
plt.plot(wv/1e4,toApStarGrid(eigvec[e])[pixmin:pixmax],color='C0',lw=2)
for w in windows:
x = pix2wv(np.arange(w[0],w[1]),apStarWavegrid=True)
plt.fill_between(x/1e4, ymin, ymax, facecolor='C1', alpha=0.5)
plt.ylim(ymin,ymax)
plt.xlim(k[i],k[i+1])
plt.tight_layout()
In [71]:
from scipy.interpolate import interp1d
from scipy.optimize import brentq
doplot = True
rootfindtol = 2
rootmatchtol = 2
num = 1
k = np.arange(1.516,1.70,0.01)
def rootmatch(synspec,eigvec=eigvec,k=k,num=num,rootfindtol=rootfindtol,figs=None,
rootmatchtol=rootmatchtol,doplot=doplot,cutoff=0.01,figsize=(15,15),
pairrange=False,deriv2tol=0):
multimatch = 0
matchsuccesses = np.zeros(num)
matchfails = np.zeros(num)
totalroots = np.zeros(num)
matchfraction = np.zeros(num)
colors = plt.get_cmap('viridis')(np.linspace(0.5,0.9,num))
if not figs:
figs = len(k)
if doplot and figs == 1:
plt.figure(figsize=figsize)
if isinstance(cutoff,float):
cutoff = np.zeros(num)+cutoff
for i in range(len(k)-1):
if doplot and figs == 1:
ax = plt.subplot(len(k),1,i+1)
if doplot and figs > 1:
plt.figure(figsize=figsize)
if not pairrange:
wvrange = np.array([k[i]*1e4,k[i+1]*1e4])
elif pairrange:
wvrange = np.array([k[i][0]*1e4,k[i][1]*1e4])
pixmin,pixmax = wv2pix(wvrange,apStarWavegrid=True)
for e in range(num):
syn = toApStarGrid(synspec)
es = toApStarGrid(eigvec[e])
es[0:322] = np.nan
syn[0:322] = np.nan
es[3242:3648] = np.nan
syn[3242:3648] = np.nan
es[6048:6412] = np.nan
syn[6048:6412] = np.nan
es[8306:] = np.nan
syn[8306:] = np.nan
if doplot and figs > 1:
plt.subplot(num,1,e+1)
if pixmin < 0:
pixmin = 0
if pixmax > 8575:
pixmax = 8575
wv = pix2wv(np.arange(pixmin,pixmax),apStarWavegrid=True)
if doplot:
ax.plot(wv/1e4,es[pixmin:pixmax],color=colors[e],lw=2,label='PC {0}'.format(e+1))
ax.plot(wv/1e4,syn[pixmin:pixmax],color='k',ls='--',alpha=0.6,lw=2,label='synthetic\nspectrum')
eigfn = interp1d(wv/1e4,toApStarGrid(eigvec[e])[pixmin:pixmax])
eigderiv = interp1d(wv/1e4,np.gradient(toApStarGrid(np.fabs(eigvec[e]))[pixmin:pixmax]))
eigderiv2 = interp1d(wv/1e4,np.gradient(np.gradient(toApStarGrid(np.fabs(eigvec[e]))[pixmin:pixmax])))
synderiv = interp1d(wv/1e4,np.gradient(toApStarGrid(np.fabs(synspec))[pixmin:pixmax]))
synderiv2 = interp1d(wv/1e4,np.gradient(np.gradient(toApStarGrid(np.fabs(synspec))[pixmin:pixmax])))
synroots = []
eigroots = []
r = 0 + pixmin
while r+rootfindtol < pixmax:
a = pix2wv(r,apStarWavegrid=True)/1e4
b = pix2wv(r+rootfindtol,apStarWavegrid=True)/1e4
if np.sign(synderiv(a)) != np.sign(synderiv(b)):
root = brentq(synderiv,a,b)
if synderiv2(root) < deriv2tol:
synroots.append(root)
if np.sign(eigderiv(a)) != np.sign(eigderiv(b)):
root = brentq(eigderiv,a,b)
if eigderiv2(root) < deriv2tol:
eigroots.append(root)
r+=rootfindtol
synroots = np.array(synroots)
eigroots = np.array(eigroots)
matchroots = np.zeros(len(eigroots))
matchsuccess = 0
matchfail = 0
for t in range(len(eigroots)):
if np.fabs(eigfn(eigroots[t])) >= cutoff[e]:
rootdiff = wv2pix(synroots*1e4,apStarWavegrid=True)-wv2pix(eigroots[t]*1e4,apStarWavegrid=True)
matches = np.where(np.fabs(rootdiff)<rootmatchtol)[0]
if len(matches) > 1:
multimatch +=1
elif len(matches) == 1 and synroots[matches] not in matchroots:
#if doplot:
# plt.axvline(eigroots[t],color=colors[e],lw=2,alpha=0.3)
# plt.axvline(synroots[matches],color='C3',lw=2,alpha=0.3)
matchroots[t] = synroots[matches]
matchsuccess += 1
elif len(matches) == 0:
if doplot:
plt.axvline(eigroots[t],color='k',lw=2,alpha=0.3)
matchfail += 1
matchsuccesses[e] += matchsuccess
matchfails[e] += matchfail
totalroots[e] += len(eigroots)
if doplot:
xminorlocator = AutoMinorLocator()
yminorlocator = AutoMinorLocator()
ax.xaxis.set_minor_locator(xminorlocator)
ax.yaxis.set_minor_locator(yminorlocator)
ax.annotate('principal component magnitude',(0.03,0.65),xycoords='figure fraction',rotation=90)
ax.tick_params(which='major',direction='in',length=5,width=2,bottom=True,top=True,left=True,right=True)
ax.tick_params(which='minor',direction='in',length=4,width=1.5,bottom=True,top=True,left=True,right=True)
plt.axhline(-cutoff[e],lw=0.5,color='k',ls='--')
plt.axhline(cutoff[e],lw=0.5,color='k',ls='--')
plt.xlim(min(wv)/1e4,max(wv)/1e4)
plt.ylim(-0.07,0.07)
if doplot and figs > 1:
plt.tight_layout()
if doplot and figs == 1:
plt.xlabel('$\mu$m')
matplotlib.text.Annotation('normalized flux',(0.01,0.5),xycoords='figure fraction',rotation=90)
plt.subplots_adjust(hspace=0.5)
matchfraction = matchsuccesses.astype(float)/(matchfails+matchsuccesses)
return matchfraction,multimatch
In [26]:
atm= atlas9.Atlas9Atmosphere(teff=4800.,logg=2.6,metals=0.5,am=0.25,cm=0.25)
# The following takes a while ...
synspeca= apogee.modelspec.turbospec.synth([26,-0.25],modelatm=atm,
linelist='201404080919',lsf='all',cont='cannon',
vmacro=6.,isotopes='solar')
from apogee.tools import toAspcapGrid,toApStarGrid
synspec = toAspcapGrid((synspeca[0]-1)/4.)
In [14]:
rcdist_sample = empca_residuals('apogee','red_clump',maskFilter,ask=True,
badcombpixmask=7935,
datadir='/geir_data/scr/price-jones/Data/apogee_dim_reduction')
In [49]:
Nstars = 1
inds = np.random.randint(0,high=rcdist_sample.numberStars(),size=Nstars)
inds = np.array([3574])
inds = np.array([155])
teffs = rcdist_sample.teff[inds]
teffs[teffs==-9999] = np.median(teffs[teffs!=-9999])
loggs = rcdist_sample.logg[inds]
loggs[loggs==-9999] = np.median(loggs[loggs!=-9999])
m_hs = rcdist_sample.data['METALS']
m_hs[m_hs==-9999] = np.median(m_hs[m_hs!=-9999])
a_fes = rcdist_sample.data['ALPHAFE']
a_fes[a_fes==-9999] = np.median(a_fes[a_fes!=-9999])
fe_hs = rcdist_sample.fe_h[inds]
fe_hs[fe_hs==-9999] = np.median(fe_hs[fe_hs!=-9999])
c_hs = rcdist_sample.c_h[inds]
c_hs[c_hs==-9999] = np.median(c_hs[c_hs!=-9999])
n_hs = rcdist_sample.n_h[inds]
n_hs[n_hs==-9999] = np.median(n_hs[n_hs!=-9999])
o_hs = rcdist_sample.o_h[inds]
o_hs[o_hs==-9999] = np.median(o_hs[o_hs!=-9999])
na_hs = rcdist_sample.data['NA_H'][inds]
na_hs[na_hs==-9999] = np.median(na_hs[na_hs!=-9999])
mg_hs = rcdist_sample.data['MG_H'][inds]
mg_hs[mg_hs==-9999] = np.median(mg_hs[mg_hs!=-9999])
al_hs = rcdist_sample.data['AL_H'][inds]
al_hs[al_hs==-9999] = np.median(al_hs[al_hs!=-9999])
si_hs = rcdist_sample.data['SI_H'][inds]
si_hs[si_hs==-9999] = np.median(si_hs[si_hs!=-9999])
s_hs = rcdist_sample.data['S_H'][inds]
s_hs[s_hs==-9999] = np.median(s_hs[s_hs!=-9999])
k_hs = rcdist_sample.data['K_H'][inds]
k_hs[k_hs==-9999] = np.median(k_hs[k_hs!=-9999])
ca_hs = rcdist_sample.data['CA_H'][inds]
ca_hs[ca_hs==-9999] = np.median(ca_hs[ca_hs!=-9999])
ti_hs = rcdist_sample.data['TI_H'][inds]
ti_hs[ti_hs==-9999] = np.median(ti_hs[ti_hs!=-9999])
v_hs = rcdist_sample.data['V_H'][inds]
v_hs[v_hs==-9999] = np.median(v_hs[v_hs!=-9999])
mn_hs = rcdist_sample.data['MN_H'][inds]
mn_hs[mn_hs==-9999] = np.median(mn_hs[mn_hs!=-9999])
ni_hs = rcdist_sample.data['NI_H'][inds]
ni_hs[ni_hs==-9999] = np.median(ni_hs[ni_hs!=-9999])
atm= atlas9.Atlas9Atmosphere(teff=teffs[0],logg=loggs[0],metals=m_hs[0],am=a_fes[0],cm=0.25)
# The following takes a while ...
synspeca= apogee.modelspec.turbospec.synth([6,c_hs[0]],[7,n_hs[0]],[8,o_hs[0]],[11,na_hs[0]],[12,mg_hs[0]],[13,al_hs[0]],
[14,si_hs[0]],[16,s_hs[0]],[19,k_hs[0]],[20,ca_hs[0]],[22,ti_hs[0]],
[23,v_hs[0]],[25,mn_hs[0]],[28,ni_hs[0]],[26,fe_hs[0]],modelatm=atm,
linelist='201404080919',lsf='all',cont='cannon',
vmacro=6.,isotopes='solar')
from apogee.tools import toAspcapGrid,toApStarGrid
synspec = toAspcapGrid((synspeca[0]-1)/4.)
rootmatch(synspec,eigvec=eigvec,k=np.arange(1.516,1.70,0.015),num=8,rootfindtol=1,
rootmatchtol=1.5,doplot=False,cutoff=0.015,deriv2tol=-5e-3)
Out[49]:
In [72]:
k = np.array([(1.5165,1.536),(1.536,1.556),(1.556,1.5795),
(1.5877,1.605),(1.605,1.623),(1.623,1.642),
(1.6495,1.675),(1.675,1.694),(1.675,1.7)])
a = rootmatch(synspec,eigvec=eigvec,num=1,rootfindtol=1,k=k,
rootmatchtol=1.5,doplot=True,cutoff=0.015,figsize=(15,20),figs=1,
pairrange=True,deriv2tol=-5e-3)
colors = plt.get_cmap('viridis')(np.linspace(0.5,0.9,3))
#legend=plt.legend(loc='best')
plt.subplots_adjust(hspace=0.25)
plt.savefig('{0}/PC1_tscomp.pdf'.format(figdir))
print a
In [82]:
figdir
Out[82]:
In [ ]:
In [33]:
Nstars = 1
teffs = rcdist_sample.teff
teffs = np.median(teffs[teffs!=-9999])
loggs = rcdist_sample.logg
loggs = np.median(loggs[loggs!=-9999])
m_hs = rcdist_sample.data['METALS']
m_hs = np.median(m_hs[m_hs!=-9999])
a_fes = rcdist_sample.data['ALPHAFE']
a_fes= np.median(a_fes[a_fes!=-9999])
fe_hs = rcdist_sample.fe_h
fe_hs = np.median(fe_hs[fe_hs!=-9999])
c_hs = rcdist_sample.c_h
c_hs = np.median(c_hs[c_hs!=-9999])
n_hs = rcdist_sample.n_h
n_hs = np.median(n_hs[n_hs!=-9999])
o_hs = rcdist_sample.o_h
o_hs = np.median(o_hs[o_hs!=-9999])
na_hs = rcdist_sample.data['NA_H']
na_hs = np.median(na_hs[na_hs!=-9999])
mg_hs = rcdist_sample.data['MG_H']
mg_hs = np.median(mg_hs[mg_hs!=-9999])
al_hs = rcdist_sample.data['AL_H']
al_hs = np.median(al_hs[al_hs!=-9999])
si_hs = rcdist_sample.data['SI_H']
si_hs = np.median(si_hs[si_hs!=-9999])
s_hs = rcdist_sample.data['S_H']
s_hs = np.median(s_hs[s_hs!=-9999])
k_hs = rcdist_sample.data['K_H']
k_hs= np.median(k_hs[k_hs!=-9999])
ca_hs = rcdist_sample.data['CA_H']
ca_hs = np.median(ca_hs[ca_hs!=-9999])
ti_hs = rcdist_sample.data['TI_H']
ti_hs = np.median(ti_hs[ti_hs!=-9999])
v_hs = rcdist_sample.data['V_H']
v_hs = np.median(v_hs[v_hs!=-9999])
mn_hs = rcdist_sample.data['MN_H']
mn_hs = np.median(mn_hs[mn_hs!=-9999])
ni_hs = rcdist_sample.data['NI_H']
ni_hs = np.median(ni_hs[ni_hs!=-9999])
atm= atlas9.Atlas9Atmosphere(teff=teffs,logg=loggs,metals=m_hs,am=a_fes,cm=0.25)
# The following takes a while ...
synspeca= apogee.modelspec.turbospec.synth([6,c_hs],[7,n_hs],[8,o_hs],[11,na_hs],[12,mg_hs],[13,al_hs],
[14,si_hs],[16,s_hs],[19,k_hs],[20,ca_hs],[22,ti_hs],
[23,v_hs],[25,mn_hs],[28,ni_hs],[26,fe_hs],modelatm=atm,
linelist='201404080919',lsf='all',cont='cannon',
vmacro=6.,isotopes='solar')
from apogee.tools import toAspcapGrid,toApStarGrid
synspec = toAspcapGrid((synspeca[0]-1)/4.)
rootmatch(synspec,eigvec=eigvec3,k=np.arange(1.516,1.70,0.015),num=8,rootfindtol=1,
rootmatchtol=1.5,doplot=True,cutoff=[0.015,0.005,0.005,0.005,0.005,0.005,0.005,0.005])
In [50]:
teffs, loggs, fe_hs, c_hs, n_hs, o_hs, na_hs, mg_hs, al_hs, si_hs, s_hs, k_hs, ca_hs, ti_hs, v_hs, mn_hs, ni_hs
Out[50]:
In [44]:
ranges = ([1.525,1.529],[1.543,1.547],[1.620,1.624])
num = 3
plt.figure(figsize=(15,10))
ax = plt.subplot(111)
k = 0
order = [1,4,7,2,5,8,3,6,9]
yranges = np.arange(-0.1,0.06,0.05)[1:]
for i in range(len(ranges)):
print i
down,up = ranges[i]
wvrange = np.array([down*1e4,up*1e4])
pixmin,pixmax = wv2pix(wvrange,apStarWavegrid=True)
colors = plt.get_cmap('viridis')(np.linspace(0.5,0.9,3))
for e in range(num):
ax = plt.subplot(num,3,order[k])
if pixmin < 0:
pixmin = 0
if pixmax > 8575:
pixmax = 8575
wv = pix2wv(np.arange(pixmin,pixmax),apStarWavegrid=True)
xranges = np.arange((min(wv)/1e4)+0.005,(max(wv)/1e4)+0.005,0.005)
if e == 1:
ax.plot(wv/1e4,-toApStarGrid(eigvec[e])[pixmin:pixmax],color=colors[e],lw=3,label='PC {0}'.format(e+1))
elif e != 1:
ax.plot(wv/1e4,toApStarGrid(eigvec[e])[pixmin:pixmax],color=colors[e],lw=3,label='PC {0}'.format(e+1))
ax.plot(wv/1e4,toApStarGrid(synspec)[pixmin:pixmax],color='k',ls='--',lw=2,label='synthetic\nspectrum',alpha=0.6)
ax.set_xlim(min(wv)/1e4,max(wv)/1e4)
ax.set_ylim(-0.1,0.06)
if order[k] != 1 and order[k] != 4 and order[k] != 7:
ax.set_yticklabels(['']*len(yranges))
if order[k] == 1 or order[k] == 4 or order[k] == 7:
ax.set_yticks(yranges)
ax.set_yticklabels(yranges)
#ax.set_ylabel('PC magnitude')
if order[k] != 7 and order[k] != 8 and order[k] != 9:
ax.set_xticklabels(['']*len(xranges))
if order[k] == 8:
ax.set_xlabel(r'$\mu$m')
if order[k] == 3 or order[k] == 6 or order[k] == 9:
legend=plt.legend(loc='best')
legend = plt.legend(loc='best')
legend.get_frame().set_linewidth(0.0)
xminorlocator = MultipleLocator(0.0005)
yminorlocator = MultipleLocator(0.025)
ax.xaxis.set_minor_locator(xminorlocator)
ax.yaxis.set_minor_locator(yminorlocator)
ax.tick_params(which='major',direction='in',length=5,width=2,bottom=True,top=True,left=True,right=True)
ax.tick_params(which='minor',direction='in',length=4,width=1.5,bottom=True,top=True,left=True,right=True)
k+=1
ax.annotate('principal component magnitude',(0,0.8),xycoords='figure fraction',rotation=90)
plt.tight_layout()
plt.subplots_adjust(wspace=0.05,hspace=0.05)
plt.savefig('{0}/feature_location_comparison.pdf'.format(figdir))
We note that any matching between the synthetic spectrum shown here and the principal components is merely a coincidence of normalization choice. More important is the agreement on the position and shape of the features
In [ ]:
sub
In [ ]:
In [ ]: