notebook.community

Edit and run



In [1]:

    
#%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import sys
import seaborn as sns
from scipy.interpolate import interp1d
sys.path.append('../')
from matplotlib.ticker import FormatStrFormatter
import emcee
from delight.io import *
from delight.utils import *
from delight.utils_cy import approx_flux_likelihood_cy
from delight.photoz_gp import PhotozGP
from delight.photoz_kernels import Photoz_mean_function, Photoz_kernel
from matplotlib.ticker import FormatStrFormatter



In [2]:

    
# Setting style
from matplotlib import rcParams
rcParams["font.family"] = "sans-serif"
rcParams["font.sans-serif"] = ["Computer Modern Sans"]
sns.set_style('white')
sns.set_palette("colorblind", 12) #Paired
sns.palplot(sns.color_palette())
rcParams['xtick.major.size'] = 2
rcParams['xtick.major.width'] = 1
rcParams['ytick.major.size'] = 2
rcParams['ytick.major.width'] = 1
rcParams['xtick.direction'] = 'in'
rcParams['ytick.direction'] = 'in'
colTF = sns.color_palette()[2] 
colGP = sns.color_palette()[0]
colGP2 = sns.color_palette()[5]



In [3]:

    
# Read parameter files and data (file names need to be updated)
# Load SEDS, bands, grids, etc.
%cd '/Users/bl/Dropbox/repos/Delight/'
params = parseParamFile('parameter_files/params_paun_dsim.cfg', verbose=False)
#params['training_catFile'] = '/Users/bl/Dropbox/repos/Delight/data/galaxies-fluxredshifts_small.txt'
#params['target_catFile'] = '/Users/bl/Dropbox/repos/Delight/data/galaxies-fluxredshifts_small2.txt'
bandCoefAmplitudes, bandCoefPositions, bandCoefWidths, norms\
    = readBandCoefficients(params)
bandNames = params['bandNames']
numBands, numCoefs = bandCoefAmplitudes.shape
redshiftDistGrid, redshiftGrid, redshiftGridGP = createGrids(params)
f_mod = readSEDs(params)
numObjectsTraining = np.sum(1 for line in open(params['training_catFile']))
print('Number of Training Objects', numObjectsTraining)









    



/Users/bl/Dropbox/repos/Delight
Number of Training Objects 500






    



/Users/bl/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:13: DeprecationWarning: Calling np.sum(generator) is deprecated, and in the future will give a different result. Use np.sum(np.fromiter(generator)) or the python sum builtin instead.
  del sys.path[0]



In [81]:

    
# read filter coefficients (Gaussian mixture model approximation)
# Then compute mean and size of filters, for plotting later.
bandwavelengths = np.zeros((numBands, 2))
for i in range(numBands):
    amps, mus, sigs = bandCoefAmplitudes[i, :], bandCoefPositions[i, :], bandCoefWidths[i, :]
    lamMin, lamMax = np.min(mus - 2*sigs), np.max(mus + 2*sigs) 
    x = np.linspace(lamMin, lamMax, 5000)
    y = np.sum(np.exp(-0.5*((mus[None, :]-x[:, None])/sigs[None, :])**2), axis=1)
    bandwavelengths[i, 0] = np.average(x, weights=y)
    bandwavelengths[i, 1] = np.sqrt(np.average((x-bandwavelengths[i, 0])**2, weights=y))



In [82]:

    
plt.errorbar(bandwavelengths[:, 0], fluxes, yerr=fluxesVar**0.5, xerr=bandwavelengths[:, 1], fmt='+')









    Out[82]:





<ErrorbarContainer object of 3 artists>



In [83]:

    
# Load templates
dir_seds = params['templates_directory']
dir_filters = params['bands_directory']
lambdaRef = params['lambdaRef']
sed_names = params['templates_names']
fmt = '.dat'
sed_interps = np.zeros((len(sed_names), ), dtype=object)
for i, sed_name in enumerate(sed_names):
    seddata = np.genfromtxt(dir_seds + '/' + sed_name + fmt)
    seddata[:, 1] *= seddata[:, 0]**2. / 3e18
    ref = np.interp(lambdaRef, seddata[:, 0], seddata[:, 1])
    seddata[:, 1] /= ref
    sed_interps[i] = interp1d(seddata[:, 0], seddata[:, 1])



In [84]:

    
# Create GP 
# Could change hyperparameters here
DL = approx_DL()
gp = PhotozGP(f_mod, bandCoefAmplitudes, bandCoefPositions, bandCoefWidths,
              params['lines_pos'], params['lines_width'],
              params['V_C'], params['V_L'],
              params['alpha_C'], params['alpha_L'],
              redshiftGridGP, use_interpolators=True)



In [85]:

    
# We need a function to construct the GP residuals in wavelength space, with the two RBF kernels
# (In delight this is done in redshift/band space, but we want the actual SED here)

lamMin = 3.7e3#np.min(bandwavelengths[:, 0]-2*bandwavelengths[:, 1])
lamMax = 1.1e4#np.max(bandwavelengths[:, 0]+2*bandwavelengths[:, 1])
wavs = np.logspace(np.log10(lamMin), np.log10(lamMax), 80) #np.linspace(lamMin, lamMax, 10)
wavs_hi = np.logspace(np.log10(lamMin), np.log10(lamMax), 2000) #np.linspace(lamMin, lamMax, 1000)

def drawSEDresiduals(gp, z, ell, wavs):
    opz = 1 + z
    dWav = wavs[:, None] - wavs[None, :]
    cov_C = gp.kernel.var_C *\
        np.exp(-0.5*(dWav/opz/gp.kernel.alpha_C)**2)
    cov_L = 0*cov_C
    for mu, sig in zip(gp.kernel.lines_mu, gp.kernel.lines_sig):
        cov_L += gp.kernel.var_L *\
            np.exp(-0.5*(dWav/opz/gp.kernel.alpha_L)**2) *\
            np.exp(-0.5*((wavs[:, None]/opz-mu)/sig)**2) *\
            np.exp(-0.5*((wavs[None, :]/opz-mu)/sig)**2)
    fac = opz * ell / 4 / np.pi / gp.kernel.DL_z(z)**2
    residuals = fac * np.random.multivariate_normal(0*wavs, cov_C + cov_L)
    numB = gp.kernel.fcoefs_amp.shape[0]
    filters = np.zeros((numB, wavs.size))
    for i in range(numB):
        for amp, mu, sig in zip(gp.kernel.fcoefs_amp[i, :],
                                gp.kernel.fcoefs_mu[i, :],
                                gp.kernel.fcoefs_sig[i, :]):
            filters[i, :] += amp * np.exp(-0.5*((wavs-mu)/sig)**2)
    return residuals, fac, cov_C + cov_L, filters



In [86]:

    
numZ = redshiftGrid.size
all_z = np.zeros((numObjectsTraining, ))
all_fluxes = np.zeros((numObjectsTraining, numBands))
all_fluxes_var = np.zeros((numObjectsTraining, numBands))
all_bestTypes = np.zeros((numObjectsTraining, ), dtype=int)
all_model_mean = np.zeros((numZ, numObjectsTraining, numBands))
all_model_covar = np.zeros((numZ, numObjectsTraining, numBands))

loc = - 1
trainingDataIter1 = getDataFromFile(params, 0, numObjectsTraining,
                                    prefix="training_", getXY=True,
                                    CV=True)
for z, normedRefFlux,\
    bands, fluxes, fluxesVar,\
    bandsCV, fluxesCV, fluxesVarCV,\
        X, Y, Yvar in trainingDataIter1:
    loc += 1

    themod = np.zeros((1, f_mod.shape[0], bands.size))
    for it in range(f_mod.shape[0]):
        for ib, band in enumerate(bands):
            themod[0, it, ib] = f_mod[it, band](z)
    chi2_grid, ellMLs = scalefree_flux_likelihood(fluxes, fluxesVar, themod, returnChi2=True)
    bestType = np.argmin(chi2_grid)
    ell = ellMLs[0, bestType]
    #ell2 = normedRefFlux * 4 * np.pi * (DL(z)**2. * (1+z)) / params['fluxLuminosityNorm']
    X[:, 2] = ell
    gp.setData(X, Y, Yvar, bestType)
    model_mean, model_covar = gp.predictAndInterpolate(redshiftGrid, ell=ell)
    all_model_mean[:, loc, :], all_model_covar[:, loc, :] = model_mean, model_covar
    all_z[loc] = z
    all_bestTypes[loc] = bestType
    all_fluxes[loc, bands] = fluxes
    all_fluxes_var[loc, bands] = fluxesVar
    
    if loc < 10:
        if True:
            fac = ell * (1+z)**2. / DL(z)**2. / (4*np.pi) * 1e4/2.3
            sedtf = sed_interps[bestType](wavs/(1+z)) * fac
            sedtf_hi = sed_interps[bestType](wavs_hi/(1+z)) * fac

            if True:
                residuals, fac, cov, filters = drawSEDresiduals(gp, z, ell, wavs)
                residuals_hi, fac_hi, cov_hi, filters_hi = drawSEDresiduals(gp, z, ell, wavs_hi)
                filternorms = [np.trapz(filters_hi[ib], x=wavs_hi) for i, ib in enumerate(bands)]
                sed = np.random.uniform(wavs.size) * wavs
                #icov = np.linalg.inv(cov)
                det = np.linalg.det(cov)
                def lnprob(sed, sedtf, fac, fluxes, bands, wavs, wavs_hi, cov, det, filters_hi, filternorms):
                    sedfluxes = np.zeros((bands.size, ))
                    sed_hi = interp1d(wavs, sed, kind='cubic')(wavs_hi)#np.interp(wavs_hi, wavs, sed)
                    for i, ib in enumerate(bands):
                        sedfluxes[i] = np.trapz(filters_hi[ib]*sed_hi, x=wavs_hi) / filternorms[i]
                    lp = np.sum(-0.5*(sedfluxes - fluxes)**2/fluxesVar) #- 0.5 * np.prod(np.log(fluxesVar))
                    res = (sed - sedtf) / fac
                    #print(lp,  np.sum(-0.5*np.dot(res, np.linalg.solve(cov, res))), end=" ")
                    #lp += np.sum(-0.5*np.dot(res, np.linalg.solve(cov, res))) 
                    return lp
                ndim = wavs.size
                nwalkers = ndim * 10
                ymax = fluxes.max() * 1.2
                ymin = fluxes.min() / 2
                p0 = np.random.uniform(low=ymin, high=ymax, size=ndim * nwalkers).reshape((nwalkers, ndim))
                for i in range(nwalkers):
                    p0[i] = 1*sedtf + fac * np.random.multivariate_normal(0*wavs, cov)
                    #print(lnprob(p0[i], sedtf, fac, fluxes, bands, wavs, wavs_hi, cov, det, filters_hi, filternorms))
                #stop
                sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                                args=[sedtf, fac, fluxes, bands, wavs, wavs_hi, cov, det, filters_hi, filternorms])
                pos, prob, state = sampler.run_mcmc(p0, 20)
                sampler.reset()
                sampler.run_mcmc(pos, 20)
                print("Mean acceptance fraction: {0:.3f}"
                        .format(np.mean(sampler.acceptance_fraction)))
                seds = sampler.flatchain
                sedmean, sedstd = seds.mean(axis=0), seds.std(axis=0)
                sedmean_hi = interp1d(wavs, sedmean, kind='cubic')(wavs_hi)
                sedstd_hi = interp1d(wavs, sedstd, kind='cubic')(wavs_hi)

            fig, ax = plt.subplots(1, 1, figsize=(6, 3))
            #ax.plot(wavs, sedtf, 'k')
            ax.plot(wavs_hi, sedtf_hi, c=colTF, label='Standard template fitting') 
            ax.plot(wavs_hi, sedmean_hi, color=colGP, label='New method')
            ax.fill_between(wavs_hi, sedmean_hi+sedstd_hi, sedmean_hi-sedstd_hi, color=colGP, alpha=0.4)
            
            
            #ax.fill_between(wavs, sedmean+sedstd, sedmean-sedstd, color='b', alpha=0.2)
            #for w in wavs:
            #    ax.axvline(w, c='gray', alpha=0.5, lw=0.5)
            #for i in range(seds.shape[0]):
            #    ax.plot(wavs_hi, interp1d(wavs, seds[i, :], kind='cubic')(wavs_hi), c='gray', alpha=0.2, lw=0.3)
            ax.errorbar(bandwavelengths[bands, 0], fluxes, 
                        xerr=bandwavelengths[bands, 1], yerr=np.sqrt(fluxesVar),
                       fmt='o', c='k', markersize=5, label='Deep training photometry')
            ax.errorbar(bandwavelengths[bandsCV, 0], fluxesCV, 
                        xerr=bandwavelengths[bandsCV, 1], yerr=np.sqrt(fluxesVarCV),
                       fmt='s', c='b', markersize=5, label='Shallow testing photometry')
            ax.set_yscale('log')
            ax.set_xscale('log')
            ax.set_ylabel(r'$L_\nu(\lambda)$')
            #ax.set_yticks(np.linspace(10**-5, 10**-4, 10))
            ax.set_xlabel(r'$\lambda$ [$\AA$]')
            ax.set_xlim([lamMin, lamMax])
            ax.set_xticks(np.linspace(4e3, 1e4, 7))
            ax.set_xticklabels(np.linspace(4e3, 1e4, 7))
            ax.xaxis.set_major_formatter(FormatStrFormatter('%.0f'))
            #ax.set_ylim([0.5*np.min(sedtf), 2.5*np.max(sedtf)])
            ax.legend(ncol=2, frameon=False, loc='upper left')
            fig.tight_layout()
            fig.savefig('./paper/traininggalaxy_fnulambda-'+str(loc)+'.pdf')
            
        if False:
            fig, axs = plt.subplots(4, numBands//4, figsize=(12, 6), sharex=True, sharey=True)
            for i in range(numBands//4):
                axs[-1, i].set_xlabel('Redshift')
            for i in range(4):
                axs[i, 0].set_ylabel('Flux')
            axs = axs.ravel()
            temp = np.concatenate([ell*f_mod[bestType, i](redshiftGrid)*redshiftGrid**2
                                   for i, bnm in enumerate(bandNames)])
            axs[-1].set_ylim([0.5*np.min(temp), 2*np.max(temp)])
            for i, bnm in enumerate(bandNames):
                axs[i].text(0.1, 0.8*np.max(temp), bnm.replace('_', ' '))
                axs[i].plot(redshiftGrid, ell*f_mod[bestType, i](redshiftGrid)*redshiftGrid**2,
                            c=colTF, label='Standard template fitting', ls='dashed', zorder=2)
                axs[i].plot(redshiftGrid, model_mean[:, i]*redshiftGrid**2,
                            c=colGP, label='New method', zorder=1)#, label=bnm.replace('_', ' '))
                axs[i].fill_between(redshiftGrid, 
                                    (model_mean[:, i]-np.sqrt(model_covar[:, i]))*redshiftGrid**2, 
                                    (model_mean[:, i]+np.sqrt(model_covar[:, i]))*redshiftGrid**2,
                                    color=colGP, alpha=0.4, zorder=0)
                if i in bands:
                    pos = list(bands).index(i)
                    axs[i].errorbar(z, fluxes[pos]*z**2, yerr=np.sqrt(fluxesVar[pos])*z**2, c='k', markersize=5, fmt='o')
                if i in bandsCV:
                    pos = list(bandsCV).index(i)
                    axs[i].errorbar(z, fluxesCV[pos]*z**2, yerr=np.sqrt(fluxesVarCV[pos])*z**2, c='b', markersize=5, fmt='s')
                #axs[i].set_ylim([np.min(model_mean), np.max(model_mean)])
                axs[i].set_xticks([0, 0.5, 1.0])
            axs[0].legend(ncol=3, frameon=False, loc='upper left', bbox_to_anchor=(0.0, 1.4))
            axs[-1].set_yscale('log')
            axs[-1].set_xlim([0, 1.5])
            fig.subplots_adjust(wspace=0, hspace=0)
            fig.savefig('./paper/traininggalaxy_fluxredshiftmodel-'+str(loc)+'.pdf')
            
    else:
        break









    



Mean acceptance fraction: 0.259
Mean acceptance fraction: 0.300
Mean acceptance fraction: 0.290
Mean acceptance fraction: 0.228
Mean acceptance fraction: 0.334
Mean acceptance fraction: 0.315
Mean acceptance fraction: 0.267
Mean acceptance fraction: 0.276
Mean acceptance fraction: 0.225
Mean acceptance fraction: 0.339



In [9]:

    
# Create the redshift / photometric model
dir_seds = params['templates_directory']
dir_filters = params['bands_directory']
lambdaRef = params['lambdaRef']
sed_names = params['templates_names']
f_mod_grid = np.zeros((redshiftGrid.size, len(sed_names),
                  len(params['bandNames'])))
for t, sed_name in enumerate(sed_names):
    f_mod_grid[:, t, :] = np.loadtxt(dir_seds + '/' + sed_name +
                                '_fluxredshiftmod.txt')



In [10]:

    
# Only run that to look at redshift PDFs
p_t = params['p_t'][all_bestTypes][None, :]
p_z_t = params['p_z_t'][all_bestTypes][None, :]
prior = np.exp(-0.5*((redshiftGrid[:, None]-all_z[None, :])/params['zPriorSigma'])**2)
Ncompress = params['Ncompress']

fig, axs = plt.subplots(1, 2, figsize=(8, 2.5), sharey=True)

loc = -1
targetDataIter = getDataFromFile(params, 0, numObjectsTraining,
                                 prefix="target_", getXY=False)
for z, normedRefFlux, bands, fluxes, fluxesVar, bCV, fCV, fvCV in targetDataIter:
    loc += 1
    if loc < 100:
        fulllike_grid = np.zeros((all_model_mean.shape[0], all_model_mean.shape[1]))
        ell_hat_z = normedRefFlux * 4 * np.pi\
            * params['fluxLuminosityNorm'] \
            * (DL(redshiftGrid)**2. * (1+redshiftGrid))
        ell_hat_z[:] = 1.0
        params['ellPriorSigma'] = 0.2
        approx_flux_likelihood_cy(fulllike_grid,
            all_model_mean.shape[0], all_model_mean.shape[1], bands.size,
            fluxes, fluxesVar, all_model_mean[:, :, bands],
            all_model_covar[:, :, bands], 
            ell_hat_z, (ell_hat_z*params['ellPriorSigma'])**2
        )
        fulllike_grid *= prior
        evidences = np.trapz(fulllike_grid, x=redshiftGrid, axis=0)
        sortind = np.argsort(evidences)[::-1][0:Ncompress]
        like_grid = fulllike_grid.sum(axis=1)
        like_grid_comp = fulllike_grid[:, sortind].sum(axis=1)
        alllike_grid_cww = np.zeros((f_mod_grid.shape[0], f_mod_grid.shape[1]))
        params['ellPriorSigma'] = 1e12
        approx_flux_likelihood_cy(alllike_grid_cww,
            f_mod_grid.shape[0], f_mod_grid.shape[1], bands.size,
            fluxes, fluxesVar, f_mod_grid[:, :, bands], 0*f_mod_grid[:, :, bands], 
            ell_hat_z, (ell_hat_z*params['ellPriorSigma'])**2
        )
        b_in = np.array(params['p_t'])[None, :]
        beta2 = np.array(params['p_z_t'])**2.0
        p_z = b_in * redshiftGrid[:, None] * np.exp(-0.5 * redshiftGrid[:, None]**2 / beta2[None, :]) / beta2[None, :]
        alllike_grid_cww *= p_z
        besttype = np.argmax(alllike_grid_cww.sum(axis=0))
        like_grid_cww = alllike_grid_cww.sum(axis=1)  # [:, besttype]
        if like_grid.sum() > 0:
            zphotmean = np.average(redshiftGrid, weights=like_grid)
            if zphotmean > 0.0 and zphotmean < 2.5 and z < 2.8:

                for ax in axs:
                    ax.cla()
                for ii in sortind:
                    axs[1].plot(redshiftGrid, fulllike_grid[:, ii], c='gray', lw=1, alpha=0.6)
                axs[0].plot(redshiftGrid, like_grid, c=colGP, lw=2, label='New method')
                like_grid_cww = like_grid_cww * np.max(like_grid) / np.max(like_grid_cww)
                axs[0].plot(redshiftGrid, like_grid_cww, c=colTF, lw=2, label='Standard template fitting')
                axs[1].plot(redshiftGrid, like_grid_comp, c=colGP, lw=2, label='Compressed new method')
                axs[0].axvline(z, c='k', lw=1, ls='dashed', label='True redshift')
                axs[1].axvline(z, c='k', lw=1, ls='dashed')
                axs[0].set_ylabel(r'$p(z | \hat{\mathbf{F}})$')
                axs[1].set_xlabel('Redshift')
                thegr = np.concatenate((redshiftGrid[np.logical_and(like_grid_cww > 0.01, like_grid > 0.01)],
                                 all_z[sortind]))    
                axs[1].set_xlim([np.min(thegr), np.max(thegr)])
                axs[0].set_xlim([np.min(thegr), np.max(thegr)])
                axs[0].set_xlabel('Redshift')
                ylimax = 1.2*np.max(np.concatenate((like_grid, like_grid_cww)))
                axs[0].set_ylim([0, ylimax])
                for iioff, ii in enumerate(sortind):
                    if iioff == 0:
                        lb = 'Training galaxies'
                    else:
                        lb = None
                    axs[1].scatter(all_z[ii], ylimax*0.99, c='gray', marker='x', lw=2, s=10, label=lb)
                axs[0].yaxis.set_major_formatter(FormatStrFormatter('%.2e'))
                axs[0].legend(loc='upper right', frameon=False, ncol=2, bbox_to_anchor=(1.0, 1.35))
                axs[0].set_yticks([])
                axs[1].legend(loc='upper right', frameon=False, ncol=1, bbox_to_anchor=(0.8, 1.35))
                axs[1].set_yticks([])
                #fig.tight_layout()
                fig.subplots_adjust(wspace=0, hspace=0, bottom=0.2, top=0.8, left=0.05, right=0.95)
                fig.savefig('./paper/data-pdfs-'+str(loc)+'.pdf')



In [ ]: