ESP Paper

Here is all the code that went into the paper and everything required to create the plots as well.


In [ ]:
import esp
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import shutil
import pandas as pd
from esp.lsst_utils import Bandpass
from esp.lsst_utils import BandpassDict
from esp.lsst_utils import Sed
from sklearn.neighbors import NearestNeighbors
from scipy.stats import binned_statistic, trim_mean
from sklearn.decomposition import PCA as sklPCA
from esp.spec_utils import specUtils
%load_ext autoreload
%autoreload 2
%matplotlib inline

Load sims_sed_library and remove duplicate SEDs


In [ ]:
home_dir = os.getenv('HOME')
galaxy_dir = '%s/lsst/DarwinX86/sims_sed_library/2016.01.26/galaxySED/' % home_dir
pca_obj = esp.pcaSED()
pca_obj.load_full_spectra(galaxy_dir)

In [ ]:
new_spec_list = []
i = 0
for spec_obj in pca_obj.spec_list_orig:
    if i % 100 == 0:
        print('On Spectrum %i out of %i' % (i, len(pca_obj.spec_list_orig)))
    j = 0
    keep = True
    for new_spec_obj in new_spec_list:
        spec_flux = new_spec_obj.flambda
        if np.array_equal(np.array(spec_obj.flambda), spec_flux):
            #print(i,j) ## Uncomment to see which SEDs match to one another
            keep = False
        elif len(np.where(np.isclose(np.array(spec_obj.flambda), 
                                     spec_flux, atol=0., rtol=1e-5) == True)[0]) >= (0.9*6900):
            #print(i,j) ## Uncomment to see which SEDs match to one another
            keep = False
        else:
            j+=1
    
    if keep is True:
        new_spec_list.append(spec_obj)
    i += 1
print(len(new_spec_list))

What is the resolution of the spectra and what do they look like?


In [ ]:
resolution = new_spec_list[0].wavelen[1:] - new_spec_list[0].wavelen[:-1]

In [ ]:
print(new_spec_list[190].name)
print(new_spec_list[240].name)
print(new_spec_list[360].name)
print(new_spec_list[686].name)
print(np.where(new_spec_list[0].wavelen <= 2400.))

In [ ]:
mpl.rc('xtick', labelsize=22) 
mpl.rc('ytick', labelsize=22)

In [ ]:
fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(14,12))

fig.text(0.55, -0.02, 'Wavelength (nm)', ha='center', size=32)

ax[0].plot(new_spec_list[0].wavelen[:6561], 
           (new_spec_list[190].flambda/np.max(new_spec_list[190].flambda))[:6561], lw=4,
          label='Burst SF, 5.0E9 Years, Z=Z_sun')
ax[0].plot(new_spec_list[0].wavelen[:6561], 
           (new_spec_list[240].flambda/np.max(new_spec_list[240].flambda))[:6561], lw=4,
          label='Constant SF, 1.0E9 Years, Z=0.005*Z_sun')
ax[0].plot(new_spec_list[0].wavelen[:6561], 
           (new_spec_list[360].flambda/np.max(new_spec_list[360].flambda))[:6561], lw=4,
          label='Exponential SF, 1.5E6 Years, Z=0.02*Z_sun')
ax[0].plot(new_spec_list[0].wavelen[:6561], 
           (new_spec_list[686].flambda/np.max(new_spec_list[686].flambda))[:6561], lw=4,
          label='Instantaneous SF, 3.2E8 Years, Z=2.5*Z_sun')
ax[0].set_ylabel('Scaled Flux', size=24)
ax[0].set_title('Example Spectra', size=24)
ax[0].legend(fontsize=18)



ax[1].plot(new_spec_list[0].wavelen[:6561], resolution[:6561], label='Resolution', lw=6)
ax[1].set_title('Resolution', size=24)
ax[1].set_ylabel('Resolution (nm)', size=24)

plt.tight_layout()
#plt.savefig('paper_plots_revised/fig_0.png')

Load Bandpasses and Create Artificial Bandpasses beyond the ugrizy of LSST


In [ ]:
bandpass_dir = '%s/lsst/DarwinX86/throughputs/2016.12.13/baseline/' % home_dir
filters = ['u', 'g', 'r', 'i', 'z', 'y']
bandpass_dict = BandpassDict.loadTotalBandpassesFromFiles(bandpassNames = filters,
                                                          bandpassDir = bandpass_dir,
                                                          bandpassRoot = 'total_')

In [ ]:
blue_sb_1 = np.zeros(13000)
blue_sb_1[0:500] += 1.
#blue_sb_1[:100] += 1.
blue_sb_2 = np.zeros(13000)
blue_sb_2[500:1000] += 1.
#blue_sb_2[100:200] += 1.

blue_sb_3 = np.zeros(13000)
blue_sb_3[1000:1500] += 1.
blue_sb_4 = np.zeros(13000)
blue_sb_4[1500:2000] += 1.

red_sb_1 = np.zeros(13000)
red_sb_1[11000:11500] += 1.
#red_sb_1[1100:1200] += 1.
red_sb_2 = np.zeros(13000)
red_sb_2[11500:12000] += 1.
#red_sb_2[1200:] += 1.

red_sb_3 = np.zeros(13000)
red_sb_3[12000:12500] += 1.
red_sb_4 = np.zeros(13000)
red_sb_4[12500:13000] += 1.

blue_bandpass_1 = Bandpass(wavelen=np.arange(100, 1400, .1), sb=blue_sb_1)
blue_bandpass_2 = Bandpass(wavelen=np.arange(100, 1400, .1), sb=blue_sb_2)
blue_bandpass_3 = Bandpass(wavelen=np.arange(100, 1400, .1), sb=blue_sb_3)
blue_bandpass_4 = Bandpass(wavelen=np.arange(100, 1400, .1), sb=blue_sb_4)
red_bandpass_1 = Bandpass(wavelen=np.arange(100, 1400, .1), sb=red_sb_1)
red_bandpass_2 = Bandpass(wavelen=np.arange(100, 1400, .1), sb=red_sb_2)
red_bandpass_3 = Bandpass(wavelen=np.arange(100, 1400, .1), sb=red_sb_3)
red_bandpass_4 = Bandpass(wavelen=np.arange(100, 1400, .1), sb=red_sb_4)

In [ ]:
new_bandpass_dict = BandpassDict([
                                  blue_bandpass_1, blue_bandpass_3, 
                                  #blue_bandpass_3, blue_bandpass_4,
                                  bandpass_dict['u'], bandpass_dict['g'], 
                                  bandpass_dict['r'], bandpass_dict['i'], bandpass_dict['z'], 
                                  bandpass_dict['y'], #red_bandpass_1, red_bandpass_2, 
                                  red_bandpass_2, red_bandpass_4
                                 ],
                                ['blue_1', #'blue_2', 
                                 'blue_3', #'blue_4', 
                                 'u', 'g', 'r', 'i', 'z', 'y', 
                                 'red_2',#'red_2', 
                                 'red_4', #'red_4'
                                ])

In [ ]:
pca_obj.spec_list_orig = new_spec_list

Define functions for later


In [ ]:
def choose_training_and_test_set(pca_obj, bandpass_dict, min_wavelen, max_wavelen, 
                                 rand_print=False):
    rand_choice = np.random.choice(np.arange(len(pca_obj.spec_list_orig)), size=60, replace=False)
    if rand_print is True:
        print(rand_choice)
    sed_list = []
    names = []
    for row in rand_choice:
        #print row
        sed_list.append(pca_obj.spec_list_orig[row])
        names.append(sed_list[-1].name)
    training_list = sed_list[:10]
    training_names = names[:10]
    test_list = sed_list[10:]
    test_names = names[10:]
    
    training_colors = []
    test_colors = []
    
    for train_sed in training_list:
        
        train_mags = bandpass_dict.magListForSed(train_sed)
        
        colors = [train_mags[x] - train_mags[x+1] for x in range(len(train_mags)-1)]
        training_colors.append(colors)        
        
    
    min_idx = np.where(test_list[0].wavelen < min_wavelen)[0][-1] + 1
    max_idx = np.where(test_list[0].wavelen > max_wavelen)[0][0]
    test_fluxes = []
    
    for test_sed in test_list:
        
        test_fluxes.append(pca_obj.scale_spectrum(test_sed.flambda[min_idx:max_idx]))
        
        test_mags = bandpass_dict.magListForSed(test_sed)
        
        colors = [test_mags[x] - test_mags[x+1] for x in range(len(test_mags)-1)]
        test_colors.append(colors)
    test_colors = np.array(test_colors)
    test_fluxes = np.array(test_fluxes)
        
    return training_colors, training_list, training_names, test_list, test_names, test_fluxes, test_colors

In [ ]:
from sklearn.linear_model import LinearRegression

def linear_interpolation_spectra(training_colors, test_colors, training_spectra, 
                                 bandpass_dict, min_wavelen, max_wavelen):
    lin_reg = LinearRegression()
    su = specUtils()
    
    training_fluxes = []
    for train_spec in training_spectra:
        training_fluxes.append(train_spec.flambda)
        
    min_idx = np.where(training_spectra[0].wavelen < min_wavelen)[0][-1] + 1
    max_idx = np.where(training_spectra[0].wavelen > max_wavelen)[0][0]
        
    lin_reg.fit(training_colors, training_fluxes)
    test_list = lin_reg.predict(test_colors)
    test_fluxes = []
    test_colors = []
    for test_flux in test_list:
        
        test_sed = Sed()
        for test_flux_bin in range(len(test_flux)):
            if test_flux[test_flux_bin] < 0.:
                test_flux[test_flux_bin] = 0.
                
        test_sed.setSED(wavelen=training_spectra[0].wavelen[min_idx:max_idx], flambda=test_flux[min_idx:max_idx])
        
        test_fluxes.append(su.scale_spectrum(test_sed.flambda))
        
        test_mags = bandpass_dict.magListForSed(test_sed)
        
        colors = [test_mags[x] - test_mags[x+1] for x in range(len(test_mags)-1)]
        test_colors.append(colors)
    
    return test_fluxes, test_colors

In [ ]:
def calc_dist_bins(distances, results, n_bins):
    #bin_vals = [(float(i)/n_bins)*np.max(distances) for i in range(n_bins)]
    bin_vals = [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 
                0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4, 1.6]
    bin_vals.append(float(np.max(distances)))
    idx_sort = distances.argsort()
    dist_sort = distances[idx_sort]
    #z_true_sort = z_true[idx_sort]
    results_sort = results[idx_sort]
    idx_bins = dist_sort.searchsorted(bin_vals)
    #print idx_bins
    results_binned = [results_sort[idx_bins[i]:idx_bins[i+1]] for i in range(len(bin_vals)-1)]
    return bin_vals, results_binned

In [ ]:
def mean_dist_results(results_binned):
    mean_results = [np.mean(x) for x in results_binned]
    return np.array(mean_results)

In [ ]:
def median_dist_results(results_binned):
    median_results = [np.median(x) for x in results_binned]
    return np.array(median_results)

Test 1: Optical Wavelengths


In [ ]:
np.random.seed(2314)

li_flux_results = np.zeros((25000, 6071))
nn_flux_u_results = np.zeros((25000, 6071))
#nn_flux_2u_results = np.zeros((25000, 6071))
nn_flux_2d_results = np.zeros((25000, 6071))
#nn_flux_4u_results = np.zeros((25000, 6071))
#nn_flux_4d_results = np.zeros((25000, 6071))
gp_exp_flux_results = np.zeros((25000, 6071))
gp_sq_exp_flux_results = np.zeros((25000, 6071))
gp_matern_32_flux_results = np.zeros((25000, 6071))
gp_matern_52_flux_results = np.zeros((25000, 6071))

training_colors_full = []
training_coeffs_full = []
training_eigenspectra = []
training_meanspec = []

test_colors_full = []

test_exp_params = []
test_sq_exp_params = []
test_matern_32_params = []
test_matern_52_params = []

distances_all = []

test_flux_orig = []
flux_errors_full = []

total_flagged = 0
n_runs = 500
min_wavelen = 299.
max_wavelen = 1200.
n_colors = 5
n_comps = 9

for i in range(n_runs):
    if i % 20 == 0:
        print('Run %i' % i)
    
    if os.path.exists('results'):
        shutil.rmtree('results')
    
    training_colors, training_list, training_names, test_list, test_names, \
    test_fluxes, test_colors = choose_training_and_test_set(pca_obj, 
                                                            bandpass_dict, min_wavelen, max_wavelen)
    
    new_pca_obj = esp.pcaSED()
    new_pca_obj.spec_list_orig = training_list
    new_pca_obj.PCA(comps=n_comps, minWavelen=min_wavelen, maxWavelen=max_wavelen)
    
    colors = training_colors

    nbrs = NearestNeighbors(n_neighbors=1).fit(training_colors)
    distance, idx = nbrs.kneighbors(test_colors)
    distances_all.append(np.ravel(distance))
    
    #print 'Linear Interp'
    li_spec, li_colors = linear_interpolation_spectra(colors, test_colors, new_pca_obj.spec_list_orig, 
                                                      bandpass_dict, min_wavelen, max_wavelen)
    li_flux_results[i*50:(i+1)*50] = np.abs(np.array((li_spec - test_fluxes)/test_fluxes))
    
    #print 'Nearest Neighbor Results'
    nn_obj = esp.nearestNeighborEstimate(new_pca_obj, bandpass_dict, test_colors)
    nn_spec = nn_obj.nn_predict(1)
    nn_flux_u_results[i*50:(i+1)*50] = np.abs(np.array((nn_spec.reconstruct_spectra(10) - test_fluxes)/test_fluxes))

    #nn_spec = nn_obj.nn_predict(2)
    #nn_flux_2u_results.append(np.abs(np.array((nn_spec.reconstruct_spectra(10) - test_fluxes)/test_fluxes)))
    nn_spec = nn_obj.nn_predict(2, knr_args=dict(weights='distance'))
    nn_flux_2d_results[i*50:(i+1)*50] = np.abs(np.array((nn_spec.reconstruct_spectra(10) - test_fluxes)/test_fluxes))

    #nn_spec = nn_obj.nn_predict(4)
    #nn_flux_4u_results.append(np.abs(np.array((nn_spec.reconstruct_spectra(10) - test_fluxes)/test_fluxes)))
    #nn_spec = nn_obj.nn_predict(4, knr_args=dict(weights='distance'))
    
    #print 'Gaussian Process Results'
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('exp', 1.0e-1, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, bandpass_dict)
    gp_exp_flux_results[i*50:(i+1)*50] = np.abs(np.array((gp_spec.reconstruct_spectra(n_comps) - test_fluxes)/test_fluxes))#, 
    test_exp_params.append(gp_spec.params)
    
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('sq_exp', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_sq_exp_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))#, 
    test_sq_exp_params.append(gp_spec.params)
        
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('matern_32', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_matern_32_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))
    test_matern_32_params.append(gp_spec.params)
    
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('matern_52', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_matern_52_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))
    test_matern_52_params.append(gp_spec.params)
    
    training_colors_full.append(colors)
    test_colors_full.append(test_colors)


test_colors_full = np.array(test_colors_full)

training_colors_full = np.array(training_colors_full)

In [ ]:
print("Calculating overall means")

gp_exp_flux_mean = np.mean(gp_exp_flux_results)
nn_flux_u_mean = np.mean(nn_flux_u_results)
gp_sq_exp_flux_mean = np.mean(gp_sq_exp_flux_results)
nn_flux_2d_mean = np.mean(nn_flux_2d_results)
#nn_flux_2u_mean = np.mean(nn_flux_2u_results)
#nn_flux_4d_mean = np.mean(nn_flux_4d_results)
#nn_flux_4u_mean = np.mean(nn_flux_4u_results)
li_flux_mean = np.mean(li_flux_results)
gp_matern_32_flux_mean = np.mean(gp_matern_32_flux_results)
gp_matern_52_flux_mean = np.mean(gp_matern_52_flux_results)

In [ ]:
print("Calculating mean spectra")

gp_exp_flux_mean_spec = np.mean(gp_exp_flux_results, axis=0)
gp_sq_exp_flux_mean_spec = np.mean(gp_sq_exp_flux_results, axis=0)
nn_flux_u_mean_spec = np.mean(nn_flux_u_results, axis=0)
nn_flux_2d_mean_spec = np.mean(nn_flux_2d_results, axis=0)
#nn_flux_2u_mean_spec = np.mean(nn_flux_2u_results, axis=0)
#nn_flux_4d_mean_spec = np.mean(nn_flux_4d_results, axis=0)
#nn_flux_4u_mean_spec = np.mean(nn_flux_4u_results, axis=0)
li_flux_mean_spec = np.mean(li_flux_results, axis=0)
gp_matern_32_flux_mean_spec = np.mean(gp_matern_32_flux_results, axis=0)
gp_matern_52_flux_mean_spec = np.mean(gp_matern_52_flux_results, axis=0)

In [ ]:
print("Calculating mean spectra")

gp_exp_flux_mean_dist = np.mean(gp_exp_flux_results, axis=1)
gp_sq_exp_flux_mean_dist = np.mean(gp_sq_exp_flux_results, axis=1)
gp_matern_32_flux_mean_dist = np.mean(gp_matern_32_flux_results, axis=1)
gp_matern_52_flux_mean_dist = np.mean(gp_matern_52_flux_results, axis=1)
nn_flux_u_mean_dist = np.mean(nn_flux_u_results, axis=1)
nn_flux_2d_mean_dist = np.mean(nn_flux_2d_results, axis=1)
li_flux_mean_dist = np.mean(li_flux_results, axis=1)

In [ ]:
#Exponential Kernel 1st 3
print('Exponential')
print(np.mean(np.array(np.exp(test_exp_params))[:, 0], axis=0)*np.array([5., 1.]))
print(np.mean(np.array(np.exp(test_exp_params))[:, 1], axis=0)*np.array([5., 1.]))
print(np.mean(np.array(np.exp(test_exp_params))[:, 2], axis=0)*np.array([5., 1.]))
#Squared Exponential Kernel 1st 3
print('Squared Exponential')
print(np.mean(np.array(np.exp(test_sq_exp_params))[:, 0], axis=0)*np.array([5., 1.]))
print(np.mean(np.array(np.exp(test_sq_exp_params))[:, 1], axis=0)*np.array([5., 1.]))
print(np.mean(np.array(np.exp(test_sq_exp_params))[:, 2], axis=0)*np.array([5., 1.]))
#Matern 3/2 Kernel 1st 3
print('Matern 3/2')
print(np.mean(np.array(np.exp(test_matern_32_params))[:, 0], axis=0)*np.array([5., 1.]))
print(np.mean(np.array(np.exp(test_matern_32_params))[:, 1], axis=0)*np.array([5., 1.]))
print(np.mean(np.array(np.exp(test_matern_32_params))[:, 2], axis=0)*np.array([5., 1.]))
#Matern 5/2 Kernel 1st 3
print('Matern 5/2')
print(np.mean(np.array(np.exp(test_matern_52_params))[:, 0], axis=0)*np.array([5., 1.]))
print(np.mean(np.array(np.exp(test_matern_52_params))[:, 1], axis=0)*np.array([5., 1.]))
print(np.mean(np.array(np.exp(test_matern_52_params))[:, 2], axis=0)*np.array([5., 1.]))

In [ ]:
print(gp_exp_flux_mean)
print(gp_sq_exp_flux_mean)
print(gp_matern_32_flux_mean)
print(gp_matern_52_flux_mean)
print(nn_flux_u_mean)
print(nn_flux_2d_mean)
print(li_flux_mean)

In [ ]:
gp_exp_argsort = np.argsort(gp_exp_flux_mean_dist)
gp_exp_trim_25_idx = gp_exp_argsort[6250:-6250]
print(np.mean(gp_exp_flux_mean_dist[gp_exp_trim_25_idx]))

gp_sq_exp_argsort = np.argsort(gp_sq_exp_flux_mean_dist)
gp_sq_exp_trim_25_idx = gp_sq_exp_argsort[6250:-6250]
print(np.mean(gp_sq_exp_flux_mean_dist[gp_sq_exp_trim_25_idx]))

gp_matern_32_argsort = np.argsort(gp_matern_32_flux_mean_dist)
gp_matern_32_trim_25_idx = gp_matern_32_argsort[6250:-6250]
print(np.mean(gp_matern_32_flux_mean_dist[gp_exp_trim_25_idx]))

gp_matern_52_argsort = np.argsort(gp_matern_52_flux_mean_dist)
gp_matern_52_trim_25_idx = gp_matern_52_argsort[6250:-6250]
print(np.mean(gp_matern_52_flux_mean_dist[gp_matern_52_trim_25_idx]))

nn_flux_u_argsort = np.argsort(nn_flux_u_mean_dist)
nn_flux_u_trim_25_idx = nn_flux_u_argsort[6250:-6250]
print(np.mean(nn_flux_u_mean_dist[nn_flux_u_trim_25_idx]))

nn_flux_2d_argsort = np.argsort(nn_flux_2d_mean_dist)
nn_flux_2d_trim_25_idx = nn_flux_2d_argsort[6250:-6250]
print(np.mean(nn_flux_2d_mean_dist[nn_flux_2d_trim_25_idx]))

li_flux_argsort = np.argsort(li_flux_mean_dist)
li_flux_trim_25_idx = li_flux_argsort[6250:-6250]
print(np.mean(li_flux_mean_dist[li_flux_trim_25_idx]))

In [ ]:
fig = plt.figure(figsize=(14, 6))

ax = plt.gca()
ax.set_xlabel('Wavelength (nm)', size=32)
mpl.rc('xtick', labelsize=22) 
mpl.rc('ytick', labelsize=22) 

fig.add_subplot(1,1,1)
plt.plot(new_pca_obj.wavelengths, nn_flux_u_mean_spec, label='Nearest')
plt.plot(new_pca_obj.wavelengths, nn_flux_2d_mean_spec, label='2 Nearest, Distance')
plt.plot(new_pca_obj.wavelengths, li_flux_mean_spec, label='Linear Estimation')
plt.plot(new_pca_obj.wavelengths, gp_sq_exp_flux_mean_spec, label='GP w/ Sq. Exp. Kernel')
plt.plot(new_pca_obj.wavelengths, gp_matern_52_flux_mean_spec, label='GP w/ Matern-5/2 Kernel')


plt.legend(fontsize=22)
plt.title('Mean Fractional Flux Residuals', size=24)
plt.ylabel('Mean Fractional Residuals', size=22)
plt.xlim(300, 1200)

plt.tight_layout()
#plt.savefig('paper_plots_revised/fig_2.png')

In [ ]:
fig = plt.figure(figsize=(14, 6))

ax = plt.gca()
ax.set_xlabel('Wavelength (nm)', size=32)
mpl.rc('xtick', labelsize=22) 
mpl.rc('ytick', labelsize=22)

fig.add_subplot(1,1,1)
plt.plot(new_pca_obj.wavelengths, gp_matern_52_flux_mean_spec/nn_flux_2d_mean_spec, label='GP with Matern-5/2 / 2 NN')
plt.plot(new_pca_obj.wavelengths, gp_matern_52_flux_mean_spec/li_flux_mean_spec, label='GP with Matern-5/2 / Linear')
plt.legend(fontsize=22)
plt.title('Ratio of Mean Fractional Residuals', size=24)
plt.ylabel('Ratio of Mean Frac. Resid.', size=22)
plt.hlines(1.0, 300, 1200, colors='k', linestyles='dashed', lw=6)
plt.xlim(300, 1200)
plt.ylim(0.1, 1.3)

plt.tight_layout()
#plt.savefig('paper_plots_revised/fig_3.png')

In [ ]:
fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(14,12))

fig.text(0.55, -0.02, 'Wavelength (nm)', ha='center', size=32)
fig.text(-0.02, 0.5, 'Flux (scaled)', va='center', rotation='vertical', size=32)

ax[0].plot(new_pca_obj.wavelengths, new_pca_obj.mean_spec, label='Mean Spectrum')
ax[0].set_title('Mean Spectrum', size=24)

ax[1].plot(new_pca_obj.wavelengths, new_pca_obj.eigenspectra[0], label='1st Eigenspectrum')

ax[1].plot(new_pca_obj.wavelengths, new_pca_obj.eigenspectra[1], label='2nd Eigenspectrum')

ax[1].plot(new_pca_obj.wavelengths, new_pca_obj.eigenspectra[2], label='3rd Eigenspectrum')
plt.legend(fontsize=24)
plt.title('1st 3 Eigenspectra', size=24)

plt.tight_layout()
#plt.savefig('paper_plots_revised/fig_1.png')

Test with larger part of spectrum


In [ ]:
np.random.seed(2314)

li_flux_results = np.zeros((25000, 6431))
nn_flux_u_results = np.zeros((25000, 6431))
nn_flux_2d_results = np.zeros((25000, 6431))
gp_exp_flux_results = np.zeros((25000, 6431))
gp_sq_exp_flux_results = np.zeros((25000, 6431))
gp_matern_32_flux_results = np.zeros((25000, 6431))
gp_matern_52_flux_results = np.zeros((25000, 6431))

training_colors_full = []
training_coeffs_full = []
training_eigenspectra = []
training_meanspec = []

test_colors_full = []
test_exp_params = []
test_sq_exp_params = []
test_matern_32_params = []
test_matern_52_params = []
distances_all = []

test_flux_orig = []
flux_errors_full = []

total_flagged = 0
n_runs = 500
min_wavelen = 99.
max_wavelen = 2400.
n_colors = 5
n_comps = 9
for i in range(n_runs):
    if i % 20 == 0:
        print('Run %i' % i)
    
    if os.path.exists('results'):
        shutil.rmtree('results')
    
    training_colors, training_list, training_names, test_list, test_names, \
    test_fluxes, test_colors = choose_training_and_test_set(pca_obj, 
                                                            bandpass_dict, min_wavelen, max_wavelen)
    
    new_pca_obj = esp.pcaSED()
    new_pca_obj.spec_list_orig = training_list
    new_pca_obj.PCA(comps=n_comps, minWavelen=min_wavelen, maxWavelen=max_wavelen)
    
    colors = training_colors
    
    #print 'Linear Interp'
    li_spec, li_colors = linear_interpolation_spectra(colors, test_colors, new_pca_obj.spec_list_orig, 
                                                      bandpass_dict, min_wavelen, max_wavelen)
    li_flux_results[i*50:(i+1)*50] = np.abs(np.array((li_spec - test_fluxes)/test_fluxes))
    
    #print 'Nearest Neighbor Results'
    nn_obj = esp.nearestNeighborEstimate(new_pca_obj, bandpass_dict, test_colors)
    nn_spec = nn_obj.nn_predict(1)
    nn_flux_u_results[i*50:(i+1)*50] = np.abs(np.array((nn_spec.reconstruct_spectra(10) - test_fluxes)/test_fluxes))

    nn_spec = nn_obj.nn_predict(2, knr_args=dict(weights='distance'))
    nn_flux_2d_results[i*50:(i+1)*50] = np.abs(np.array((nn_spec.reconstruct_spectra(10) - test_fluxes)/test_fluxes))
    
    #print 'Gaussian Process Results'
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('exp', 1.0e-1, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, bandpass_dict)
    gp_exp_flux_results[i*50:(i+1)*50] = np.abs(np.array((gp_spec.reconstruct_spectra(n_comps) - test_fluxes)/test_fluxes))#, 
    test_exp_params.append(gp_spec.params)
    
    
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('sq_exp', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_sq_exp_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))#, 
    test_sq_exp_params.append(gp_spec.params)
        
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('matern_32', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_matern_32_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))
    test_matern_32_params.append(gp_spec.params)
    
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('matern_52', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_matern_52_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))
    test_matern_52_params.append(gp_spec.params)
    
    training_colors_full.append(colors)
    test_colors_full.append(test_colors)


test_colors_full = np.array(test_colors_full)

training_colors_full = np.array(training_colors_full)

In [ ]:
print("Calculating overall means")

gp_exp_flux_mean = np.mean(gp_exp_flux_results)
nn_flux_u_mean = np.mean(nn_flux_u_results)
gp_sq_exp_flux_mean = np.mean(gp_sq_exp_flux_results)
nn_flux_2d_mean = np.mean(nn_flux_2d_results)
li_flux_mean = np.mean(li_flux_results)
gp_matern_32_flux_mean = np.mean(gp_matern_32_flux_results)
gp_matern_52_flux_mean = np.mean(gp_matern_52_flux_results)

In [ ]:
print(gp_exp_flux_mean)
print(gp_sq_exp_flux_mean)
print(gp_matern_32_flux_mean)
print(gp_matern_52_flux_mean)
print(nn_flux_u_mean)
print(nn_flux_2d_mean)
print(li_flux_mean)

In [ ]:
print("Calculating mean spectra")

gp_exp_flux_mean_dist_2 = np.mean(gp_exp_flux_results, axis=1)
gp_sq_exp_flux_mean_dist_2 = np.mean(gp_sq_exp_flux_results, axis=1)
nn_flux_u_mean_dist_2 = np.mean(nn_flux_u_results, axis=1)
nn_flux_2d_mean_dist_2 = np.mean(nn_flux_2d_results, axis=1)
li_flux_mean_dist_2 = np.mean(li_flux_results, axis=1)
gp_matern_32_flux_mean_dist_2 = np.mean(gp_matern_32_flux_results, axis=1)
gp_matern_52_flux_mean_dist_2 = np.mean(gp_matern_52_flux_results, axis=1)

In [ ]:
gp_exp_argsort = np.argsort(gp_exp_flux_mean_dist_2)
gp_exp_trim_25_idx = gp_exp_argsort[6250:-6250]
print(np.mean(gp_exp_flux_mean_dist_2[gp_exp_trim_25_idx]))

gp_sq_exp_argsort = np.argsort(gp_sq_exp_flux_mean_dist_2)
gp_sq_exp_trim_25_idx = gp_sq_exp_argsort[6250:-6250]
print(np.mean(gp_sq_exp_flux_mean_dist_2[gp_sq_exp_trim_25_idx]))

gp_matern_32_argsort = np.argsort(gp_matern_32_flux_mean_dist_2)
gp_matern_32_trim_25_idx = gp_matern_32_argsort[6250:-6250]
print(np.mean(gp_matern_32_flux_mean_dist_2[gp_exp_trim_25_idx]))

gp_matern_52_argsort = np.argsort(gp_matern_52_flux_mean_dist_2)
gp_matern_52_trim_25_idx = gp_matern_52_argsort[6250:-6250]
print(np.mean(gp_matern_52_flux_mean_dist_2[gp_matern_52_trim_25_idx]))

nn_flux_u_argsort = np.argsort(nn_flux_u_mean_dist_2)
nn_flux_u_trim_25_idx = nn_flux_u_argsort[6250:-6250]
print(np.mean(nn_flux_u_mean_dist_2[nn_flux_u_trim_25_idx]))

nn_flux_2d_argsort = np.argsort(nn_flux_2d_mean_dist_2)
nn_flux_2d_trim_25_idx = nn_flux_2d_argsort[6250:-6250]
print(np.mean(nn_flux_2d_mean_dist_2[nn_flux_2d_trim_25_idx]))

li_flux_argsort = np.argsort(li_flux_mean_dist_2)
li_flux_trim_25_idx = li_flux_argsort[6250:-6250]
print(np.mean(li_flux_mean_dist_2[li_flux_trim_25_idx]))

In [ ]:
print("Calculating mean spectra")

gp_exp_flux_mean_spec_2 = np.mean(gp_exp_flux_results, axis=0)
gp_sq_exp_flux_mean_spec_2 = np.mean(gp_sq_exp_flux_results, axis=0)
nn_flux_u_mean_spec_2 = np.mean(nn_flux_u_results, axis=0)
nn_flux_2d_mean_spec_2 = np.mean(nn_flux_2d_results, axis=0)
li_flux_mean_spec_2 = np.mean(li_flux_results, axis=0)
gp_matern_32_flux_mean_spec_2 = np.mean(gp_matern_32_flux_results, axis=0)
gp_matern_52_flux_mean_spec_2 = np.mean(gp_matern_52_flux_results, axis=0)

In [ ]:
idx_300 = np.where(new_pca_obj.wavelengths >= 299.)[0][0]
idx_1200 = np.where(new_pca_obj.wavelengths <= 1200.)[0][-1]+1
print(np.mean(gp_exp_flux_mean_spec_2[idx_300:idx_1200]))
print(np.mean(gp_sq_exp_flux_mean_spec_2[idx_300:idx_1200]))
print(np.mean(gp_matern_32_flux_mean_spec_2[idx_300:idx_1200]))
print(np.mean(gp_matern_52_flux_mean_spec_2[idx_300:idx_1200]))
print(np.mean(nn_flux_u_mean_spec_2[idx_300:idx_1200]))
print(np.mean(nn_flux_2d_mean_spec_2[idx_300:idx_1200]))
print(np.mean(li_flux_mean_spec_2[idx_300:idx_1200]))

Test with artificial filters


In [ ]:
np.random.seed(2314)

gp_exp_flux_results = np.zeros((25000, 6431))
gp_sq_exp_flux_results = np.zeros((25000, 6431))
gp_matern_32_flux_results = np.zeros((25000, 6431))
gp_matern_52_flux_results = np.zeros((25000, 6431))

training_colors_full = []
training_coeffs_full = []
training_eigenspectra = []
training_meanspec = []

test_colors_full = []
test_spectra_full = []
test_coeffs_full = []
test_exp_params = []
test_sq_exp_params = []
test_matern_32_params = []
test_matern_52_params = []
distances_all = []

test_flux_orig = []
flux_errors_full = []

total_flagged = 0
n_runs = 500
min_wavelen = 99.
max_wavelen = 2400.
n_colors = 9
n_comps = 9
for i in range(n_runs):
    if i % 20 == 0:
        print('Run %i' % i)
    
    if os.path.exists('results'):
        shutil.rmtree('results')
    
    training_colors, training_list, training_names, test_list, test_names, \
    test_fluxes, test_colors = choose_training_and_test_set(pca_obj, 
                                                            bandpass_dict, min_wavelen, max_wavelen)
    
    new_pca_obj = esp.pcaSED()
    new_pca_obj.spec_list_orig = training_list
    new_pca_obj.PCA(comps=n_comps, minWavelen=min_wavelen, maxWavelen=max_wavelen)
    
    colors = training_colors

    nbrs = NearestNeighbors(n_neighbors=1).fit(training_colors)
    distance, idx = nbrs.kneighbors(test_colors)
    distances_all.append(np.ravel(distance))
    
    #print 'Gaussian Process Results'
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('exp', 1.0e-1, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, new_bandpass_dict)
    gp_exp_flux_results[i*50:(i+1)*50] = np.abs(np.array((gp_spec.reconstruct_spectra(n_comps) - test_fluxes)/test_fluxes))#, 
    test_exp_params.append(gp_spec.params)
    
    
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('sq_exp', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, new_bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_sq_exp_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))#, 
    test_sq_exp_params.append(gp_spec.params)
        
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('matern_32', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, new_bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_matern_32_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))
    test_matern_32_params.append(gp_spec.params)
    
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('matern_52', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, new_bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_matern_52_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))
    test_matern_52_params.append(gp_spec.params)
    
    training_colors_full.append(colors)
    test_colors_full.append(test_colors)

test_colors_full = np.array(test_colors_full)

training_colors_full = np.array(training_colors_full)

In [ ]:
print("Calculating overall means")

gp_exp_flux_mean = np.mean(gp_exp_flux_results)
#nn_flux_u_mean = np.mean(nn_flux_u_results)
gp_sq_exp_flux_mean = np.mean(gp_sq_exp_flux_results)
#nn_flux_2d_mean = np.mean(nn_flux_2d_results)
#nn_flux_2u_mean = np.mean(nn_flux_2u_results)
#nn_flux_4d_mean = np.mean(nn_flux_4d_results)
#nn_flux_4u_mean = np.mean(nn_flux_4u_results)
#li_flux_mean = np.mean(li_flux_results)
gp_matern_32_flux_mean = np.mean(gp_matern_32_flux_results)
gp_matern_52_flux_mean = np.mean(gp_matern_52_flux_results)

In [ ]:
print(gp_sq_exp_flux_mean)
print(gp_exp_flux_mean)
#print(nn_flux_u_mean)
#print(nn_flux_2d_mean)
#print(li_flux_mean)
print(gp_matern_32_flux_mean)
print(gp_matern_52_flux_mean)

In [ ]:
print("Calculating mean spectra")

gp_exp_flux_mean_spec_3 = np.mean(gp_exp_flux_results, axis=0)
gp_sq_exp_flux_mean_spec_3 = np.mean(gp_sq_exp_flux_results, axis=0)
gp_matern_32_flux_mean_spec_3 = np.mean(gp_matern_32_flux_results, axis=0)
gp_matern_52_flux_mean_spec_3 = np.mean(gp_matern_52_flux_results, axis=0)
nn_flux_u_mean_spec_3 = np.mean(nn_flux_u_results, axis=0)
li_flux_mean_spec_3 = np.mean(li_flux_results, axis=0)

In [ ]:
idx_300 = np.where(new_pca_obj.wavelengths >= 299.)[0][0]
idx_1200 = np.where(new_pca_obj.wavelengths <= 1200.)[0][-1]+1
print(np.mean(gp_exp_flux_mean_spec_3[idx_300:idx_1200]))
print(np.mean(gp_sq_exp_flux_mean_spec_3[idx_300:idx_1200]))
print(np.mean(gp_matern_32_flux_mean_spec_3[idx_300:idx_1200]))
print(np.mean(gp_matern_52_flux_mean_spec_3[idx_300:idx_1200]))

In [ ]:
print("Calculating mean spectra")

gp_exp_flux_mean_dist_3 = np.mean(gp_exp_flux_results, axis=1)
gp_sq_exp_flux_mean_dist_3 = np.mean(gp_sq_exp_flux_results, axis=1)
nn_flux_u_mean_dist_3 = np.mean(nn_flux_u_results, axis=1)
#nn_flux_2d_mean_dist_3 = np.mean(nn_flux_2d_results, axis=1)
li_flux_mean_dist_3 = np.mean(li_flux_results, axis=1)
gp_matern_32_flux_mean_dist_3 = np.mean(gp_matern_32_flux_results, axis=1)
gp_matern_52_flux_mean_dist_3 = np.mean(gp_matern_52_flux_results, axis=1)

In [ ]:
gp_exp_argsort = np.argsort(gp_exp_flux_mean_dist_3)
gp_exp_trim_25_idx = gp_exp_argsort[6250:-6250]
print(np.mean(gp_exp_flux_mean_dist_3[gp_exp_trim_25_idx]))

gp_sq_exp_argsort = np.argsort(gp_sq_exp_flux_mean_dist_3)
gp_sq_exp_trim_25_idx = gp_sq_exp_argsort[6250:-6250]
print(np.mean(gp_sq_exp_flux_mean_dist_3[gp_sq_exp_trim_25_idx]))

gp_matern_32_argsort = np.argsort(gp_matern_32_flux_mean_dist_3)
gp_matern_32_trim_25_idx = gp_matern_32_argsort[6250:-6250]
print(np.mean(gp_matern_32_flux_mean_dist_3[gp_exp_trim_25_idx]))

gp_matern_52_argsort = np.argsort(gp_matern_52_flux_mean_dist_3)
gp_matern_52_trim_25_idx = gp_matern_52_argsort[6250:-6250]
print(np.mean(gp_matern_52_flux_mean_dist_3[gp_matern_52_trim_25_idx]))

In [ ]:
fig = plt.figure(figsize=(14, 12))

ax = fig.add_subplot(111)
ax.spines['top'].set_color('none')
ax.spines['bottom'].set_color('none')
ax.spines['left'].set_color('none')
ax.spines['right'].set_color('none')
ax.set_axis_bgcolor('white') 
ax.tick_params(labelcolor='w', top='off', bottom='off', left='off', right='off')
ax.set_xlabel('Wavelength (nm)', size=32)
ax.set_ylabel('Ratio of Mean Fractional Residuals', size=32)
mpl.rc('xtick', labelsize=22) 
mpl.rc('ytick', labelsize=22)

fig.add_subplot(2,1,1)
plt.plot(new_pca_obj.wavelengths, gp_matern_32_flux_mean_spec_2/nn_flux_u_mean_spec_3, label='GP with Matern-3/2 / NN', lw=4)
plt.plot(new_pca_obj.wavelengths, gp_matern_32_flux_mean_spec_2/li_flux_mean_spec_3, label='GP with Matern-3/2 / Linear', lw=4)
plt.legend(fontsize=22)
plt.title('Only training with LSST filters', size=24)
plt.hlines(1.0, 99, 2400, colors='k', linestyles='dashed', lw=6)
plt.xlim(99, 2400)

fig.add_subplot(2,1,2)
plt.plot(new_pca_obj.wavelengths, gp_matern_32_flux_mean_spec_3/nn_flux_u_mean_spec_3, label='GP with Matern-3/2 / NN', lw=4)
plt.plot(new_pca_obj.wavelengths, gp_matern_32_flux_mean_spec_3/li_flux_mean_spec_3, label='GP with Matern-3/2 / Linear', lw=4)
plt.legend(fontsize=22)
plt.title('With additional training filters', size=24)
plt.hlines(1.0, 99, 2400, colors='k', linestyles='dashed', lw=6)
plt.xlim(99, 2400)
plt.ylim(0, 2)

plt.tight_layout()
#plt.savefig('paper_plots_revised/fig_6.png')

Look at distances from nearest neighbor


In [ ]:
np.random.seed(2314)

distances_all = []

test_flux_orig = []
flux_errors_full = []

total_flagged = 0
n_runs = 500
min_wavelen = 99.
max_wavelen = 2400.
n_colors = 9
n_comps = 9
for i in range(n_runs):
    if i % 20 == 0:
        print('Run %i' % i)
    
    if os.path.exists('results'):
        shutil.rmtree('results')
    
    training_colors, training_list, training_names, test_list, test_names, \
    test_fluxes, test_colors = choose_training_and_test_set(pca_obj, 
                                                            bandpass_dict, min_wavelen, max_wavelen)
    
    new_pca_obj = esp.pcaSED()
    new_pca_obj.spec_list_orig = training_list
    new_pca_obj.PCA(comps=n_comps, minWavelen=min_wavelen, maxWavelen=max_wavelen)
    
    colors = training_colors

    nbrs = NearestNeighbors(n_neighbors=1).fit(training_colors)
    distance, idx = nbrs.kneighbors(test_colors)
    distances_all.append(np.ravel(distance))

In [ ]:
use_distances = np.ravel(distances_all)

In [ ]:
n_bins = 25
bin_vals, gp_matern_52_dist_results = calc_dist_bins(use_distances, gp_matern_52_flux_mean_dist, n_bins)
bin_vals, nn_u_dist_results = calc_dist_bins(use_distances, nn_flux_u_mean_dist, n_bins)
bin_vals, nn_2d_dist_results = calc_dist_bins(use_distances, nn_flux_2d_mean_dist, n_bins)
bin_vals, li_dist_results = calc_dist_bins(use_distances, li_flux_mean_dist, n_bins)

In [ ]:
n_bins = 25
bin_vals, gp_matern_32_dist_results_3 = calc_dist_bins(use_distances, gp_matern_32_flux_mean_dist_3, n_bins)
bin_vals, nn_u_dist_results_3 = calc_dist_bins(use_distances, nn_flux_u_mean_dist_3, n_bins)
bin_vals, li_dist_results_3 = calc_dist_bins(use_distances, li_flux_mean_dist_3, n_bins)

In [ ]:
fig = plt.figure(figsize=(18,12))

ax = fig.add_subplot(111)
ax.spines['top'].set_color('none')
ax.spines['bottom'].set_color('none')
ax.spines['left'].set_color('none')
ax.spines['right'].set_color('none')
ax.set_axis_bgcolor('white') 
ax.tick_params(labelcolor='w', top='off', bottom='off', left='off', right='off')
ax.set_ylabel('Ratio of Mean Fractional Residuals', size=32)
ax.set_xlabel('Distance in mags to nearest neighbor', size=32)
mpl.rc('xtick', labelsize=22) 
mpl.rc('ytick', labelsize=22)

fig.add_subplot(2,1,1)

plt.plot(bin_vals[:-1], mean_dist_results(gp_matern_52_dist_results)/mean_dist_results(nn_2d_dist_results), label='GP with Matern-5/2 / 2 NN', lw=8)
plt.plot(bin_vals[:-1], mean_dist_results(gp_matern_52_dist_results)/mean_dist_results(li_dist_results), label='GP with Matern-5/2 / LI', lw=8)
plt.xlim(0, 0.6)
plt.ylim(0, 2.)
plt.hlines(1.0, 0.0, 0.6, linestyles='dashed', lw=6)
plt.legend(fontsize=22)
plt.title('Ratio of Residual Errors in Test 1', size=32)

fig.add_subplot(2,1,2)

plt.plot(bin_vals[:-1], mean_dist_results(gp_matern_32_dist_results_3)/mean_dist_results(nn_u_dist_results_3), label='GP with Matern-3/2 / NN', lw=8)
plt.plot(bin_vals[:-1], mean_dist_results(gp_matern_32_dist_results_3)/mean_dist_results(li_dist_results_3), label='GP with Matern-3/2 / LI', lw=8)
plt.xlim(0, 0.6)
plt.ylim(0, 2.)
plt.hlines(1.0, 0.0, 0.6, linestyles='dashed', lw=6)
plt.legend(fontsize=22)
plt.title('Ratio of Residual Errors in Test 3', size=32)

plt.tight_layout()
#plt.savefig('paper_plots_revised/fig_7.png')

In [ ]:
n, bins = np.histogram(use_distances, bins=bin_vals)

In [ ]:
np.sum(n[:12])/np.sum(n)

Narrowband filters test


In [ ]:
filters = ['F344N', 'F502N', 'F658N', 'F892N']
narrow_band_dict = {}
total_wavelengths = []
for filt_name in filters:
    narrow_band = np.genfromtxt('narrowband_filters/HST_ACS_HRC.%s.dat' % filt_name, unpack=True)
    narrow_band[0] /= 10. # Convert to nanometers
    total_wavelengths.append(narrow_band[0])
    narrow_band_dict[filt_name] = narrow_band

In [ ]:
total_wavelengths = [x for y in total_wavelengths for x in y]

In [ ]:
narrow_bandpasses = []
for filt_name in filters:
    bandpass = np.zeros(len(total_wavelengths))
    min_idx = np.where(total_wavelengths == narrow_band_dict[filt_name][0][0])[0][0]
    max_idx = np.where(total_wavelengths == narrow_band_dict[filt_name][0][-1])[0][0]
    bandpass[min_idx:max_idx+1] = narrow_band_dict[filt_name][1]
    narrow_bandpasses.append(Bandpass(wavelen=np.array(total_wavelengths), sb=bandpass))

In [ ]:
narrow_bandpass_dict = BandpassDict(narrow_bandpasses, filters)

In [ ]:
np.random.seed(2314)

li_flux_results = np.zeros((25000, 6071))
nn_flux_u_results = np.zeros((25000, 6071))
#nn_flux_2u_results = np.zeros((25000, 6071))
nn_flux_2d_results = np.zeros((25000, 6071))
#nn_flux_4u_results = np.zeros((25000, 6071))
#nn_flux_4d_results = np.zeros((25000, 6071))
gp_exp_flux_results = np.zeros((25000, 6071))
gp_sq_exp_flux_results = np.zeros((25000, 6071))
gp_matern_32_flux_results = np.zeros((25000, 6071))
gp_matern_52_flux_results = np.zeros((25000, 6071))

training_colors_full = []
training_coeffs_full = []
training_eigenspectra = []
training_meanspec = []

test_colors_full = []
test_exp_params = []
test_sq_exp_params = []
test_matern_32_params = []
test_matern_52_params = []
distances_all = []


total_flagged = 0
n_runs = 500
min_wavelen = 299.
max_wavelen = 1200.
n_colors = 3
n_comps = 9

for i in range(n_runs):
    if i % 20 == 0:
        print('Run %i' % i)
    
    if os.path.exists('results'):
        shutil.rmtree('results')
    
    training_colors, training_list, training_names, test_list, test_names, \
    test_fluxes, test_colors = choose_training_and_test_set(pca_obj, 
                                                            narrow_bandpass_dict, min_wavelen, max_wavelen)
    
    new_pca_obj = esp.pcaSED()
    new_pca_obj.spec_list_orig = training_list
    new_pca_obj.PCA(comps=n_comps, minWavelen=min_wavelen, maxWavelen=max_wavelen)
    
    colors = training_colors

    nbrs = NearestNeighbors(n_neighbors=1).fit(training_colors)
    distance, idx = nbrs.kneighbors(test_colors)
    distances_all.append(np.ravel(distance))
    
    #print 'Linear Interp'
    li_spec, li_colors = linear_interpolation_spectra(colors, test_colors, new_pca_obj.spec_list_orig, 
                                                      narrow_bandpass_dict, min_wavelen, max_wavelen)
    li_flux_results[i*50:(i+1)*50] = np.abs(np.array((li_spec - test_fluxes)/test_fluxes))
    
    #print 'Nearest Neighbor Results'
    nn_obj = esp.nearestNeighborEstimate(new_pca_obj, narrow_bandpass_dict, test_colors)
    nn_spec = nn_obj.nn_predict(1)
    nn_flux_u_results[i*50:(i+1)*50] = np.abs(np.array((nn_spec.reconstruct_spectra(10) - test_fluxes)/test_fluxes))

    #nn_spec = nn_obj.nn_predict(2)
    #nn_flux_2u_results.append(np.abs(np.array((nn_spec.reconstruct_spectra(10) - test_fluxes)/test_fluxes)))
    nn_spec = nn_obj.nn_predict(2, knr_args=dict(weights='distance'))
    nn_flux_2d_results[i*50:(i+1)*50] = np.abs(np.array((nn_spec.reconstruct_spectra(10) - test_fluxes)/test_fluxes))

    #nn_spec = nn_obj.nn_predict(4)
    #nn_flux_4u_results.append(np.abs(np.array((nn_spec.reconstruct_spectra(10) - test_fluxes)/test_fluxes)))
    #nn_spec = nn_obj.nn_predict(4, knr_args=dict(weights='distance'))
    
    #print 'Gaussian Process Results'
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, narrow_bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('exp', 1.0e-1, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, narrow_bandpass_dict)
    gp_exp_flux_results[i*50:(i+1)*50] = np.abs(np.array((gp_spec.reconstruct_spectra(n_comps) - test_fluxes)/test_fluxes))#, 
    test_exp_params.append(gp_spec.params)
    
    
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, narrow_bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('sq_exp', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, narrow_bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_sq_exp_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))#, 
    test_sq_exp_params.append(gp_spec.params)
        
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, narrow_bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('matern_32', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, narrow_bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_matern_32_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))
    test_matern_32_params.append(gp_spec.params)
    
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, narrow_bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('matern_52', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, narrow_bandpass_dict)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_matern_52_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))
    test_matern_52_params.append(gp_spec.params)
    
    training_colors_full.append(colors)
    test_colors_full.append(test_colors)


test_colors_full = np.array(test_colors_full)

training_colors_full = np.array(training_colors_full)

In [ ]:
print("Calculating overall means")

gp_exp_flux_mean_nb = np.mean(gp_exp_flux_results)
nn_flux_u_mean_nb = np.mean(nn_flux_u_results)
gp_sq_exp_flux_mean_nb = np.mean(gp_sq_exp_flux_results)
nn_flux_2d_mean_nb = np.mean(nn_flux_2d_results)
li_flux_mean_nb = np.mean(li_flux_results)
gp_matern_32_flux_mean_nb = np.mean(gp_matern_32_flux_results)
gp_matern_52_flux_mean_nb = np.mean(gp_matern_52_flux_results)

In [ ]:
print(gp_exp_flux_mean_nb)
print(gp_sq_exp_flux_mean_nb)
print(gp_matern_32_flux_mean_nb)
print(gp_matern_52_flux_mean_nb)
print(nn_flux_u_mean_nb)
print(nn_flux_2d_mean_nb)
print(li_flux_mean_nb)

In [ ]:
print("Calculating mean spectra")

gp_exp_flux_mean_dist_nb = np.mean(gp_exp_flux_results, axis=1)
gp_sq_exp_flux_mean_dist_nb = np.mean(gp_sq_exp_flux_results, axis=1)
nn_flux_u_mean_dist_nb = np.mean(nn_flux_u_results, axis=1)
nn_flux_2d_mean_dist_nb = np.mean(nn_flux_2d_results, axis=1)
li_flux_mean_dist_nb = np.mean(li_flux_results, axis=1)
gp_matern_32_flux_mean_dist_nb = np.mean(gp_matern_32_flux_results, axis=1)
gp_matern_52_flux_mean_dist_nb = np.mean(gp_matern_52_flux_results, axis=1)

In [ ]:
gp_exp_argsort = np.argsort(gp_exp_flux_mean_dist_nb)
gp_exp_trim_25_idx = gp_exp_argsort[6250:-6250]
print(np.mean(gp_exp_flux_mean_dist_nb[gp_exp_trim_25_idx]))

gp_sq_exp_argsort = np.argsort(gp_sq_exp_flux_mean_dist_nb)
gp_sq_exp_trim_25_idx = gp_sq_exp_argsort[6250:-6250]
print(np.mean(gp_sq_exp_flux_mean_dist_nb[gp_sq_exp_trim_25_idx]))

gp_matern_32_argsort = np.argsort(gp_matern_32_flux_mean_dist_nb)
gp_matern_32_trim_25_idx = gp_matern_32_argsort[6250:-6250]
print(np.mean(gp_matern_32_flux_mean_dist_nb[gp_exp_trim_25_idx]))

gp_matern_52_argsort = np.argsort(gp_matern_52_flux_mean_dist_nb)
gp_matern_52_trim_25_idx = gp_matern_52_argsort[6250:-6250]
print(np.mean(gp_matern_52_flux_mean_dist_nb[gp_matern_52_trim_25_idx]))

nn_flux_u_argsort = np.argsort(nn_flux_u_mean_dist_nb)
nn_flux_u_trim_25_idx = nn_flux_u_argsort[6250:-6250]
print(np.mean(nn_flux_u_mean_dist_nb[nn_flux_u_trim_25_idx]))

nn_flux_2d_argsort = np.argsort(nn_flux_2d_mean_dist_nb)
nn_flux_2d_trim_25_idx = nn_flux_2d_argsort[6250:-6250]
print(np.mean(nn_flux_2d_mean_dist_nb[nn_flux_2d_trim_25_idx]))

li_flux_argsort = np.argsort(li_flux_mean_dist_nb)
li_flux_trim_25_idx = li_flux_argsort[6250:-6250]
print(np.mean(li_flux_mean_dist_nb[li_flux_trim_25_idx]))

In [ ]:
narrow_bandpasses.insert(0, blue_bandpass_3)
narrow_bandpasses.insert(0, blue_bandpass_1)
narrow_bandpasses.append(bandpass_dict['y'])
narrow_bandpasses.append(red_bandpass_2)
narrow_bandpasses.append(red_bandpass_4)

In [ ]:
narrow_bandpass_dict_training = BandpassDict(narrow_bandpasses, ['blue_1', 'blue_3', filters[0], filters[1],
                                                                 filters[2], filters[3], 'y', 'red_2', 'red_4'])

In [ ]:
np.random.seed(2314)

li_flux_results = np.zeros((25000, 6431))
nn_flux_u_results = np.zeros((25000, 6431))
nn_flux_2d_results = np.zeros((25000, 6431))
gp_exp_flux_results = np.zeros((25000, 6431))
gp_sq_exp_flux_results = np.zeros((25000, 6431))
gp_matern_32_flux_results = np.zeros((25000, 6431))
gp_matern_52_flux_results = np.zeros((25000, 6431))

training_colors_full = []
training_coeffs_full = []
training_eigenspectra = []
training_meanspec = []

test_colors_full = []
test_spectra_full = []
test_coeffs_full = []
test_exp_params = []
test_sq_exp_params = []
test_matern_32_params = []
test_matern_52_params = []
test_var = []
distances_all = []

test_flux_orig = []
flux_errors_full = []

total_flagged = 0
n_runs = 500
min_wavelen = 99.
max_wavelen = 2400.
n_colors = 8
n_comps = 9
for i in range(n_runs):
    if i % 20 == 0:
        print('Run %i' % i)
    
    if os.path.exists('results'):
        shutil.rmtree('results')
    
    training_colors, training_list, training_names, test_list, test_names, \
    test_fluxes, test_colors = choose_training_and_test_set(pca_obj, 
                                                            narrow_bandpass_dict, min_wavelen, max_wavelen)
    
    new_pca_obj = esp.pcaSED()
    new_pca_obj.spec_list_orig = training_list
    new_pca_obj.PCA(comps=n_comps, minWavelen=min_wavelen, maxWavelen=max_wavelen)
    
    colors = training_colors

    nbrs = NearestNeighbors(n_neighbors=1).fit(training_colors)
    distance, idx = nbrs.kneighbors(test_colors)
    distances_all.append(np.ravel(distance))
    
    #print 'Gaussian Process Results'
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, narrow_bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('exp', 1.0e-1, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, narrow_bandpass_dict_training)
    gp_exp_flux_results[i*50:(i+1)*50] = np.abs(np.array((gp_spec.reconstruct_spectra(n_comps) - test_fluxes)/test_fluxes))#, 
    test_exp_params.append(gp_spec.params)
    
    
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, narrow_bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('sq_exp', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, narrow_bandpass_dict_training)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_sq_exp_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))#, 
    test_sq_exp_params.append(gp_spec.params)
        
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, narrow_bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('matern_32', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, narrow_bandpass_dict_training)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_matern_32_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))
    test_matern_32_params.append(gp_spec.params)
    
    gp_obj = esp.gaussianProcessEstimate(new_pca_obj, narrow_bandpass_dict, test_colors)
    gp_kernel = gp_obj.define_kernel('matern_52', 1.0e-3, 1.0e-2, n_colors)
    gp_spec = gp_obj.gp_predict(gp_kernel, narrow_bandpass_dict_training)
    recon_spectra = gp_spec.reconstruct_spectra(n_comps)
    gp_matern_52_flux_results[i*50:(i+1)*50] = np.abs(np.array((recon_spectra - test_fluxes)/test_fluxes))
    test_matern_52_params.append(gp_spec.params)
    
    training_colors_full.append(colors)
    test_colors_full.append(test_colors)

test_colors_full = np.array(test_colors_full)

training_colors_full = np.array(training_colors_full)

In [ ]:
print("Calculating overall means")

gp_exp_flux_mean_nb_2 = np.mean(gp_exp_flux_results)
nn_flux_u_mean_nb_2 = np.mean(nn_flux_u_results)
gp_sq_exp_flux_mean_nb_2 = np.mean(gp_sq_exp_flux_results)
nn_flux_2d_mean_nb_2 = np.mean(nn_flux_2d_results)
li_flux_mean_nb_2 = np.mean(li_flux_results)
gp_matern_32_flux_mean_nb_2 = np.mean(gp_matern_32_flux_results)
gp_matern_52_flux_mean_nb_2 = np.mean(gp_matern_52_flux_results)

In [ ]:
print(gp_exp_flux_mean_nb_2)
print(gp_sq_exp_flux_mean_nb_2)
print(gp_matern_32_flux_mean_nb_2)
print(gp_matern_52_flux_mean_nb_2)
print(nn_flux_u_mean_nb_2)
print(nn_flux_2d_mean_nb_2)
print(li_flux_mean_nb_2)

In [ ]:
print("Calculating mean spectra")

gp_exp_flux_mean_spec_2 = np.mean(gp_exp_flux_results, axis=0)
gp_sq_exp_flux_mean_spec_2 = np.mean(gp_sq_exp_flux_results, axis=0)
nn_flux_u_mean_spec_2 = np.mean(nn_flux_u_results, axis=0)
nn_flux_2d_mean_spec_2 = np.mean(nn_flux_2d_results, axis=0)
li_flux_mean_spec_2 = np.mean(li_flux_results, axis=0)
gp_matern_32_flux_mean_spec_2 = np.mean(gp_matern_32_flux_results, axis=0)
gp_matern_52_flux_mean_spec_2 = np.mean(gp_matern_52_flux_results, axis=0)

In [ ]:
idx_300 = np.where(new_pca_obj.wavelengths >= 299.)[0][0]
idx_1200 = np.where(new_pca_obj.wavelengths <= 1200.)[0][-1]+1
print(np.mean(gp_exp_flux_mean_spec_2[idx_300:idx_1200]))
print(np.mean(gp_sq_exp_flux_mean_spec_2[idx_300:idx_1200]))
print(np.mean(gp_matern_32_flux_mean_spec_2[idx_300:idx_1200]))
print(np.mean(gp_matern_52_flux_mean_spec_2[idx_300:idx_1200]))
print(np.mean(nn_flux_u_mean_spec_2[idx_300:idx_1200]))
print(np.mean(nn_flux_2d_mean_spec_2[idx_300:idx_1200]))
print(np.mean(li_flux_mean_spec_2[idx_300:idx_1200]))

In [ ]:
print("Calculating mean spectra")

gp_exp_flux_mean_dist_nb_2 = np.mean(gp_exp_flux_results, axis=1)
gp_sq_exp_flux_mean_dist_nb_2 = np.mean(gp_sq_exp_flux_results, axis=1)
nn_flux_u_mean_dist_nb_2 = np.mean(nn_flux_u_results, axis=1)
nn_flux_2d_mean_dist_nb_2 = np.mean(nn_flux_2d_results, axis=1)
li_flux_mean_dist_nb_2 = np.mean(li_flux_results, axis=1)
gp_matern_32_flux_mean_dist_nb_2 = np.mean(gp_matern_32_flux_results, axis=1)
gp_matern_52_flux_mean_dist_nb_2 = np.mean(gp_matern_52_flux_results, axis=1)

In [ ]:
gp_exp_argsort = np.argsort(gp_exp_flux_mean_dist_nb_2)
gp_exp_trim_25_idx = gp_exp_argsort[6250:-6250]
print(np.mean(gp_exp_flux_mean_dist_nb_2[gp_exp_trim_25_idx]))

gp_sq_exp_argsort = np.argsort(gp_sq_exp_flux_mean_dist_nb_2)
gp_sq_exp_trim_25_idx = gp_sq_exp_argsort[6250:-6250]
print(np.mean(gp_sq_exp_flux_mean_dist_nb_2[gp_sq_exp_trim_25_idx]))

gp_matern_32_argsort = np.argsort(gp_matern_32_flux_mean_dist_nb_2)
gp_matern_32_trim_25_idx = gp_matern_32_argsort[6250:-6250]
print(np.mean(gp_matern_32_flux_mean_dist_nb_2[gp_exp_trim_25_idx]))

gp_matern_52_argsort = np.argsort(gp_matern_52_flux_mean_dist_nb_2)
gp_matern_52_trim_25_idx = gp_matern_52_argsort[6250:-6250]
print(np.mean(gp_matern_52_flux_mean_dist_nb_2[gp_matern_52_trim_25_idx]))

nn_flux_u_argsort = np.argsort(nn_flux_u_mean_dist_nb_2)
nn_flux_u_trim_25_idx = nn_flux_u_argsort[6250:-6250]
print(np.mean(nn_flux_u_mean_dist_nb_2[nn_flux_u_trim_25_idx]))

nn_flux_2d_argsort = np.argsort(nn_flux_2d_mean_dist_nb_2)
nn_flux_2d_trim_25_idx = nn_flux_2d_argsort[6250:-6250]
print(np.mean(nn_flux_2d_mean_dist_nb_2[nn_flux_2d_trim_25_idx]))

li_flux_argsort = np.argsort(li_flux_mean_dist_nb_2)
li_flux_trim_25_idx = li_flux_argsort[6250:-6250]
print(np.mean(li_flux_mean_dist_nb_2[li_flux_trim_25_idx]))

Photometric Redshift Test

Create LePhare formatted catalog


In [ ]:
# Load catalog (To request catalog email author)
esp_test_cat = pd.read_csv('esp_test_cat.csv')

In [ ]:
# Output catalog in LePhare format
esp_test_cat['context'] = 63
cols = esp_test_cat.columns.tolist()
print(cols)

lephare_cols = cols[2:] + [cols[1]] + [cols[0]]
print(lephare_cols)

lephare_cat = esp_test_cat[lephare_cols]

In [ ]:
# Write LePhare cat to file
lephare_cat.to_csv('esp_lephare_cat.in', sep=' ', header=False)

Create template sets


In [ ]:
np.random.seed(1255)
rand_sed_nums = np.random.choice(len(pca_obj.spec_list_orig), 
                                 size=60,
                                 replace=False)
template_set = []
for sed_num in rand_sed_nums:
    template_set.append(pca_obj.spec_list_orig[sed_num])

In [ ]:
# BC03 templates
template_folder = 'photoz_template_set'
os.mkdir(template_folder)
os.mkdir(str(template_folder + '/ESP_10'))
sed_names = []
for sed_obj in template_set[:10]:
    new_sed_obj = Sed()
    new_sed_obj.setSED(wavelen=10.*sed_obj.wavelen, flambda=0.1*sed_obj.flambda)
    new_sed_obj.writeSED(str(template_folder + '/ESP_10/' + sed_obj.name[:-3] + '.sed'))
    sed_names.append('ESP_10/' + sed_obj.name[:-3] + '.sed')
np.savetxt(str(template_folder + '/ESP_10/' + 'seds.list'), sed_names, fmt=['%s'])
os.mkdir(str(template_folder + '/ESP_60'))
sed_names = []
for sed_obj in template_set[:60]:
    new_sed_obj = Sed()
    new_sed_obj.setSED(wavelen=10.*sed_obj.wavelen, flambda=0.1*sed_obj.flambda)
    new_sed_obj.writeSED(str(template_folder + '/ESP_60/' + sed_obj.name[:-3] + '.sed'))
    sed_names.append('ESP_60/' + sed_obj.name[:-3] + '.sed')
#np.savetxt(str(template_folder + '/ESP_60/' + 'seds.list'), sed_names, fmt=['%s'])

In [ ]:
new_pca_obj = esp.pcaSED()
new_pca_obj.spec_list_orig = template_set[:10]
min_wavelen = 99.
max_wavelen = 2400.
new_pca_obj.PCA(comps=9, minWavelen=min_wavelen, maxWavelen=max_wavelen)
esp_cat_bandpass_dict = BandpassDict.loadTotalBandpassesFromFiles(bandpassDir = 'photoz_bandpasses/')    
colors = []
for spec_num in range(10):
    
    test_sed = Sed()
    test_sed.setSED(wavelen=new_pca_obj.spec_list_orig[spec_num].wavelen,
                    flambda=new_pca_obj.spec_list_orig[spec_num].flambda)
        
    test_mags = esp_cat_bandpass_dict.magListForSed(test_sed)
        
    test_colors = [test_mags[x] - test_mags[x+1] for x in range(len(test_mags)-1)]
    colors.append(test_colors)

In [ ]:
# Cluster catalog to find where we should estimate spectra
from sklearn.cluster import KMeans
esp_cat_colors = np.array([esp_test_cat['sdss_u'] - esp_test_cat['sdss_g'],
                           esp_test_cat['sdss_g'] - esp_test_cat['sdss_r'],
                           esp_test_cat['sdss_r'] - esp_test_cat['sdss_i'],
                           esp_test_cat['sdss_i'] - esp_test_cat['sdss_z'],
                           esp_test_cat['sdss_z'] - esp_test_cat['ps_y']]).T
kmeans = KMeans(n_clusters=50, random_state=1428).fit(esp_cat_colors)

In [ ]:
interp_colors = kmeans.cluster_centers_

In [ ]:
photoz_train_dict = BandpassDict([
                                  blue_bandpass_1, blue_bandpass_3, 
                                  #blue_bandpass_3, blue_bandpass_4,
                                  esp_cat_bandpass_dict['u'], esp_cat_bandpass_dict['g'], 
                                  esp_cat_bandpass_dict['r'], esp_cat_bandpass_dict['i'], esp_cat_bandpass_dict['z'], 
                                  esp_cat_bandpass_dict['y'], #red_bandpass_1, red_bandpass_2, 
                                  red_bandpass_2, red_bandpass_4
                                 ],
                                ['blue_1', #'blue_2', 
                                 'blue_3', #'blue_4', 
                                 'u', 'g', 'r', 'i', 'z', 'y', 
                                 'red_2',#'red_2', 
                                 'red_4', #'red_4'
                                ])

In [ ]:
gp_obj = esp.gaussianProcessEstimate(new_pca_obj, esp_cat_bandpass_dict, interp_colors)
gp_kernel = gp_obj.define_kernel('exp', 1.0e-1, 1.0e-2, 9)
gp_spec = gp_obj.gp_predict(gp_kernel, photoz_train_dict)
gp_spectra = gp_spec.reconstruct_spectra(9)

In [ ]:
os.mkdir(str(template_folder + '/ESP_EXP_KERNEL'))
sed_names = []
for sed_obj in template_set[:10]:
    new_sed_obj = Sed()
    new_sed_obj.setSED(wavelen=10.*sed_obj.wavelen, flambda=0.1*sed_obj.flambda)
    new_sed_obj.writeSED(str(template_folder + '/ESP_EXP_KERNEL/' + sed_obj.name[:-3] + '.sed'))
    sed_names.append('ESP_EXP_KERNEL/' + sed_obj.name[:-3] + '.sed')
spec_on = 0
for sed_obj in gp_spectra:
    new_sed_obj = Sed()
    new_sed_obj.setSED(wavelen=10.*new_pca_obj.wavelengths, 
                       flambda=0.1*sed_obj)
    new_sed_obj.writeSED(str(template_folder + '/ESP_EXP_KERNEL/esp_' + str(spec_on) + '.sed'))
    sed_names.append('ESP_EXP_KERNEL/esp_%i.sed' % spec_on)
    spec_on+=1
#np.savetxt(str(template_folder + '/ESP_EXP_KERNEL/' + 'seds.list'), sed_names, fmt=['%s'])

In [ ]:
# Load LePhare output
template_10 = np.genfromtxt('esp_10.out', usecols=[1], names=['z_est_10'])
template_60 = np.genfromtxt('esp_60.out', usecols=[1,22], names=['z_est_60', 'z_true'])
template_exp_kernel = np.genfromtxt('esp_exp_kernel.out', usecols=[1], names=['z_est_exp_kernel'])

In [ ]:
template_results = pd.DataFrame()
template_results['z_est_10'] = template_10['z_est_10']
template_results['z_est_60'] = template_60['z_est_60']
template_results['z_true'] = template_60['z_true']
template_results['z_est_exp_kernel'] = template_exp_kernel['z_est_exp_kernel']

In [ ]:
template_results[:5]

In [ ]:
def calc_bins(z_est, z_true, z_max, n_bins):
    delta_z = (z_true - z_est) / (1. + z_true)
    bin_vals = [(float(i)/n_bins)*z_max for i in range(n_bins)]
    bin_vals.append(float(z_max))
    idx_sort = z_true.argsort()
    delta_z_sort = delta_z[idx_sort]
    z_true_sort = z_true[idx_sort]
    idx_bins = z_true_sort.searchsorted(bin_vals)
    delta_z_binned = [delta_z_sort[idx_bins[i]:idx_bins[i+1]] for i in range(n_bins)]
    return delta_z_binned

In [ ]:
from scipy.stats import trim_mean
def photo_z_bias(z_est, z_true, z_max, n_bins):
    delta_z_binned = calc_bins(z_est, z_true, z_max, n_bins)
    bias_results = []
    for delta_z_data in delta_z_binned:
        trimmed_mean = trim_mean(delta_z_data, .25)
        bias_results.append(trimmed_mean)
    return np.array(bias_results)

In [ ]:
from scipy.stats import trim_mean
def photo_z_abs_bias(z_est, z_true, z_max, n_bins):
    delta_z_binned = calc_bins(z_est, z_true, z_max, n_bins)
    bias_results = []
    for delta_z_data in delta_z_binned:
        trimmed_mean = trim_mean(np.abs(delta_z_data), .25)
        bias_results.append(trimmed_mean)
    return np.array(bias_results)

In [ ]:
def photo_z_stdev(z_est, z_true, z_max, n_bins):
    delta_z_binned = calc_bins(z_est, z_true, z_max, n_bins)
    stdev_results = []
    for delta_z_data in delta_z_binned:
        bin_mean = np.mean(delta_z_data)
        diffs = delta_z_data - bin_mean
        diffs_sq_mean = np.mean(diffs**2.)
        stdev_results.append(np.sqrt(diffs_sq_mean))
    return np.array(stdev_results)

In [ ]:
def photo_z_stdev_iqr(z_est, z_true, z_max, n_bins):
    delta_z_binned = calc_bins(z_est, z_true, z_max, n_bins)
    stdev_iqr_results = []
    for delta_z_data in delta_z_binned:
        bin_25 = np.percentile(delta_z_data, 25.)
        bin_75 = np.percentile(delta_z_data, 75.)
        diff = bin_75 - bin_25
        stdev_iqr_results.append(diff/1.349)
    return np.array(stdev_iqr_results)

In [ ]:
def photo_z_outlier_frac(z_est, z_true, z_max, n_bins):
    stdev_iqr_results = photo_z_stdev_iqr(z_est, z_true, z_max, n_bins)
    delta_z_binned = calc_bins(z_est, z_true, z_max, n_bins)
    outlier_frac_results = []
    for delta_z_data, stdev_iqr_val in zip(delta_z_binned, stdev_iqr_results):
        if 3.*stdev_iqr_val < 0.06:
            outlier_thresh = 0.06
        else:
            outlier_thresh = 3.*stdev_iqr_val
        #print outlier_thresh, np.std(delta_z_data), len(delta_z_data), np.max(delta_z_data), np.min(delta_z_data)
        total_bin_obj = float(len(delta_z_data))
        outliers = np.where(np.abs(delta_z_data) > outlier_thresh)[0]
        #print outliers
        outlier_frac = len(outliers)/total_bin_obj
        outlier_frac_results.append(outlier_frac)
    return np.array(outlier_frac_results)

In [ ]:
z_max = 3.
n_bins = 1

delta_z_binned = calc_bins(template_results['z_est_10'], template_results['z_true'], 
                           z_max, n_bins)
bin_counts = [len(bin_results) for bin_results in delta_z_binned]
print(bin_counts)

bias_results_10 = photo_z_bias(template_results['z_est_10'],
                               template_results['z_true'], z_max, n_bins)
bias_results_60 = photo_z_bias(template_results['z_est_60'],
                               template_results['z_true'], z_max, n_bins)
bias_results_exp = photo_z_bias(template_results['z_est_exp_kernel'],
                               template_results['z_true'], z_max, n_bins)

print(np.mean(bias_results_10))
print(np.mean(bias_results_60))
print(np.mean(bias_results_exp))

print('\n')

stdev_results_10 = photo_z_stdev(template_results['z_est_10'],
                               template_results['z_true'], z_max, n_bins)
stdev_results_60 = photo_z_stdev(template_results['z_est_60'],
                               template_results['z_true'], z_max, n_bins)
stdev_results_exp = photo_z_stdev(template_results['z_est_exp_kernel'],
                               template_results['z_true'], z_max, n_bins)
print(np.mean(stdev_results_10))
print(np.mean(stdev_results_60))
print(np.mean(stdev_results_exp))

print('\n')

stdev_iqr_results_10 = photo_z_stdev_iqr(template_results['z_est_10'],
                               template_results['z_true'], z_max, n_bins)
stdev_iqr_results_60 = photo_z_stdev_iqr(template_results['z_est_60'],
                               template_results['z_true'], z_max, n_bins)
stdev_iqr_results_exp = photo_z_stdev_iqr(template_results['z_est_exp_kernel'],
                               template_results['z_true'], z_max, n_bins)

print(np.mean(stdev_iqr_results_10))
print(np.mean(stdev_iqr_results_60))
print(np.mean(stdev_iqr_results_exp))

print('\n')

outlier_frac_results_10 = photo_z_outlier_frac(template_results['z_est_10'],
                               template_results['z_true'], z_max, n_bins)
outlier_frac_results_60 = photo_z_outlier_frac(template_results['z_est_60'],
                               template_results['z_true'], z_max, n_bins)
outlier_frac_results_exp = photo_z_outlier_frac(template_results['z_est_exp_kernel'],
                               template_results['z_true'], z_max, n_bins)
print(np.mean(outlier_frac_results_10))
print(np.mean(outlier_frac_results_60))
print(np.mean(outlier_frac_results_exp))

In [ ]:
print('Bias', (0.0070834922179 - 0.0513398669249) / 0.0513398669249)
print((0.013491192485 - 0.0513398669249) / 0.0513398669249)
print('Standard Dev', (0.217399105893 - 0.278779776263) / 0.278779776263)
print((0.173433673493 - 0.278779776263) / 0.278779776263)
print('Standard Dev IQR', (0.0475990008076 - 0.12091776158) / 0.12091776158)
print((0.036184609478 - 0.12091776158) / 0.12091776158)

In [ ]:
fig = plt.figure(figsize=(10,6))
z_max = 3.0
bin_width = 0.1
n_bins = int(z_max/bin_width)
bias_results_10 = photo_z_bias(template_results['z_est_10'],
                               template_results['z_true'], z_max, n_bins)
bias_results_60 = photo_z_bias(template_results['z_est_60'],
                               template_results['z_true'], z_max, n_bins)
bias_results_exp = photo_z_bias(template_results['z_est_exp_kernel'],
                               template_results['z_true'], z_max, n_bins)
plt.plot(np.arange(0,z_max,bin_width), bias_results_10, label='10 templates')
plt.plot(np.arange(0,z_max,bin_width), bias_results_60, label='60 templates')
plt.plot(np.arange(0,z_max,bin_width), bias_results_exp, label='10 + Exp Kernel templates')
plt.legend()
plt.xlabel('True Redshift')
plt.ylabel('Bias')

In [ ]:
fig = plt.figure(figsize=(10,6))
z_max = 3.0
bin_width = 0.1
n_bins = int(z_max/bin_width)
stdev_results_10 = photo_z_stdev(template_results['z_est_10'],
                               template_results['z_true'], z_max, n_bins)
stdev_results_60 = photo_z_stdev(template_results['z_est_60'],
                               template_results['z_true'], z_max, n_bins)
stdev_results_exp = photo_z_stdev(template_results['z_est_exp_kernel'],
                               template_results['z_true'], z_max, n_bins)
plt.plot(np.arange(0,z_max,bin_width), stdev_results_10, label='10 templates')
plt.plot(np.arange(0,z_max,bin_width), stdev_results_60, label='60 templates')
plt.plot(np.arange(0,z_max,bin_width), stdev_results_exp, label='10 + Exp Kernel templates')
plt.legend()
plt.xlabel('True Redshift')
plt.ylabel('Standard Deviation')

In [ ]:
fig = plt.figure(figsize=(10,6))
z_max = 3.0
bin_width = 0.1
n_bins = int(z_max/bin_width)
stdev_iqr_results_10 = photo_z_stdev_iqr(template_results['z_est_10'],
                               template_results['z_true'], z_max, n_bins)
stdev_iqr_results_60 = photo_z_stdev_iqr(template_results['z_est_60'],
                               template_results['z_true'], z_max, n_bins)
stdev_iqr_results_exp = photo_z_stdev_iqr(template_results['z_est_exp_kernel'],
                               template_results['z_true'], z_max, n_bins)
plt.plot(np.arange(0,z_max,bin_width), stdev_iqr_results_10, label='10 templates')
plt.plot(np.arange(0,z_max,bin_width), stdev_iqr_results_60, label='60 templates')
plt.plot(np.arange(0,z_max,bin_width), stdev_iqr_results_exp, label='10 + Exp Kernel templates')
plt.legend()
plt.xlabel('True Redshift')
plt.ylabel('Standard Deviation of IQR')

In [ ]:
fig = plt.figure(figsize=(10,6))
z_max = 3.0
bin_width = 0.1
n_bins = int(z_max/bin_width)
outlier_frac_results_10 = photo_z_outlier_frac(template_results['z_est_10'],
                               template_results['z_true'], z_max, n_bins)
outlier_frac_results_60 = photo_z_outlier_frac(template_results['z_est_60'],
                               template_results['z_true'], z_max, n_bins)
outlier_frac_results_exp = photo_z_outlier_frac(template_results['z_est_exp_kernel'],
                               template_results['z_true'], z_max, n_bins)
plt.plot(np.arange(0,z_max,bin_width), outlier_frac_results_10, label='10 templates')
plt.plot(np.arange(0,z_max,bin_width), outlier_frac_results_60, label='60 templates')
plt.plot(np.arange(0,z_max,bin_width), outlier_frac_results_exp, label='10 + Exp Kernel templates')
plt.legend()
plt.xlabel('True Redshift')
plt.ylabel('Fraction of Outliers')

In [ ]:
template_results_max3 = template_results.query('z_true < 3')

In [ ]:
fig = plt.figure(figsize=(16, 14))

ax = fig.add_subplot(111)
ax.spines['top'].set_color('none')
ax.spines['bottom'].set_color('none')
ax.spines['left'].set_color('none')
ax.spines['right'].set_color('none')
ax.set_axis_bgcolor('white') 
ax.tick_params(labelcolor='w', top='off', bottom='off', left='off', right='off')
mpl.rc('xtick', labelsize=22) 
mpl.rc('ytick', labelsize=22)

fig.add_subplot(2,2,1)
plt.plot(np.arange(0,z_max,bin_width), bias_results_10, label='10 templates', lw=6)
plt.plot(np.arange(0,z_max,bin_width), bias_results_60, label='60 templates', lw=6)
plt.plot(np.arange(0,z_max,bin_width), bias_results_exp, label='10 + 50 Exp Kernel templates', lw=6)
plt.xlabel('True Redshift', size=22)
plt.ylabel('Bias', size=22)

fig.add_subplot(2,2,2)
plt.plot(np.arange(0,z_max,bin_width), stdev_results_10, label='10 BC03 templates', lw=6)
plt.plot(np.arange(0,z_max,bin_width), stdev_results_60, label='60 BC03 templates', lw=6)
plt.plot(np.arange(0,z_max,bin_width), stdev_results_exp, label='10 BC03 + 50 Exp Kernel templates', lw=6)
plt.legend(fontsize=16)
plt.xlabel('True Redshift', size=22)
plt.ylabel('Standard Deviation', size=22)

fig.add_subplot(2,2,3)
plt.plot(np.arange(0,z_max,bin_width), stdev_iqr_results_10, label='10 templates', lw=6)
plt.plot(np.arange(0,z_max,bin_width), stdev_iqr_results_60, label='60 templates', lw=6)
plt.plot(np.arange(0,z_max,bin_width), stdev_iqr_results_exp, label='10 + 50 Exp Kernel templates', lw=6)
plt.xlabel('True Redshift', size=22)
plt.ylabel('Standard Deviation of IQR', size=22)

fig.add_subplot(2,2,4)
plt.plot(np.arange(0,z_max,bin_width), outlier_frac_results_10, label='10 templates', lw=6)
plt.plot(np.arange(0,z_max,bin_width), outlier_frac_results_60, label='60 templates', lw=6)
plt.plot(np.arange(0,z_max,bin_width), outlier_frac_results_exp, label='10 + 50 Exp Kernel templates', lw=6)
plt.xlabel('True Redshift', size=22)
plt.ylabel('Fraction of Outliers', size=22)
plt.tight_layout()
#plt.savefig('paper_plots_revised/fig_9.png')

In [ ]:
x = template_results_max3['z_true']
y = template_results_max3['z_est_10']
y_exp = template_results_max3['z_est_exp_kernel']

fig = plt.figure(figsize=(24, 12))
fig.add_subplot(1, 2, 1)
plt.scatter(x, y, alpha=0.1)
plt.plot(np.arange(0, 3.005, 0.01), np.arange(0, 3.005, 0.01), '--k')
plt.ylim(-0.1, 5.55)
plt.xlabel('z_true', size=32)
plt.ylabel('z_phot', size=32)
plt.title('10 BC03 Templates', size=32)
fig.add_subplot(1, 2, 2)
plt.scatter(x, y_exp, alpha=0.1)
plt.plot(np.arange(0, 3.005, 0.01), np.arange(0, 3.005, 0.01), '--k')
plt.ylim(-0.1, 5.55)
plt.xlabel('z_true', size=32)
plt.ylabel('z_phot', size=32)
plt.title('10 + 50 Exp Kernel Templates', size=32)
#plt.savefig('paper_plots_revised/fig_11.png')

Testing spectrum degeneracies in color space


In [ ]:
wave_nm = pca_obj.spec_list_orig[0].wavelen
test_idx = np.where((wave_nm <= 2400.) & (wave_nm >= 99.))
wave_nm_test = wave_nm[np.where((wave_nm <= 2400.) & (wave_nm >= 99.))]

test_colors = []
test_fluxes = []
for test_sed in pca_obj.spec_list_orig:
        
    test_fluxes.append(pca_obj.scale_spectrum(test_sed.flambda[test_idx]))
        
    test_mags = bandpass_dict.magListForSed(test_sed)
        
    colors = [test_mags[x] - test_mags[x+1] for x in range(len(test_mags)-1)]
    test_colors.append(colors)

In [ ]:
nbrs = NearestNeighbors(n_neighbors=300).fit(test_colors)
distance, idx = nbrs.kneighbors(test_colors)

In [ ]:
radius = .1
num_in = []
spec_near = []
for spec_on in range(len(pca_obj.spec_list_orig)):
    spec_near.append(idx[spec_on][np.where(distance[spec_on] < radius)[0][1:]])
    num_in.append(len(spec_near[-1]))

In [ ]:
# Repeat with added filters
wave_nm = pca_obj.spec_list_orig[0].wavelen
test_idx = np.where((wave_nm <= 2400.) & (wave_nm >= 99.))
wave_nm_test = wave_nm[np.where((wave_nm <= 2400.) & (wave_nm >= 99.))]

test_colors = []
test_fluxes = []
for test_sed in pca_obj.spec_list_orig:
        
    test_fluxes.append(pca_obj.scale_spectrum(test_sed.flambda[test_idx]))
        
    test_mags = new_bandpass_dict.magListForSed(test_sed)
        
    colors = [test_mags[x] - test_mags[x+1] for x in range(len(test_mags)-1)]
    test_colors.append(colors)

In [ ]:
nbrs = NearestNeighbors(n_neighbors=300).fit(test_colors)
distance, idx = nbrs.kneighbors(test_colors)

In [ ]:
num_in = []
spec_near_new = []
for spec_on in range(len(pca_obj.spec_list_orig)):
    spec_near_new.append(idx[spec_on][np.where(distance[spec_on] < radius)[0][1:]])
    num_in.append(len(spec_near[-1]))

In [ ]:
# Just redefine to make plot nice
new_bandpass_dict = BandpassDict([
                                  blue_bandpass_1, blue_bandpass_3, 
                                  #blue_bandpass_3, blue_bandpass_4,
                                  bandpass_dict['u'], bandpass_dict['g'], 
                                  bandpass_dict['r'], bandpass_dict['i'], bandpass_dict['z'], 
                                  bandpass_dict['y'], #red_bandpass_1, red_bandpass_2, 
                                  red_bandpass_2, red_bandpass_4
                                 ],
                                ['Blue 1', #'blue_2', 
                                 'Blue 2', #'blue_4', 
                                 'u', 'g', 'r', 'i', 'z', 'y', 
                                 'Red 1',#'red_2', 
                                 'Red 2', #'red_4'
                                ])

In [ ]:
test_on = 8 # Pick test spectrum
fig = plt.figure(figsize=(14,16))
fig.add_subplot(2,1,1)
for near_flux in spec_near[test_on][::-1]:
    plt.plot(wave_nm_test, test_fluxes[near_flux]-test_fluxes[test_on])
off_set = 0.0
for bp, bp_colors in zip(['u', 'g', 'r', 'i', 'z', 'y'], ['purple', 'blue', 'green', 'yellow', 'orange', 'red']):
    wavelen_bp = bandpass_dict[bp].wavelen[np.where(bandpass_dict[bp].sb > 0.)]
    plt.plot(wavelen_bp, np.ones(len(wavelen_bp))*.0004+off_set, lw=8, c=bp_colors, label=str('LSST'+bp))
    off_set += 0.00006
mpl.rcParams['xtick.major.size'] = 15
mpl.rcParams['xtick.major.width'] = 3
mpl.rcParams['xtick.minor.size'] = 15
mpl.rcParams['xtick.minor.width'] = 3

plt.legend(fontsize=22)

plt.ylabel('Flux Difference', size=22)
plt.title('LSST Filters Only, Nearest Neighbors within %.2f mag radius' % radius, size=22)

plt.ylim(-0.0017, 0.001)
plt.xlim(75., 2405.)

fig.add_subplot(2,1,2)
for near_flux in spec_near_new[test_on]:
    plt.plot(wave_nm_test, test_fluxes[near_flux]-test_fluxes[test_on])

mpl.rcParams['xtick.major.size'] = 15
mpl.rcParams['xtick.major.width'] = 3
mpl.rcParams['xtick.minor.size'] = 15
mpl.rcParams['xtick.minor.width'] = 3

off_set = 0.0
for bp, bp_colors in zip(['Blue 1', 'Blue 2', 'u', 'g', 'r', 'i', 'z', 'y', 'Red 1', 'Red 2'], 
                         ['black', 'gray', 'purple', 'blue', 'green', 'yellow', 'orange', 'red', 'indianred', 'brown']):
    wavelen_bp = new_bandpass_dict[bp].wavelen[np.where(new_bandpass_dict[bp].sb > 0.)]
    if len(bp) < 2:
        bp_label = str('LSST'+bp)
    else:
        bp_label = str('Top Hat '+bp)
    plt.plot(wavelen_bp, np.ones(len(wavelen_bp))*-.00072-off_set, lw=8, c=bp_colors, label=bp_label)
    off_set += 0.00006

plt.xlabel('Wavelength (nm)', size=22)
plt.ylabel('Flux Difference', size=22)

plt.title('LSST + 4 Top Hat Filters, Nearest Neighbors within %.2f mag radius' % radius, size=22)

plt.ylim(-0.0017, 0.001)
plt.xlim(75., 2405.)
plt.legend(fontsize=22, ncol=2)
plt.tight_layout()

#plt.savefig('paper_plots_revised/fig_12.png')

In [ ]: