I did this a bit flippantly before, but I want to formalize the process by which we estimate the uncertainty on emulator predictions.
In [2]:
from pearce.emulator import SpicyBuffalo, LemonPepperWet
from pearce.mocks import cat_dict
import numpy as np
from os import path
In [3]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
In [4]:
#xi gg
# HDF5 file of training data: xi_gg for the zheng07 HOD over cosmologies,
# at fixed number density (low M_sat sample).
training_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/PearceRedMagicXiCosmoFixedNd.hdf5'
#test_file= '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/'
# Held-out test set drawn from separate test boxes.
test_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/PearceRedMagicXiCosmoFixedNd_Test.hdf5'
In [5]:
# Emulation backend: Gaussian process.
em_method = 'gp'
# Train/test split strategy; not referenced again in the visible cells.
split_method = 'random'
In [6]:
# Scale factor a = 1.0, i.e. redshift z = 1/a - 1 = 0.
a = 1.0
z = 1.0/a - 1.0
In [7]:
# Train the emulator at a single fixed redshift; other dimensions left free.
fixed_params = {'z':z}#, 'cosmo': 0}#, 'r':24.06822623}
In [8]:
# Seed so the 5% downsampling of the training set is reproducible.
np.random.seed(0)
# LemonPepperWet: a pearce emulator class (one GP per radial bin —
# presumably; confirm against pearce docs). downsample_factor=0.05 keeps
# 5% of the training points.
emu = LemonPepperWet(training_file, method = em_method, fixed_params=fixed_params,
custom_mean_function = None, downsample_factor = 0.05)#, hyperparams = {'metric':metric})
In [9]:
# statistic=None appears to return the raw per-bin predictions and truths
# rather than a summary statistic — TODO confirm in pearce.
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)
In [10]:
# Mean training-data uncertainty per radial bin.
# NOTE(review): the exported cell had lost its loop indentation (invalid
# Python as pasted) and used py2-only xrange; both fixed here.
train_err = []
for ridx in range(emu.n_bins):
    # Restrict to one radial bin, keeping the emulator's other fixed params.
    fixed_params = {'r': emu.scale_bin_centers[ridx]}
    fixed_params.update(emu.fixed_params)
    #test_x, test_y, test_ycov, info = emu.get_data(test_file, fixed_params)
    train_x, train_y, train_ycov, info = emu.get_data(training_file, fixed_params, remove_nans = False)
    #test_ycov = test_ycov[:, 0,0]
    # Keep the (0,0) element of each sample's covariance block — the
    # variance for this bin.
    train_ycov = train_ycov[:, 0, 0]
    #print np.mean(test_y*np.sqrt(test_ycov))
    # RMS uncertainty in the bin; nanmean skips samples with missing data.
    train_err.append(np.sqrt(np.nanmean(train_ycov)))
In [11]:
# Training uncertainty vs. scale (log-x).
plt.plot(emu.scale_bin_centers, train_err)
plt.xscale('log')
In [12]:
# Emulator accuracy per radial bin on the test set.
# The 10** suggests the emulator works in log10 of the statistic —
# TODO confirm. Indentation restored and xrange replaced with range
# (the exported cell was invalid Python as pasted).
acc = []
log_acc = []
for ridx in range(emu.n_bins):
    py, dy = pred_y[ridx], data_y[ridx]
    # Mean fractional error after undoing the log.
    acc.append(np.mean(np.abs(10**py - 10**dy) / (10**dy)))
    # Mean fractional error directly in log space.
    log_acc.append(np.mean(np.abs(py - dy) / dy))
In [13]:
# Compare training uncertainty against emulator accuracy, per scale.
plt.plot(emu.scale_bin_centers, train_err)
plt.plot(emu.scale_bin_centers, acc)
plt.xscale('log')
In [14]:
from pearce.mocks.kittens import TrainingBox
In [15]:
# Training simulation box 0, with paths configured for the sherlock cluster.
cat = TrainingBox(0, system = 'sherlock')
In [16]:
# Load the a=1.0 snapshot with the redMagic HOD model.
cat.load(1.0, HOD = 'redMagic')
In [17]:
# Inspect the last four columns of the training inputs — presumably the
# HOD parameters; cross-check against emu.get_param_names() below.
train_x[:, -4:]
Out[17]:
In [18]:
# Ordered parameter names, to map columns of train_x to parameters.
emu.get_param_names()
Out[18]:
In [19]:
# HOD parameter point used to populate the catalog. logMmin is included
# directly here rather than patched in after construction.
HOD = dict(logM0=13.91, sigma_logM=0.12, logM1=13.188, alpha=0.87,
           logMmin=13.2)
In [21]:
# Repopulate the box 10 times with the same HOD and measure xi each time,
# to estimate realization-to-realization scatter of the statistic.
# Indentation restored and xrange -> range (cell was invalid as pasted).
xis = []
# 19 edges -> 18 log-spaced radial bins from 10**-1.1 to 10**1.6.
rbins = np.logspace(-1.1, 1.6, 19)
for _ in range(10):
    cat.populate(HOD)
    xis.append(cat.calc_xi(rbins))
In [22]:
# Stack the realizations into an array, one row per population.
xis = np.array(xis)
In [33]:
# Fractional scatter per radial bin: std across realizations (diagonal of
# the bin-bin covariance) divided by the mean xi in that bin.
xi_errs = np.sqrt(np.diag(np.cov(xis, rowvar = False)))/np.nanmean(xis,axis = 0)
In [34]:
# Display the per-bin fractional scatter.
xi_errs
Out[34]:
In [35]:
# Overlay training uncertainty, emulator accuracy, and the stochastic
# population scatter, all vs. scale.
plt.plot(emu.scale_bin_centers, train_err)
plt.plot(emu.scale_bin_centers, acc)
plt.plot(emu.scale_bin_centers, xi_errs)
plt.xscale('log')
In [79]:
# resmat/datamat are defined in cells not shown here. Flatten all leading
# axes and transpose so rows index the 18 radial bins.
resmat_flat = resmat.reshape((-1, 18)).T
datamat_flat = datamat.reshape((-1, 18)).T
#resmat_hodrealav = np.mean(resmat_realav, axis = 1)
In [133]:
# Focus on one radial bin. t is defined in a cell not shown here —
# presumably the test input points; verify upstream.
r_idx = 10
t_bin = t[r_idx]
# Fractional accuracy of each test point in this bin.
acc_bin = np.abs(resmat_flat[r_idx])/datamat_flat[r_idx]
In [135]:
# Rank each point's accuracy by percentile (0..100) so the color scale is
# uniform rather than dominated by outliers.
percentiles = np.percentile(acc_bin, range(101))
norm_acc_bin = np.digitize(acc_bin, percentiles)
#norm_acc_bin = 100*((acc_bin - acc_bin.min())/acc_bin.max()).astype(int)
In [136]:
# Diverging colormap for the percentile coloring.
# NOTE(review): with as_cmap=True seaborn may ignore n — confirm for the
# installed seaborn version.
palette = sns.diverging_palette(220, 20, n=len(percentiles)-1, as_cmap=True)
#sns.set_palette(palette)
In [137]:
# Parameter names for axis labels in the scatter plots below.
pnames = emu.get_param_names()
In [138]:
# Scatter test points in every pair of parameter planes (columns 7-10;
# presumably the HOD parameters — check pnames), colored by accuracy
# percentile. Indentation restored and xrange -> range (the exported cell
# was invalid Python as pasted); the scatter handle is named `points`
# since plt.scatter returns the collection, not a colorbar.
for ax1 in range(7, 11):
    for ax2 in range(ax1 + 1, 11):
        points = plt.scatter(t_bin[:, ax1], t_bin[:, ax2],
                             c=norm_acc_bin, cmap=palette, alpha=0.2)
        plt.colorbar(points)
        plt.xlabel(pnames[ax1])
        plt.ylabel(pnames[ax2])
        plt.show()
In [131]:
# test_err is defined in a cell not shown here; take the diagonal element
# of its covariance for this radial bin.
test_err_bin = test_err[:, r_idx, r_idx]
In [132]:
# Distribution of the log test errors in this bin.
plt.hist(np.log10(test_err_bin) )
Out[132]:
In [120]:
# Distribution of the log training uncertainties in the same bin.
plt.hist(np.log10(np.sqrt(emu.yerr[r_idx])) )
Out[120]:
In [110]:
# Decile edges of the log test error; digitize assigns each point its
# decile index for coloring.
decile_grid = np.linspace(0, 100, 11)
percentiles = np.percentile(np.log10(test_err_bin), decile_grid)
norm_err_bin = np.digitize(np.log10(test_err_bin), percentiles)
In [121]:
#relevant stat is uncertainty in training error, not test error
# Same parameter-plane scatter as above, but colored by test-error decile.
# Indentation restored and xrange -> range (the exported cell was invalid
# Python as pasted).
for ax1 in range(7, 11):
    for ax2 in range(ax1 + 1, 11):
        points = plt.scatter(t_bin[:, ax1], t_bin[:, ax2],
                             c=norm_err_bin, cmap=palette, alpha=0.2)
        plt.colorbar(points)
        plt.xlabel(pnames[ax1])
        plt.ylabel(pnames[ax2])
        plt.show()
In [90]:
# Diagonal test-error variance in every radial bin, stacked into one
# array (rows = bins — TODO confirm orientation). xrange -> range: the
# py2-only builtin was the only non-portable piece of this cell.
test_err_diag = np.array([test_err[:, ridx, ridx] for ridx in range(emu.n_bins)])
In [91]:
# Mean test error per radial bin, averaged over test points.
test_err_diag.mean(axis = 1)
Out[91]:
In [112]:
# Mean fractional residual (10** undoes the log on the data) vs. the mean
# reported test error, per scale.
plt.plot(emu.scale_bin_centers, np.mean(np.abs(resmat_flat)/(10**datamat_flat), axis = 1) )
plt.plot(emu.scale_bin_centers, np.mean(np.abs(test_err_diag), axis = 1) )
plt.xscale('log')
In [93]:
# resmat_log is defined in a cell not shown here; flatten to (18, N) with
# rows indexing the radial bins, matching resmat_flat above.
resmat_log_flat = resmat_log.reshape((-1, 18)).T
In [113]:
# Log-space prediction accuracy per scale, on log-log axes.
plt.plot(emu.scale_bin_centers, np.mean(np.abs(resmat_log_flat)/np.abs(datamat_flat), axis = 1), label = 'Pred acc' )
#plt.plot(emu.scale_bin_centers, np.mean(np.array([ye/np.abs(y+1e-9) for ye, y in zip(emu.yerr, emu.y)]), axis = 1), label = 'Training Err' )
plt.yscale('log')
plt.legend(loc ='best')
plt.xscale('log')
In [ ]:
# One histogram of fractional residuals per radial bin (10** undoes the
# log on the data). Indentation restored — the exported cell was invalid
# Python as pasted.
for res, dat in zip(resmat_flat, 10**datamat_flat):
    plt.hist(res / dat, bins=30)  # , bins = np.linspace(-1, 1, 30))
    plt.yscale('log')
    plt.show()
In [ ]:
In [ ]: