In [462]:
%load_ext autoreload
%autoreload 2
In [463]:
from pearce.emulator import OriginalRecipe, ExtraCrispy
from pearce.mocks import cat_dict
import numpy as np
from os import path
In [464]:
import tensorflow as tf
In [465]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
In [466]:
training_file = '/home/sean/PearceRedMagicXiCosmo.hdf5'
test_file = '/home/sean/PearceRedMagicXiCosmoTest.hdf5'
In [467]:
em_method = 'nn'
split_method = 'random'
In [468]:
a = 1.0
z = 1.0/a - 1.0
In [469]:
fixed_params = {'z':z}#, 'r':17.0389993 }
In [470]:
emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,
                     hyperparams = {'hidden_layer_sizes': (10,),  # one hidden layer of 10 units
                                    'activation': 'relu', 'verbose': True,
                                    'tol': 1e-8, 'learning_rate_init': 1e-4,
                                    'max_iter': 10, 'alpha': 0, 'early_stopping': False,
                                    'validation_fraction': 0.3})
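As a quick sanity check (a sketch; these attributes are the ones used in the cells below), the standardized training arrays the emulator holds can be inspected directly:
In [ ]:
# shapes of the standardized training data (emu.x, emu.y, emu.yerr are used throughout below)
print(emu.x.shape, emu.y.shape, emu.yerr.shape)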
In [471]:
# draw a random permutation of the full training set (size fraction 1.0, no replacement)
idxs = np.random.choice(emu.x.shape[0], size = int(emu.x.shape[0]*1.0), replace=False)
x_train, y_train, yerr_train = emu.x[idxs, :], emu.y[idxs], emu.yerr[idxs]
# undo the emulator's internal y standardization to recover the raw y and yerr
y_train = y_train*(emu._y_std + 1e-5) + emu._y_mean
yerr_train = yerr_train*(emu._y_std + 1e-5)
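A minimal check that the de-standardization above inverts the emulator's internal scaling (assuming `_y_mean` and `_y_std` are the statistics that were applied to `emu.y`):
In [ ]:
# y_train should map back onto the standardized emu.y under the inverse transform
assert np.allclose((y_train - emu._y_mean)/(emu._y_std + 1e-5), emu.y[idxs])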
In [472]:
idxs
Out[472]:
In [473]:
emu.get_param_names()[:7]
Out[473]:
In [474]:
# the first 7 columns are the cosmological parameters; pull out the distinct cosmologies
unique_cosmos = np.unique(x_train[:, :7], axis=0)  #*(emu._x_std[:7]+1e-5) + emu._x_mean[:7]
In [475]:
unique_cosmos.shape
Out[475]:
In [476]:
x_test, y_test, ycov_test, _ = emu.get_data(test_file, fixed_params, None, False)
# standardize the test inputs with the training-set mean and std
x_test = (x_test - emu._x_mean)/(emu._x_std + 1e-5)
#split_ycov = np.dsplit(ycov_test, ycov_test.shape[-1])
#fullcov = block_diag(*[yc[:,:,0] for yc in split_ycov])
#yerr_test = np.sqrt(np.hstack(np.diag(syc[:,:,0]) for syc in split_ycov))
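The commented lines above sketch how per-bin errors could be pulled from the test covariances; a working version with a list comprehension instead of the deprecated generator-to-hstack pattern (assuming `ycov_test` stacks one covariance block per slice along its last axis):
In [ ]:
# diagonal errors from each covariance block, concatenated to match y_test
split_ycov = np.dsplit(ycov_test, ycov_test.shape[-1])
yerr_test = np.sqrt(np.hstack([np.diag(syc[:, :, 0]) for syc in split_ycov]))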
In [477]:
def n_layer_fc(x, hidden_sizes, training=False, l=1e-8):
    """Plain fully-connected network: ReLU hidden layers, linear output."""
    initializer = tf.variance_scaling_initializer(scale=2.0)  # He initialization
    regularizer = tf.contrib.layers.l2_regularizer(l)
    fc_output = tf.layers.dense(x, hidden_sizes[0], activation=tf.nn.relu,
                                kernel_initializer=initializer, kernel_regularizer=regularizer)
    for size in hidden_sizes[1:]:
        fc_output = tf.layers.dense(fc_output, size, activation=tf.nn.relu,
                                    kernel_initializer=initializer, kernel_regularizer=regularizer)
    # linear output layer; [:, 0] squeezes the predictions to shape (batch,)
    pred = tf.layers.dense(fc_output, 1, kernel_initializer=initializer,
                           kernel_regularizer=regularizer)[:, 0]
    return pred
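A quick graph-construction smoke test (a sketch, not part of the training run): the trailing `[:, 0]` means the network emits rank-1 predictions, one scalar per sample.
In [ ]:
tf.reset_default_graph()
x_ph = tf.placeholder(tf.float32, [None, x_train.shape[1]])
print(n_layer_fc(x_ph, [10, 10]).shape)  # (?,)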
In [478]:
def n_layer_fc_do_bn(x, hidden_sizes, training=False, l=1e-6):
    """Fully-connected network with dropout and batch norm before each ReLU."""
    initializer = tf.variance_scaling_initializer(scale=2.0)
    regularizer = tf.contrib.layers.l1_regularizer(l)
    nl_out = x
    for size in hidden_sizes:
        fc_output = tf.layers.dense(nl_out, size,
                                    kernel_initializer=initializer, kernel_regularizer=regularizer)
        do_out = tf.layers.dropout(fc_output, 0.5, training=training)
        bn_out = tf.layers.batch_normalization(do_out, axis=-1, training=training)
        nl_out = tf.nn.relu(bn_out)  # tf.nn.leaky_relu(bn_out, alpha=0.01)
    # linear output; [:, 0] squeezes to shape (batch,) to match n_layer_fc
    pred = tf.layers.dense(nl_out, 1, kernel_initializer=initializer,
                           kernel_regularizer=regularizer)[:, 0]
    return pred
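The dropout/batch-norm variant takes a boolean `training` tensor, so the same graph can be toggled between train and eval modes; a construction sketch:
In [ ]:
tf.reset_default_graph()
x_ph = tf.placeholder(tf.float32, [None, x_train.shape[1]])
is_training_ph = tf.placeholder(tf.bool)
print(n_layer_fc_do_bn(x_ph, [50, 50], training=is_training_ph).shape)  # (?,)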
In [479]:
def optimizer_init_fn(learning_rate = 1e-6):
return tf.train.AdamOptimizer(learning_rate)#, beta1=0.9, beta2=0.999, epsilon=1e-6)
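If a decaying step size were wanted instead of the fixed Adam rate, a hypothetical variant (note: `minimize` in `train` below would also need `global_step=global_step` for the decay to advance):
In [ ]:
def optimizer_init_fn_decay(base_lr=1e-6):
    # exponential decay: base_lr * 0.96 ** (global_step / 1000)
    global_step = tf.train.get_or_create_global_step()
    lr = tf.train.exponential_decay(base_lr, global_step, 1000, 0.96)
    return tf.train.AdamOptimizer(lr)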
In [480]:
from sklearn.metrics import r2_score, mean_squared_error
In [481]:
def check_accuracy(sess, val_data, batch_size, x, weights, preds, is_training=None):
    val_x, val_y = val_data
    perc_accs, scores = [], []
    for idx in xrange(0, val_x.shape[0], batch_size):
        feed_dict = {x: val_x[idx:idx+batch_size],
                     is_training: 0}
        y_pred = sess.run(preds, feed_dict=feed_dict)
        scores.append(r2_score(val_y[idx:idx+batch_size], y_pred))
        # fractional error in the unstandardized y, using the training-set mean and std;
        # append per batch (assigning here would overwrite the accumulator each iteration)
        perc_accs.append(np.mean(emu._y_std*np.abs(val_y[idx:idx+batch_size]-y_pred)
                                 /np.abs(emu._y_std*val_y[idx:idx+batch_size] + emu._y_mean)))
    print('Val R^2: %.6f, mean frac. error: %.2f %%' % (np.mean(scores), 100*np.mean(perc_accs)))
In [482]:
device = '/device:GPU:0'
#device = '/cpu:0'
def train(model_init_fn, optimizer_init_fn, num_params, train_data, val_data, hidden_sizes,
          num_epochs=1, batch_size=200, print_every=10):
    tf.reset_default_graph()
    with tf.device(device):
        # Construct the computational graph: placeholders for inputs,
        # scalar targets, and per-sample weights, then build the model
        x = tf.placeholder(tf.float32, [None, num_params])
        y = tf.placeholder(tf.float32, [None])  # one scalar target per sample
        weights = tf.placeholder(tf.float32, [None])
        is_training = tf.placeholder(tf.bool, name='is_training')
        preds = model_init_fn(x, hidden_sizes, is_training)
        with tf.device('/cpu:0'):
            # inverse-error weighted MSE
            loss = tf.losses.mean_squared_error(labels=y, predictions=preds, weights=weights)
            #loss = tf.losses.absolute_difference(labels=y, predictions=preds, weights=tf.abs(1.0/y))
        with tf.device(device):
            optimizer = optimizer_init_fn()
            # batch norm's moving averages live in UPDATE_OPS, so run them with the train op
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        train_x, train_y, train_yerr = train_data
        rand_idxs = np.arange(train_x.shape[0])
        for epoch in range(num_epochs):
            np.random.shuffle(rand_idxs)
            losses = []
            for idx in xrange(0, train_x.shape[0], batch_size):
                batch_idxs = rand_idxs[idx:idx+batch_size]
                feed_dict = {x: train_x[batch_idxs],
                             y: train_y[batch_idxs],
                             weights: 1.0/train_yerr[batch_idxs],
                             is_training: 1}
                loss_np, _ = sess.run([loss, train_op], feed_dict=feed_dict)
                losses.append(loss_np)
            if epoch % print_every == 0:
                print('Epoch %d, loss = %e' % (epoch, np.mean(losses)))
                check_accuracy(sess, val_data, batch_size, x, weights, preds, is_training=is_training)
In [483]:
train(n_layer_fc_do_bn, optimizer_init_fn, x_train.shape[1],
      (x_train, y_train, yerr_train), (x_test, y_test),
      [50]*10, num_epochs=int(5e3), batch_size=1000,
      print_every=10)
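For comparison against the plain fully-connected model, the same call with `n_layer_fc` (a sketch; the hyperparameters are illustrative, not tuned, and the run is left commented out):
In [ ]:
# train(n_layer_fc, optimizer_init_fn, x_train.shape[1],
#       (x_train, y_train, yerr_train), (x_test, y_test),
#       [50]*10, num_epochs=int(5e3), batch_size=1000, print_every=10)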
In [ ]:
np.abs(emu.goodness_of_fit(training_file, statistic = 'log_frac')).mean()
In [ ]:
np.abs(emu.goodness_of_fit(training_file, statistic = 'frac')).mean()
In [ ]:
# note: run the goodness_of_fit cell below first so gof is defined
fit_idxs = np.argsort(gof.mean(axis=1))
In [ ]:
emu.goodness_of_fit(training_file).mean()  # or statistic='log_frac'
In [ ]:
model = emu._emulator
In [ ]:
ypred = model.predict(emu.x)
In [ ]:
plt.hist( np.log10( (emu._y_std+1e-5)*np.abs(ypred-emu.y)/np.abs((emu._y_std+1e-5)*emu.y+emu._y_mean) ))
In [ ]:
( (emu._y_std+1e-5)*np.abs(ypred-emu.y)/np.abs((emu._y_std+1e-5)*emu.y+emu._y_mean) ).mean()
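The same fractional-error expression appears in `check_accuracy` above; factoring it into a small helper (hypothetical name, same arithmetic) keeps the two consistent:
In [ ]:
def frac_error(y_true, y_pred, y_mean=emu._y_mean, y_std=emu._y_std + 1e-5):
    # mean |Delta y| / |y| in the unstandardized units
    return np.mean(y_std*np.abs(y_pred - y_true)/np.abs(y_std*y_true + y_mean))
print(frac_error(emu.y, ypred))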
In [ ]:
emu._y_mean, emu._y_std
In [ ]:
for idx in fit_idxs[:10]:
    print(gof[idx].mean())
    print((ypred[idx*emu.n_bins:(idx+1)*emu.n_bins]-emu.y[idx*emu.n_bins:(idx+1)*emu.n_bins])
          /emu.y[idx*emu.n_bins:(idx+1)*emu.n_bins])
    plt.plot(emu.scale_bin_centers, ypred[idx*emu.n_bins:(idx+1)*emu.n_bins], label='Emu')
    plt.plot(emu.scale_bin_centers, emu.y[idx*emu.n_bins:(idx+1)*emu.n_bins], label='True')
    plt.legend(loc='best')
    plt.xscale('log')
    plt.show()
In [ ]:
print(dict(zip(emu.get_param_names(), emu.x[8*emu.n_bins, :]*emu._x_std + emu._x_mean)))
In [ ]:
emu.get_param_names()
In [ ]:
emu._ordered_params
In [ ]:
gof = emu.goodness_of_fit(training_file, statistic='frac')
print(gof.mean())
In [ ]:
for row in gof:
    print(row)
In [ ]:
model = emu._emulator
In [ ]:
model.score(emu.x, emu.y)
In [ ]:
ypred = model.predict(emu.x)
np.mean(np.abs(ypred-emu.y)/emu.y)
In [ ]:
plt.plot(emu.scale_bin_centers, np.abs(gof.mean(axis = 0)) )
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.01)
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.05)
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.1)
plt.loglog();
In [ ]:
plt.plot(emu.scale_bin_centers, np.abs(gof.T),alpha = 0.1, color = 'b')
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.01, lw = 2, color = 'k')
plt.loglog();
In [ ]:
gof[:,i].shape
In [ ]:
for i in xrange(gof.shape[1]):
    plt.hist(np.log10(gof[:, i]), label=str(i), alpha=0.2)
plt.legend(loc='best')
plt.show()
In [ ]: