In [462]:
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [463]:
from pearce.emulator import OriginalRecipe, ExtraCrispy
from pearce.mocks import cat_dict
import numpy as np
from os import path

In [464]:
import tensorflow as tf

In [465]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

In [466]:
training_file = '/home/sean/PearceRedMagicXiCosmo.hdf5'
test_file = '/home/sean/PearceRedMagicXiCosmoTest.hdf5'
training_file = '/home/sean/PearceRedMagicXiChinchilla.hdf5' test_file = '/home/sean/PearceRedMagicXiChinchillaTest.hdf5'

In [467]:
em_method = 'nn'
split_method = 'random'

In [468]:
a = 1.0
z = 1.0/a - 1.0
emu.scale_bin_centers

In [469]:
fixed_params = {'z':z}#, 'r':17.0389993 }
n_leaves, n_overlap = 50, 1 emu = ExtraCrispy(training_file, n_leaves, n_overlap, split_method, method = em_method, fixed_params=fixed_params, custom_mean_function = None, downsample_factor = 1.0)

In [470]:
emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,
                    hyperparams = {'hidden_layer_sizes': (10),
                                 'activation': 'relu', 'verbose': True, 
                                    'tol': 1e-8, 'learning_rate_init':1e-4,\
                                   'max_iter':10, 'alpha':0, 'early_stopping':False, 'validation_fraction':0.3})


Iteration 1, loss = 0.52466542
Iteration 2, loss = 0.13790451
Iteration 3, loss = 0.09556457
Iteration 4, loss = 0.08696485
Iteration 5, loss = 0.08392834
Iteration 6, loss = 0.08262118
Iteration 7, loss = 0.08184635
Iteration 8, loss = 0.08129401
Iteration 9, loss = 0.08087770
Iteration 10, loss = 0.08054108
emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params, hyperparams = {'hidden_layer_sizes': (1000, 500, 300), 'activation': 'relu', 'verbose': True, 'tol': 1e-8, 'learning_rate_init':5e-5,\ 'max_iter':2000, 'alpha':0})

In [471]:
idxs = np.random.choice(emu.x.shape[0], size = int(emu.x.shape[0]*1.0), replace=False)

x_train, y_train,yerr_train = emu.x[idxs, :],emu.y[idxs],emu.yerr[idxs]

y_train = y_train*(emu._y_std + 1e-5) + emu._y_mean
yerr_train = yerr_train*(emu._y_std+1e-5)

In [472]:
idxs


Out[472]:
array([ 47945,  72465,  97609, ...,  66645,  34983, 431257])

In [473]:
emu.get_param_names()[:7]


Out[473]:
['ombh2', 'omch2', 'w0', 'ns', 'ln10As', 'H0', 'Neff']

In [474]:
unique_cosmos = np.unique(x_train[:, :7], axis =0)#*(emu._x_std[:7]+1e-5) + emu._x_mean[:7]

In [475]:
unique_cosmos.shape


Out[475]:
(40, 7)
left_out_cosmo = unique_cosmos[0] is_loc = np.all(x_train[:,:7] == left_out_cosmo, axis = 1) x_test = x_train[is_loc] x_train = x_train[~is_loc] y_test = y_train[is_loc] y_train = y_train[~is_loc] yerr_test = yerr_train[is_loc] yerr_train = yerr_train[~is_loc]

In [476]:
x_test, y_test, ycov_test, _ = emu.get_data(test_file, fixed_params, None, False)
x_test = (x_test - emu._x_mean)/(emu._x_std+1e-5)

#split_ycov = np.dsplit(ycov_test, ycov_test.shape[-1])
        #fullcov = block_diag(*[yc[:,:,0] for yc in split_ycov])
#yerr_test = np.sqrt(np.hstack(np.diag(syc[:,:,0]) for syc in split_ycov))
from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test, yerr_train, _ = train_test_split(x_train, y_train,yerr_train, test_size = 0.3, shuffle = True)
pnames = emu.get_param_names() for i in xrange(x_train.shape[1]): for j in xrange(i+1,x_train.shape[1]): plt.scatter(x_train[:,i], x_train[:,j]) plt.scatter(x_test[:,i], x_test[:,j]) plt.title('%s vs %s'%(pnames[i], pnames[j])) plt.show();
plt.plot(x_np[:emu.n_bins, -1:], y_np[:emu.n_bins])

In [477]:
def n_layer_fc(x, hidden_sizes, training=False, l = 1e-8):
    initializer = tf.variance_scaling_initializer(scale=2.0)
    regularizer = tf.contrib.layers.l2_regularizer(l)
    fc_output = tf.layers.dense(x, hidden_sizes[0], activation=tf.nn.relu,
                                 kernel_initializer = initializer, kernel_regularizer = regularizer)
                                 #kernel_regularizer = tf.nn.l2_loss)
    #fc2_output = tf.layers.dense(fc1_output, hidden_sizes[1], activation=tf.nn.relu,
    #                             kernel_initializer = initializer, kernel_regularizer = regularizer)
    for size in hidden_sizes[1:]:
        fc_output = tf.layers.dense(fc_output, size, activation=tf.nn.relu, kernel_initializer=initializer,
                                 kernel_regularizer = regularizer)
    pred = tf.layers.dense(fc_output, 1, kernel_initializer=initializer, 
                              kernel_regularizer = regularizer)[:,0]#,
    return pred

In [478]:
def n_layer_fc_do_bn(x, hidden_sizes, training=False, l = 1e-6):
    initializer = tf.variance_scaling_initializer(scale=2.0)
    regularizer = tf.contrib.layers.l1_regularizer(l)
    nl_out = x

    for size in hidden_sizes:
        fc_output = tf.layers.dense(nl_out, size,
                                 kernel_initializer = initializer, kernel_regularizer = regularizer)
        bd_out = tf.layers.dropout(fc_output, 0.5, training = training)
        bn_out = tf.layers.batch_normalization(bd_out, axis = -1, training=training)
        nl_out = tf.nn.relu(bn_out)#tf.nn.leaky_relu(bn_out, alpha=0.01)
    
    pred = tf.layers.dense(nl_out, 1, kernel_initializer=initializer, 
                              kernel_regularizer = regularizer)#[:,0]#,
    return pred

In [479]:
def optimizer_init_fn(learning_rate = 1e-6):
    return tf.train.AdamOptimizer(learning_rate)#, beta1=0.9, beta2=0.999, epsilon=1e-6)

In [480]:
from sklearn.metrics import r2_score, mean_squared_error

In [481]:
def check_accuracy(sess, val_data,batch_size, x, weights, preds, is_training=None):
    val_x, val_y = val_data
    perc_acc, scores = [],[]
    for idx in xrange(0, val_x.shape[0], batch_size):
        feed_dict = {x: val_x[idx:idx+batch_size],
                     is_training: 0}
        y_pred = sess.run(preds, feed_dict=feed_dict)
        #print y_pred.shape, val_y[idx:idx+batch_size].shape
        score = r2_score(val_y[idx:idx+batch_size], y_pred)
        scores.append(score)
        
        perc_acc = np.mean(emu._y_std*np.abs(val_y[idx:idx+batch_size]-y_pred)/np.abs(emu._y_std*val_y[idx:idx+batch_size] + emu._y_mean) )

        
    print 'Val score: %.6f, %.2f %% Loss'%(np.mean(np.array(scores)), 100*np.mean(np.array(perc_acc)))

In [482]:
device = '/device:GPU:0'
#device = '/cpu:0'
def train(model_init_fn, optimizer_init_fn,num_params, train_data, val_data, hidden_sizes,\
               num_epochs=1, batch_size = 200, print_every=10):
    tf.reset_default_graph()    
    with tf.device(device):
        # Construct the computational graph we will use to train the model. We
        # use the model_init_fn to construct the model, declare placeholders for
        # the data and labels
        x = tf.placeholder(tf.float32, [None,num_params])
        y = tf.placeholder(tf.float32, [None, emu.n])
        weights = tf.placeholder(tf.float32, [None])
        
        is_training = tf.placeholder(tf.bool, name='is_training')
        
        preds = model_init_fn(x, hidden_sizes, is_training)

        # Compute the loss like we did in Part II
        #loss = tf.reduce_mean(loss)
        
    with tf.device('/cpu:0'):
        loss = tf.losses.mean_squared_error(labels=y,\
                    predictions=preds, weights = weights)#weights?
        #loss = tf.losses.absolute_difference(labels=y, predictions=preds, weights = tf.abs(1.0/y))#weights?

    with tf.device(device):
        optimizer = optimizer_init_fn()
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss)
        
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        #t = 0
        train_x, train_y, train_yerr = train_data
        rand_idxs = range(train_x.shape[0])
        for epoch in range(num_epochs):
            #print('Starting epoch %d' % epoch)
            np.random.shuffle(rand_idxs)
            losses = []
            for idx in xrange(0, train_x.shape[0], batch_size):
                feed_dict = {x: train_x[rand_idxs[idx:idx+batch_size]],\
                             y: train_y[rand_idxs[idx:idx+batch_size]],\
                             weights: 1/train_yerr[rand_idxs[idx:idx+batch_size]],\
                             is_training:1}
                loss_np, _, preds_np = sess.run([loss, train_op, preds], feed_dict=feed_dict)
                losses.append(loss_np)
            if epoch % print_every == 0:
                loss_avg = np.mean(np.array(losses))
                print('Epoch %d, loss = %e' % (epoch, loss_avg))
                check_accuracy(sess, val_data, batch_size, x, weights, preds, is_training=is_training)
            #t += 1

In [483]:
train(n_layer_fc_do_bn, optimizer_init_fn, x_train.shape[1],\
           (x_train, y_train, yerr_train), (x_test, y_test),\
           [50,50,50,50,50,50,50,50,50,50], num_epochs= int(5e3), batch_size = 1000, \
           print_every = 10)


Epoch 0, loss = 1.171817e+03
Val score: -1.198081, 56.28 % Loss
Epoch 10, loss = 1.079966e+03
Val score: -1.085652, 55.53 % Loss
Epoch 20, loss = 1.003055e+03
Val score: -0.982065, 55.02 % Loss
Epoch 30, loss = 9.433729e+02
Val score: -0.897097, 54.85 % Loss
Epoch 40, loss = 8.916740e+02
Val score: -0.825690, 54.82 % Loss
Epoch 50, loss = 8.571757e+02
Val score: -0.764439, 54.86 % Loss
Epoch 60, loss = 8.308134e+02
Val score: -0.708966, 54.99 % Loss
Epoch 70, loss = 8.088951e+02
Val score: -0.666655, 55.15 % Loss
Epoch 80, loss = 7.982420e+02
Val score: -0.629501, 55.37 % Loss
Epoch 90, loss = 7.905529e+02
Val score: -0.598629, 55.53 % Loss
Epoch 100, loss = 7.775495e+02
Val score: -0.578845, 55.67 % Loss
Epoch 110, loss = 7.703328e+02
Val score: -0.564151, 55.76 % Loss
Epoch 120, loss = 7.653372e+02
Val score: -0.560196, 55.76 % Loss
Epoch 130, loss = 7.548825e+02
Val score: -0.550671, 55.82 % Loss
Epoch 140, loss = 7.490491e+02
Val score: -0.550692, 55.81 % Loss
Epoch 150, loss = 7.412849e+02
Val score: -0.546920, 55.82 % Loss
Epoch 160, loss = 7.341974e+02
Val score: -0.546793, 55.82 % Loss
Epoch 170, loss = 7.225161e+02
Val score: -0.546121, 55.81 % Loss
Epoch 180, loss = 7.149212e+02
Val score: -0.545887, 55.81 % Loss
Epoch 190, loss = 7.082894e+02
Val score: -0.543018, 55.83 % Loss
Epoch 200, loss = 6.986765e+02
Val score: -0.545194, 55.79 % Loss
Epoch 210, loss = 6.896097e+02
Val score: -0.545848, 55.79 % Loss
Epoch 220, loss = 6.771858e+02
Val score: -0.551563, 55.73 % Loss
Epoch 230, loss = 6.693393e+02
Val score: -0.552990, 55.72 % Loss
Epoch 240, loss = 6.604266e+02
Val score: -0.555284, 55.69 % Loss
Epoch 250, loss = 6.497037e+02
Val score: -0.563315, 55.64 % Loss
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-483-06f448d7945f> in <module>()
----> 1 train(n_layer_fc_do_bn, optimizer_init_fn, x_train.shape[1],           (x_train, y_train, yerr_train), (x_test, y_test),           [50,50,50,50,50,50,50,50,50,50], num_epochs= int(5e3), batch_size = 1000,            print_every = 10)

<ipython-input-482-bf038f4aa0e5> in train(model_init_fn, optimizer_init_fn, num_params, train_data, val_data, hidden_sizes, num_epochs, batch_size, print_every)
     39             for idx in xrange(0, train_x.shape[0], batch_size):
     40                 feed_dict = {x: train_x[rand_idxs[idx:idx+batch_size]],                             y: train_y[rand_idxs[idx:idx+batch_size]],                             weights: 1/train_yerr[rand_idxs[idx:idx+batch_size]],                             is_training:1}
---> 41                 loss_np, _, preds_np = sess.run([loss, train_op, preds], feed_dict=feed_dict)
     42                 losses.append(loss_np)
     43             if epoch % print_every == 0:

/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    898     try:
    899       result = self._run(None, fetches, feed_dict, options_ptr,
--> 900                          run_metadata_ptr)
    901       if run_metadata:
    902         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1133     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1134       results = self._do_run(handle, final_targets, final_fetches,
-> 1135                              feed_dict_tensor, options, run_metadata)
   1136     else:
   1137       results = []

/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1314     if handle is None:
   1315       return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1316                            run_metadata)
   1317     else:
   1318       return self._do_call(_prun_fn, handle, feeds, fetches)

/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
   1320   def _do_call(self, fn, *args):
   1321     try:
-> 1322       return fn(*args)
   1323     except errors.OpError as e:
   1324       message = compat.as_text(e.message)

/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
   1305       self._extend_graph()
   1306       return self._call_tf_sessionrun(
-> 1307           options, feed_dict, fetch_list, target_list, run_metadata)
   1308 
   1309     def _prun_fn(handle, feed_dict, fetch_list):

/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
   1407       return tf_session.TF_SessionRun_wrapper(
   1408           self._session, options, feed_dict, fetch_list, target_list,
-> 1409           run_metadata)
   1410     else:
   1411       with errors.raise_exception_on_not_ok_status() as status:

KeyboardInterrupt: 

In [ ]:
np.abs(emu.goodness_of_fit(training_file, statistic = 'log_frac')).mean()

In [ ]:
np.abs(emu.goodness_of_fit(training_file, statistic = 'frac')).mean()

In [ ]:
fit_idxs = np.argsort(gof.mean(axis = 1))

In [ ]:
emu.goodness_of_fit(training_file).mean()#, statistic = 'log_frac')).mean()

In [ ]:
model = emu._emulator

In [ ]:
ypred = model.predict(emu.x)

In [ ]:
plt.hist( np.log10( (emu._y_std+1e-5)*np.abs(ypred-emu.y)/np.abs((emu._y_std+1e-5)*emu.y+emu._y_mean) ))

In [ ]:
( (emu._y_std+1e-5)*np.abs(ypred-emu.y)/np.abs((emu._y_std+1e-5)*emu.y+emu._y_mean) ).mean()

In [ ]:
emu._y_mean, emu._y_std

In [ ]:
for idx in fit_idxs[:10]:
    print gof[idx].mean()
    print (ypred[idx*emu.n_bins:(idx+1)*emu.n_bins]-emu.y[idx*emu.n_bins:(idx+1)*emu.n_bins])/emu.y[idx*emu.n_bins:(idx+1)*emu.n_bins]
    plt.plot(emu.scale_bin_centers, ypred[idx*emu.n_bins:(idx+1)*emu.n_bins], label = 'Emu')
    plt.plot(emu.scale_bin_centers, emu.y[idx*emu.n_bins:(idx+1)*emu.n_bins], label = 'True')
    plt.legend(loc='best')
    plt.xscale('log')
    plt.show()

In [ ]:
print dict(zip(emu.get_param_names(), emu.x[8*emu.n_bins, :]*emu._x_std+emu._x_mean))

In [ ]:
emu.get_param_names()
#print emu.x.shape #print emu.downsample_x.shape if hasattr(emu, "_emulators"): print emu._emulators[0]._x.shape else: print emu._emulator._x.shape

In [ ]:
emu._ordered_params
x, y, y_pred = emu.goodness_of_fit(training_file, statistic = 'log_frac')
x, y, y_pred
N = 25 for _y, yp in zip(y[:N], y_pred[:N]): #plt.plot(emu.scale_bin_centers , (_y - yp)/yp ,alpha = 0.3, color = 'b') plt.plot(emu.scale_bin_centers, _y, alpha = 0.3, color = 'b') plt.plot(emu.scale_bin_centers, yp, alpha = 0.3, color = 'r') plt.loglog();
%%timeit #truth_file = '/u/ki/swmclau2/des/PearceRedMagicWpCosmoTest.hdf5' gof = emu.goodness_of_fit(training_file, N = 100, statistic = 'log_frac')

In [ ]:
gof = emu.goodness_of_fit(training_file, statistic = 'frac')
print gof.mean()

In [ ]:
for row in gof:
    print row

In [ ]:
gof = emu.goodness_of_fit(training_file, statistic = 'frac')
print gof.mean()

In [ ]:
model = emu._emulator

In [ ]:
model.score(emu.x, emu.y)

In [ ]:
ypred = model.predict(emu.x)

np.mean(np.abs(ypred-emu.y)/emu.y)

In [ ]:
plt.plot(emu.scale_bin_centers, np.abs(gof.mean(axis = 0)) )
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.01)
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.05)
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.1)


plt.loglog();

In [ ]:
plt.plot(emu.scale_bin_centers, np.abs(gof.T),alpha = 0.1, color = 'b')
plt.plot(emu.scale_bin_centers, np.ones_like(emu.scale_bin_centers)*0.01, lw = 2, color = 'k')
plt.loglog();

In [ ]:
gof[:,i].shape

In [ ]:
for i in xrange(gof.shape[1]):
    plt.hist(np.log10(gof[:, i]), label = str(i), alpha = 0.2);
    
plt.legend(loc='best')
plt.show()

In [ ]: