Investigating using Convolutional Networks on Weak Lensing data

Adpated from Assignment 4 of the Udacity course


In [2]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
import matplotlib.pyplot as plt
from astropy.io import fits
plt.rcParams['image.cmap'] = 'viridis'
plt.rcParams['image.interpolation'] = 'none'
%matplotlib inline
from IPython import display

In [11]:
#ugh why is everything broken?? Just copy-pasting Read_WL.ipynb because I lost the Read_WL.py (???) and the pickling
# failed because reasons...

def rebin(a, shape):
    sh = shape[0],a.shape[0]//shape[0],shape[1],a.shape[1]//shape[1]
    return a.reshape(sh).mean(-1).mean(1)

degrade=8
nct = 9
whereami = '/Users/goldston'
#whereami = '/Users/jegpeek'
path = 'Documents/Weak_Lensing/kmaps_smoothed/'

def read_WL(path):
    # this is a version to look at sigma8
    labels=['750', '850']
    imgs = np.zeros([2048/degrade, 2048/degrade, nct, len(labels)])
    for j, label in enumerate(labels):
        for i in range(nct):
            filename = whereami + '/' + path + 'smoothWL-conv_m-512b240_Om0.260_Ol0.740_w-1.000_ns0.960_si0.'+label+'_4096xy_000'+ np.str(i+1) +'r_0029p_0100z_og.gre.fit'
            f = fits.open(filename)
            imgs[:,:,i,j]=rebin(f[0].data, [2048/degrade, 2048/degrade])
    return imgs, labels

data, labels = read_WL(path)

def slice_data(data, labels, exp_cut, exp_nshift):
    labels=['750', '850']
    # how many panels across
    npanelx = 2**exp_cut
    # and how big are they?
    panelw = 2048/(degrade*npanelx)
    # how many shifted panels?
    nshift = 2**exp_nshift -1
    # and what are the shifts?
    shiftw =  panelw/2**exp_nshift
    # with 4 rotations, and 2 shifts, we have
    imgs = np.zeros([panelw, panelw, nct,(npanelx**2 +(npanelx-1)**2*nshift**2)*8, len(labels)])
    # let's figure out where the centers are, and save that data
    x_centers = np.zeros([nct,(npanelx**2 +(npanelx-1)**2*nshift**2)*8, len(labels)])
    y_centers = np.zeros([nct,(npanelx**2 +(npanelx-1)**2*nshift**2)*8, len(labels)])
    for j, label in enumerate(labels):
        for i in range(nct):
            q=0
            for k in range(npanelx):
                for l in range(npanelx):
                    for r in range(4):
                        imgs[:,:,i,q,j] = np.rot90(data[panelw*k:panelw*(k+1),panelw*l:panelw*(l+1),i, j], r)
                        x_centers[i,q,j] = (panelw*k+panelw*(k+1))/2.
                        y_centers[i,q,j] = (panelw*l+panelw*(l+1))/2.
                        q+=1
                        imgs[:,:,i,q,j] = np.fliplr(np.rot90(data[panelw*k:panelw*(k+1),panelw*l:panelw*(l+1),i, j], r))
                        x_centers[i,q,j] = (panelw*k+panelw*(k+1))/2.
                        y_centers[i,q,j] = (panelw*l+panelw*(l+1))/2.
                        q+=1
            for k in range(npanelx-1):
                for l in range(npanelx-1):
                    for m in range(nshift):
                        for n in range(nshift):
                            for r in range(4):
                                imgs[:,:,i,q,j] = np.rot90(data[panelw*k+m*shiftw:panelw*(k+1)+m*shiftw,panelw*l+n*shiftw:panelw*(l+1)+n*shiftw,i, j], r)
                                x_centers[i,q,j] = (panelw*k+m*shiftw+panelw*(k+1)+m*shiftw)/2.
                                y_centers[i,q,j] = (panelw*l+n*shiftw+panelw*(l+1)+n*shiftw)/2.
                                q+=1
                                imgs[:,:,i,q,j] = np.fliplr(np.rot90(data[panelw*k+m*shiftw:panelw*(k+1)+m*shiftw,panelw*l+n*shiftw:panelw*(l+1)+n*shiftw,i, j], r))
                                x_centers[i,q,j] = (panelw*k+m*shiftw+panelw*(k+1)+m*shiftw)/2.
                                y_centers[i,q,j] = (panelw*l+n*shiftw+panelw*(l+1)+n*shiftw)/2.
                                q+=1
    return imgs, x_centers, y_centers

imgs2, x_centers, y_centers = slice_data(data, labels, 3, 3)
img2sh = imgs2.shape
train_dataset = np.transpose(imgs2[:, :, 0:7, :, :].reshape(img2sh[0], img2sh[1], 7.0*img2sh[3]*2.0), (2, 0, 1))
train_xc = x_centers[0:7, :, :].reshape(7.0*img2sh[3]*2.0)
train_yc = y_centers[0:7, :, :].reshape(7.0*img2sh[3]*2.0)
ones = np.ones([7,img2sh[3], 2] )
train_labels = ((np.asarray([0,1])).reshape(1, 1, 2)*ones).reshape(7.0*img2sh[3]*2.0)

valid_dataset = np.transpose(imgs2[:, :, 7, :, :].reshape(img2sh[0], img2sh[1], 1.0*img2sh[3]*2.0), (2, 0, 1))
valid_xc = x_centers[7, :, :].reshape(1.0*img2sh[3]*2.0)
valid_yc = y_centers[7, :, :].reshape(1.0*img2sh[3]*2.0)

ones = np.ones([1,img2sh[3], 2] )
valid_labels = ((np.asarray([0,1])).reshape(1, 1, 2)*ones).reshape(1.0*img2sh[3]*2.0)

test_dataset = np.transpose(imgs2[:, :, 8, :, :].reshape(img2sh[0], img2sh[1], 1.0*img2sh[3]*2.0), (2, 0, 1))
test_xc = x_centers[8, :, :].reshape(1.0*img2sh[3]*2.0)
test_yc = y_centers[8, :, :].reshape(1.0*img2sh[3]*2.0)

ones = np.ones([1,img2sh[3], 2] )
test_labels = ((np.asarray([0,1])).reshape(1, 1, 2)*ones).reshape(1.0*img2sh[3]*2.0)


/Users/goldston/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:73: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/goldston/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:74: DeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-11-35dba6c22fe1> in <module>()
     72 img2sh = imgs2.shape
     73 train_dataset = np.transpose(imgs2[:, :, 0:7, :, :].reshape(img2sh[0], img2sh[1], 7.0*img2sh[3]*2.0), (2, 0, 1))
---> 74 train_xc = np.transpose(x_centers[0:7, :, :].reshape(7.0*img2sh[3]*2.0), (2, 0, 1))
     75 train_yc = np.transpose(y_centers[0:7, :, :].reshape(7.0*img2sh[3]*2.0), (2, 0, 1))
     76 ones = np.ones([7,img2sh[3], 2] )

/Users/goldston/anaconda/lib/python2.7/site-packages/numpy/core/fromnumeric.pyc in transpose(a, axes)
    549     except AttributeError:
    550         return _wrapit(a, 'transpose', axes)
--> 551     return transpose(axes)
    552 
    553 

ValueError: axes don't match array

In [12]:
train_xc.shape


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-0363d1a29df5> in <module>()
----> 1 train_xc.shape

NameError: name 'train_xc' is not defined

In [21]:
plt.plot(np.reshape(x_centers, 9*19720*2), np.reshape(y_centers, 9*19720*2), '.')


Out[21]:
[<matplotlib.lines.Line2D at 0x11bdb3690>]

In [10]:
train_labels.shape


Out[10]:
(276080, 2)

In [8]:
#pickle_file = 'notMNIST.pickle'
#pickle_file = '/Users/jegpeek/Documents/WL88.pickle'
pickle_file = '/Users/jegpeek/Dropbox/WL_other.pickle'

usePickle = True

if usePickle:
    with open(pickle_file, 'rb') as f:
      save = pickle.load(f)
      train_dataset = save['train_dataset']
      train_labels = save['train_labels']
      valid_dataset = save['valid_dataset']
      valid_labels = save['valid_labels']
      test_dataset = save['test_dataset']
      test_labels = save['test_labels']
      del save  # hint to help gc free up memory
      print('Training set', train_dataset.shape, train_labels.shape)
      print('Validation set', valid_dataset.shape, valid_labels.shape)
      print('Test set', test_dataset.shape, test_labels.shape)
else:
    %run Read_WL.py


---------------------------------------------------------------------------
EOFError                                  Traceback (most recent call last)
<ipython-input-8-5b5336631171> in <module>()
      7 if usePickle:
      8     with open(pickle_file, 'rb') as f:
----> 9       save = pickle.load(f)
     10       train_dataset = save['train_dataset']
     11       train_labels = save['train_labels']

EOFError: 

Reformat into a TensorFlow-friendly shape:

  • convolutions need the image data formatted as a cube (width by height by #channels)
  • labels as float 1-hot encodings.

In [9]:
image_size = 32
num_labels = 2
num_channels = 1 # grayscale

import numpy as np

def reformat(dataset, labels):
  dataset = dataset.reshape(
    (-1, image_size, image_size, num_channels)).astype(np.float32)
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)


('Training set', (276080, 32, 32, 1), (276080, 2))
('Validation set', (39440, 32, 32, 1), (39440, 2))
('Test set', (39440, 32, 32, 1), (39440, 2))

In [7]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit its depth and number of fully connected nodes.


In [8]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  # Global Step
  global_step = tf.Variable(0)
  learn_decay = 0.85
  learning_rate = tf.train.exponential_decay(0.005, global_step, 10000, learn_decay, staircase=True )

  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  layer1_biases = tf.Variable(tf.zeros([depth]))
  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  layer3_weights = tf.Variable(tf.truncated_normal(
      [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
  
  spl = tf.split(3, 16, layer1_weights)
  filter_summary = tf.image_summary((spl[0]).name, spl[0], max_images=1)
  # Model.
  def model(data):
    conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer1_biases)
    conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + layer2_biases)
    shape = hidden.get_shape().as_list()
    reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    return tf.matmul(hidden, layer4_weights) + layer4_biases
  
  # Training computation.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
    
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

In [10]:
num_steps = 200001
print_step = 200
summary_step = 2000
losses = np.zeros((num_steps-1)/print_step+1)
acc_valid = np.zeros((num_steps-1)/print_step+1)
acc_test = np.zeros((num_steps-1)/print_step+1)
q = 0
p = 0

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  #summary_writer = tf.train.SummaryWriter(whereami+'/Documents/logs', session.graph_def)
  print('Initialized')
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    #if (step % summary_step == 0):
    #  summary_writer.add_summary(filter_summary, p)
    #  p += 1
    if (step % print_step == 0):
      losses[q] = l
      acc_valid[q] = accuracy(valid_prediction.eval(), valid_labels)/100.0
      acc_test[q] = accuracy(test_prediction.eval(), test_labels)/100.0
      q += 1
      plt.plot(np.arange(0,(num_steps-1)/print_step+1), acc_valid, '.', color='b')
      plt.plot((-1)*np.arange(0,(num_steps-1)/print_step+1),acc_test, '.', color='g')
      plt.ylim([0.45, 0.65])
      plt.xlim([-1000, 1000])
      display.clear_output(wait=True)
      display.display(plt.gcf())
      #print('Minibatch loss at step %d: %f' % (step, l))
      #print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      #print('Validation accuracy: %.1f%%' % accuracy(valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))


Test accuracy: 49.8%

In [11]:
permutation = np.random.permutation(train_labels.shape[0])
train_dataset = train_dataset[permutation,:,:]
train_labels = train_labels[permutation]

In [12]:
num_steps = 20001
print_step = 200
losses = np.zeros((num_steps-1)/print_step+1)
acc_valid = np.zeros((num_steps-1)/print_step+1)
acc_test = np.zeros((num_steps-1)/print_step+1)
q = 0
p=0

with tf.Session(graph=graph) as session:
  tf.initialize_all_variables().run()
  print('Initialized')
  for step in range(num_steps):
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % print_step == 0):
      losses[q] = l
      acc_valid[q] = accuracy(valid_prediction.eval(), valid_labels)/100.0
      acc_test[q] = accuracy(test_prediction.eval(), test_labels)/100.0
      q += 1
      plt.plot(np.arange(0,(num_steps-1)/print_step+1), acc_valid, '.', color='b')
      plt.plot((-1)*np.arange(0,(num_steps-1)/print_step+1),acc_test, '.', color='g')
      plt.ylim([0.45, 0.65])
      plt.xlim([-1000, 1000])
      display.clear_output(wait=True)
      display.display(plt.gcf())
      #print('Minibatch loss at step %d: %f' % (step, l))
      #print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      #print('Validation accuracy: %.1f%%' % accuracy(valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-12-d3aee524864e> in <module>()
     27       plt.xlim([-1000, 1000])
     28       display.clear_output(wait=True)
---> 29       display.display(plt.gcf())
     30       #print('Minibatch loss at step %d: %f' % (step, l))
     31       #print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))

/Users/goldston/anaconda/lib/python2.7/site-packages/IPython/core/display.pyc in display(*objs, **kwargs)
    156             publish_display_data(data=obj, metadata=metadata)
    157         else:
--> 158             format_dict, md_dict = format(obj, include=include, exclude=exclude)
    159             if not format_dict:
    160                 # nothing to display (e.g. _ipython_display_ took over)

/Users/goldston/anaconda/lib/python2.7/site-packages/IPython/core/formatters.pyc in format(self, obj, include, exclude)
    175             md = None
    176             try:
--> 177                 data = formatter(obj)
    178             except:
    179                 # FIXME: log the exception

<decorator-gen-9> in __call__(self, obj)

/Users/goldston/anaconda/lib/python2.7/site-packages/IPython/core/formatters.pyc in catch_format_error(method, self, *args, **kwargs)
    220     """show traceback on failed format call"""
    221     try:
--> 222         r = method(self, *args, **kwargs)
    223     except NotImplementedError:
    224         # don't warn on NotImplementedErrors

/Users/goldston/anaconda/lib/python2.7/site-packages/IPython/core/formatters.pyc in __call__(self, obj)
    337                 pass
    338             else:
--> 339                 return printer(obj)
    340             # Finally look for special method names
    341             method = _safe_get_formatter_method(obj, self.print_method)

/Users/goldston/anaconda/lib/python2.7/site-packages/IPython/core/pylabtools.pyc in <lambda>(fig)
    226 
    227     if 'png' in formats:
--> 228         png_formatter.for_type(Figure, lambda fig: print_figure(fig, 'png', **kwargs))
    229     if 'retina' in formats or 'png2x' in formats:
    230         png_formatter.for_type(Figure, lambda fig: retina_figure(fig, **kwargs))

/Users/goldston/anaconda/lib/python2.7/site-packages/IPython/core/pylabtools.pyc in print_figure(fig, fmt, bbox_inches, **kwargs)
    117 
    118     bytes_io = BytesIO()
--> 119     fig.canvas.print_figure(bytes_io, **kw)
    120     data = bytes_io.getvalue()
    121     if fmt == 'svg':

/Users/goldston/anaconda/lib/python2.7/site-packages/matplotlib/backend_bases.pyc in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, **kwargs)
   2188                 bbox_filtered = []
   2189                 for a in bbox_artists:
-> 2190                     bbox = a.get_window_extent(renderer)
   2191                     if a.get_clip_on():
   2192                         clip_box = a.get_clip_box()

/Users/goldston/anaconda/lib/python2.7/site-packages/matplotlib/lines.pyc in get_window_extent(self, renderer)
    571         if self._marker:
    572             ms = (self._markersize / 72.0 * self.figure.dpi) * 0.5
--> 573             bbox = bbox.padded(ms)
    574         return bbox
    575 

/Users/goldston/anaconda/lib/python2.7/site-packages/matplotlib/transforms.pyc in padded(self, p)
    689         """
    690         points = self.get_points()
--> 691         return Bbox(points + [[-p, -p], [p, p]])
    692 
    693     def translated(self, tx, ty):

/Users/goldston/anaconda/lib/python2.7/site-packages/matplotlib/transforms.pyc in __init__(self, points, **kwargs)
    788         BboxBase.__init__(self, **kwargs)
    789         points = np.asarray(points, np.float_)
--> 790         if points.shape != (2, 2):
    791             raise ValueError('Bbox points must be of the form '
    792                              '"[[x0, y0], [x1, y1]]".')

KeyboardInterrupt: 

In [ ]:


In [108]:
plt.plot(np.arange(0,(num_steps-1)/print_step+1), acc_valid, '.', color='b')
plt.plot((-1)*np.arange(0,(num_steps-1)/print_step+1),acc_test, '.', color='g')
plt.ylim([0.45, 0.75])
plt.xlim([-1000, 1000])


Out[108]:
(-1000, 1000)

In [120]:
plt.plot(acc_test, acc_valid, '.', color='b')
#plt.plot((-1)*np.arange(0,(num_steps-1)/print_step+1),losses, '.', color='g')
plt.ylim([0.4, 0.7])
plt.xlim([0.0, 1.0])


Out[120]:
(0.0, 1.0)

Open questions:

  • Why is there so much scatter in the loss function over time?
  • Is there structure in the loss function over time?
  • If I plot loss vs. accuracy, what do I get?
  • Do I really see a difference when I scramble vs. leave in order, and if so, is it because of the way SGD interacts with the two cosmologies?
  • will deeper / better networks get us over 65%?
  • Why, oh why, are my test and valid data sets so damn well correlated??

In [124]:
graph.get_tensor_by_name.im_func


Out[124]:
<function tensorflow.python.framework.ops.get_tensor_by_name>

Problem 1

The convolutional model above uses convolutions with stride 2 to reduce the dimensionality. Replace the strides by a max pooling operation (nn.max_pool()) of stride 2 and kernel size 2.



Problem 2

Try to get the best performance you can using a convolutional net. Look for example at the classic LeNet5 architecture, adding Dropout, and/or adding learning rate decay.