In [2]:
from pylearn2.utils.serial import load as load_model
from pylearn2.gui.get_weights_report import get_weights_report
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import os.path
import io
from IPython.display import display, Image


Using gpu device 0: Tesla K20c

In [3]:
# Resolve the pickled model's location (${DATA_DIR} is expanded from the
# environment) and load it.
model_path = os.path.expandvars('${DATA_DIR}/plankton/models/3_conv_2_fc_64c_h1_resume_recent.pkl')
model = load_model(model_path)
#### Model summary

Run done with a model with three convolutional layers, two fully connected layers and a final softmax layer, with 64 channels per convolutional layer in the first two layers and 48 in the final one. The fully connected layers have 512 units each. Dropout was applied in the first (larger) fully connected layer (dropout probability 0.5), and the dataset was randomly augmented with uniform random rotations, shunting and scaling.

In [4]:
# Print the model's own structure description, then a per-parameter size
# breakdown with each parameter's share of the total.
print('## Model structure summary\n')
print(model)

# Map each parameter's name to the number of elements in its value.
params = model.get_params()
n_params = dict((p.name, p.get_value().size) for p in params)
total_params = sum(n_params.values())

print('\n## Number of parameters\n')
# One entry per parameter, ordered by name.
summary_lines = []
for name in sorted(n_params):
    count = n_params[name]
    summary_lines.append('{0} : {1} ({2:.1f}%)'.format(name, count, 100.*count/total_params))
print('  ' + '\n  '.join(summary_lines))
print('\nTotal : {0}'.format(total_params))


## Model structure summary

h1
	Input space: Conv2DSpace(shape=(64, 64), num_channels=1, axes=('b', 0, 1, 'c'), dtype=float32)
	Total input dimension: 4096
h2
	Input space: Conv2DSpace(shape=(34, 34), num_channels=64, axes=('b', 'c', 0, 1), dtype=float32)
	Total input dimension: 73984
h3
	Input space: Conv2DSpace(shape=(18, 18), num_channels=64, axes=('b', 'c', 0, 1), dtype=float32)
	Total input dimension: 20736
h4
	Input space: Conv2DSpace(shape=(10, 10), num_channels=48, axes=('b', 'c', 0, 1), dtype=float32)
	Total input dimension: 4800
h5
	Input space: VectorSpace(dim=512, dtype=float32)
	Total input dimension: 512
y
	Input space: VectorSpace(dim=512, dtype=float32)
	Total input dimension: 512

## Number of parameters

  h1_W : 1600 (0.0%)
  h1_b : 295936 (9.1%)
  h2_W : 36864 (1.1%)
  h2_b : 82944 (2.6%)
  h3_W : 27648 (0.9%)
  h3_b : 19200 (0.6%)
  h4_W : 2457600 (75.7%)
  h4_b : 512 (0.0%)
  h5_W : 262144 (8.1%)
  h5_b : 512 (0.0%)
  softmax_W : 61952 (1.9%)
  softmax_b : 121 (0.0%)

Total : 3247033

Train and valid set NLL trace


In [5]:
# Plot validation and training NLL against epoch, with elapsed training time
# shown on a second x-axis along the top of the figure.
nll_valid = model.monitor.channels['valid_y_y_1_nll']
nll_train = model.monitor.channels['train_y_y_1_nll']
# Dividing by 3600 gives hours, assuming time_record is in seconds -- TODO confirm.
tr = np.array(nll_valid.time_record) / 3600.
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(111)
ax1.plot(nll_valid.val_record)
ax1.plot(nll_train.val_record)
ax1.set_xlabel('Epochs')
ax1.legend(['Valid', 'Train'])
ax1.set_ylabel('NLL')
ax1.set_ylim(0., 5.)
ax1.grid(True)
ax2 = ax1.twiny()
# Label every 20th epoch position with the elapsed time recorded at that epoch.
ax2.set_xticks(np.arange(0,tr.shape[0],20))
ax2.set_xticklabels(['{0:.2f}'.format(t) for t in tr[::20]])
# The twin axis has its own x-limits, independent of ax1; copy them across so
# the hour ticks line up with the corresponding epoch positions.
ax2.set_xlim(ax1.get_xlim())
ax2.set_xlabel('Hours')
print("Minimum validation set NLL {0}".format(min(nll_valid.val_record)))


Minimum validation set NLL 0.894638836384

Visualising first layer weights

Quite nice features appear to have been learned, with some kernels appearing to have been learned at various rotations. Some quite small-scale features appear to have been learned too.


In [7]:
# Build the weights report for the model and fetch its image.
pv = get_weights_report(model=model)
img = pv.get_img()
# Enlarge the image by a factor of 8 in each dimension.
width, height = img.size
img = img.resize((8*width, 8*height))
# Encode to PNG in an in-memory buffer and show it inline.
img_data = io.BytesIO()
img.save(img_data, format='png')
png_bytes = img_data.getvalue()
display(Image(data=png_bytes, format='png'))


smallest enc weight magnitude: 0.000186428340385
mean enc weight magnitude: 0.118892915547
max enc weight magnitude: 0.800049602985

Learning rate

Initially a linear decay learning rate schedule was used together with a monitor based adjuster. It turns out these don't play well together, as the linear decay schedule overwrites any adjustments made by the monitor based extension at the next epoch. After resuming, the initial learning rate was manually reduced and the learning rate schedule was set exclusively with the monitor based adjuster.


In [6]:
# Plot the recorded learning rate values from the monitor. Axis labels are set
# first so that the plot call stays the cell's last expression.
plt.xlabel('Epochs')
plt.ylabel('Learning rate')
plt.plot(model.monitor.channels['learning_rate'].val_record)


Out[6]:
[<matplotlib.lines.Line2D at 0x7f9511f4ae90>]

Update norm monitoring

The ratio of update norms to parameter norms across epochs is plotted for each layer to give an idea of how the learning rate schedule is performing.


In [33]:
# Read the recorded h1 update-norm and kernel-norm channel values as float arrays.
h1_W_up_norms = np.array([float(v) for v in model.monitor.channels['mean_update_h1_W_kernel_norm_mean'].val_record])
h1_W_norms = np.array([float(v) for v in model.monitor.channels['valid_h1_kernel_norms_mean'].val_record])
# Update norm divided by parameter norm, i.e. the ratio this section describes.
plt.title('h1: update norm / kernel norm')
plt.xlabel('Epochs')
plt.plot(h1_W_up_norms / h1_W_norms)
plt.show()
# Second figure: the recorded mean and max kernel norm channels.
plt.title('h1: kernel norms (mean and max)')
plt.xlabel('Epochs')
plt.plot(model.monitor.channels['valid_h1_kernel_norms_mean'].val_record)
plt.plot(model.monitor.channels['valid_h1_kernel_norms_max'].val_record)


Out[33]:
[<matplotlib.lines.Line2D at 0x7faa79c34110>]

In [34]:
# Read the recorded h2 update-norm and kernel-norm channel values as float arrays.
h2_W_up_norms = np.array([float(v) for v in model.monitor.channels['mean_update_h2_W_kernel_norm_mean'].val_record])
h2_W_norms = np.array([float(v) for v in model.monitor.channels['valid_h2_kernel_norms_mean'].val_record])
# Update norm divided by parameter norm, i.e. the ratio this section describes.
plt.title('h2: update norm / kernel norm')
plt.xlabel('Epochs')
plt.plot(h2_W_up_norms / h2_W_norms)
plt.show()
# Second figure: the recorded mean and max kernel norm channels.
plt.title('h2: kernel norms (mean and max)')
plt.xlabel('Epochs')
plt.plot(model.monitor.channels['valid_h2_kernel_norms_mean'].val_record)
plt.plot(model.monitor.channels['valid_h2_kernel_norms_max'].val_record)


Out[34]:
[<matplotlib.lines.Line2D at 0x7faa7985ead0>]

In [35]:
# Read the recorded h3 update-norm and kernel-norm channel values as float arrays.
h3_W_up_norms = np.array([float(v) for v in model.monitor.channels['mean_update_h3_W_kernel_norm_mean'].val_record])
h3_W_norms = np.array([float(v) for v in model.monitor.channels['valid_h3_kernel_norms_mean'].val_record])
# Update norm divided by parameter norm, i.e. the ratio this section describes.
plt.title('h3: update norm / kernel norm')
plt.xlabel('Epochs')
plt.plot(h3_W_up_norms / h3_W_norms)
plt.show()
# Second figure: the recorded mean and max kernel norm channels.
plt.title('h3: kernel norms (mean and max)')
plt.xlabel('Epochs')
plt.plot(model.monitor.channels['valid_h3_kernel_norms_mean'].val_record)
plt.plot(model.monitor.channels['valid_h3_kernel_norms_max'].val_record)


Out[35]:
[<matplotlib.lines.Line2D at 0x7faa798b6390>]

In [37]:
# Read the recorded h4 update-norm and column-norm channel values as float arrays.
h4_W_up_norms = np.array([float(v) for v in model.monitor.channels['mean_update_h4_W_col_norm_mean'].val_record])
h4_W_norms = np.array([float(v) for v in model.monitor.channels['valid_h4_col_norms_mean'].val_record])
# Update norm divided by parameter norm, i.e. the ratio this section describes.
plt.title('h4: update norm / column norm')
plt.xlabel('Epochs')
plt.plot(h4_W_up_norms / h4_W_norms)
plt.show()
# Second figure: the recorded mean and max column norm channels.
plt.title('h4: column norms (mean and max)')
plt.xlabel('Epochs')
plt.plot(model.monitor.channels['valid_h4_col_norms_mean'].val_record)
plt.plot(model.monitor.channels['valid_h4_col_norms_max'].val_record)


Out[37]:
[<matplotlib.lines.Line2D at 0x7faa79084110>]

In [38]:
# Read the recorded h5 update-norm and column-norm channel values as float arrays.
h5_W_up_norms = np.array([float(v) for v in model.monitor.channels['mean_update_h5_W_col_norm_mean'].val_record])
h5_W_norms = np.array([float(v) for v in model.monitor.channels['valid_h5_col_norms_mean'].val_record])
# Update norm divided by parameter norm, i.e. the ratio this section describes.
plt.title('h5: update norm / column norm')
plt.xlabel('Epochs')
plt.plot(h5_W_up_norms / h5_W_norms)
plt.show()
# Second figure: the recorded mean and max column norm channels.
plt.title('h5: column norms (mean and max)')
plt.xlabel('Epochs')
plt.plot(model.monitor.channels['valid_h5_col_norms_mean'].val_record)
plt.plot(model.monitor.channels['valid_h5_col_norms_max'].val_record)


Out[38]:
[<matplotlib.lines.Line2D at 0x7faa78e811d0>]

In [ ]: