In [1]:
from pylearn2.datasets.mnist import MNIST
from pylearn2.train import Train
from pylearn2.models import softmax_regression, mlp
from pylearn2.training_algorithms import bgd, sgd
from pylearn2.termination_criteria import MonitorBased
from pylearn2.train_extensions import best_params
from pylearn2.utils import serial
from pylearn2.costs.mlp.dropout import Dropout
from theano import function
from theano import tensor as T
import numpy as np
import os
from __future__ import division
from time import time

In [2]:
# Load examples [0, 50000) of the MNIST 'train' set as the training split.
train = MNIST(which_set='train', start=0, stop=50000)

In [3]:
# Bare expression: the notebook echoes the dataset object's repr as the cell output.
train


Out[3]:
<pylearn2.datasets.mnist.MNIST at 0x1043e1210>

In [3]:
# Summarise the training split: its input space, the design-matrix shape
# (rows x columns) and the distinct label values found in train.y.
# Single-argument print(...) keeps the output identical on Python 2.
print("training space : %s" % str(train.X_space))
print("training space : %s rows  %s columns" % tuple(train.X.shape[:2]))
print("unique members of lable : %s " % str(np.unique(train.y)))


training space : VectorSpace(dim=784, dtype=float64)
training space : 50000 rows  784 columns
unique members of lable : [0 1 2 3 4 5 6 7 8 9] 

In [4]:
# Two Softplus hidden layers with the same settings (800 units, sparse_init=40),
# built in the order h0, h1. A generator expression is used so that no loop
# variable is left behind in the notebook namespace.
h0, h1 = (
    mlp.Softplus(layer_name=hidden_name, dim=800, sparse_init=40)
    for hidden_name in ('h0', 'h1')
)
# A third, smaller hidden layer is currently disabled:
# h2 = mlp.Softplus(layer_name='h2', dim=50, sparse_init=15)

# 10-class softmax output layer; irange=0 is the weight initialisation range.
ylayer = mlp.Softmax(n_classes=10, layer_name='y', irange=0)

# Layer stack in forward order: input -> h0 -> h1 -> y.
layers = [h0, h1, ylayer]

In [5]:
# Assemble the network from the layer stack; nvis=784 is the input width
# (one value per pixel column of the MNIST design matrix).
model = mlp.MLP(layers=layers, nvis=784)

# Data splits, built in the order train, valid, test:
#   train -> 'train' set, examples [0, 50000)
#   valid -> 'train' set, examples [50000, 60000)
#   test  -> 'test' set,  examples [0, 10000)
train, valid, test = (
    MNIST(which_set=split_name, start=first, stop=last)
    for split_name, first, last in (
        ('train', 0, 50000),
        ('train', 50000, 60000),
        ('test', 0, 10000),
    )
)

In [6]:
# Datasets to monitor during training, keyed by the prefix used in channel
# names (the 'valid' key yields channels such as valid_y_misclass).
monitoring = {'valid': valid}

# Termination criterion driven by the validation misclassification channel.
# NOTE(review): presumably N=100 is the number of epochs to wait for an
# improvement before stopping -- confirm against the MonitorBased docs.
termination = MonitorBased(N=100, channel_name="valid_y_misclass")

# Checkpoint the model to train_best.pkl, tracking the same channel.
extensions = [
    best_params.MonitorBasedSaveBest(
        save_path="train_best.pkl",
        channel_name="valid_y_misclass",
    ),
]

In [ ]:
# Stochastic gradient descent: learning rate 0.1, minibatches of 100 examples,
# Dropout cost, monitored on `monitoring` and stopped by `termination`.
algorithm = sgd.SGD(
    learning_rate=0.1,
    batch_size=100,
    cost=Dropout(),
    monitoring_dataset=monitoring,
    termination_criterion=termination,
)

In [ ]:
# Train the model, or reuse the best checkpoint left behind by an earlier run.
#
# `save_path` is the file the MonitorBasedSaveBest extension writes to. The
# per-epoch snapshot written by Train goes to "train.pkl" instead.
save_path = "train_best.pkl"
t0 = time()
if not os.path.exists(save_path):
    print('Running training')
    train_job = Train(train, model, algorithm, extensions=extensions,
                      save_path="train.pkl", save_freq=1)
    train_job.main_loop()

# Always continue with the best-on-validation checkpoint. Previously a fresh
# run left `model` holding the last-epoch parameters while a cached run loaded
# the best ones, so later cells evaluated different models depending on
# whether the checkpoint file already existed.
# NOTE(review): serial.load unpickles the file; only load checkpoints that
# this notebook produced itself.
model = serial.load(save_path)
print("elapsed time : %s s" % round(time() - t0, 3))


Running training
Parameter and initial learning rate summary:
/Users/dikien/anaconda/lib/python2.7/site-packages/pylearn2/train.py:85: UserWarning: dataset has no yaml src, model won't know what data it was trained on
  "data it was trained on")
	h0_W: 0.1
	h0_b: 0.1
	h1_W: 0.1
	h1_b: 0.1
	softmax_b: 0.1
	softmax_W: 0.1
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 1.116040 seconds
compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.150263 seconds
Monitored channels: 
	learning_rate
	total_seconds_last_epoch
	training_seconds_this_epoch
	valid_h0_col_norms_max
	valid_h0_col_norms_mean
	valid_h0_col_norms_min
	valid_h0_max_x_max_u
	valid_h0_max_x_mean_u
	valid_h0_max_x_min_u
	valid_h0_mean_x_max_u
	valid_h0_mean_x_mean_u
	valid_h0_mean_x_min_u
	valid_h0_min_x_max_u
	valid_h0_min_x_mean_u
	valid_h0_min_x_min_u
	valid_h0_range_x_max_u
	valid_h0_range_x_mean_u
	valid_h0_range_x_min_u
	valid_h0_row_norms_max
	valid_h0_row_norms_mean
	valid_h0_row_norms_min
	valid_h1_col_norms_max
	valid_h1_col_norms_mean
	valid_h1_col_norms_min
	valid_h1_max_x_max_u
	valid_h1_max_x_mean_u
	valid_h1_max_x_min_u
	valid_h1_mean_x_max_u
	valid_h1_mean_x_mean_u
	valid_h1_mean_x_min_u
	valid_h1_min_x_max_u
	valid_h1_min_x_mean_u
	valid_h1_min_x_min_u
	valid_h1_range_x_max_u
	valid_h1_range_x_mean_u
	valid_h1_range_x_min_u
	valid_h1_row_norms_max
	valid_h1_row_norms_mean
	valid_h1_row_norms_min
	valid_objective
	valid_y_col_norms_max
	valid_y_col_norms_mean
	valid_y_col_norms_min
	valid_y_max_max_class
	valid_y_mean_max_class
	valid_y_min_max_class
	valid_y_misclass
	valid_y_nll
	valid_y_row_norms_max
	valid_y_row_norms_mean
	valid_y_row_norms_min
Compiling accum...
graph size: 200
Compiling accum done. Time elapsed: 2.555339 seconds
Monitoring step:
	Epochs seen: 0
	Batches seen: 0
	Examples seen: 0
	learning_rate: 0.1
	total_seconds_last_epoch: 0.0
	training_seconds_this_epoch: 0.0
	valid_h0_col_norms_max: 9.01184708572
	valid_h0_col_norms_mean: 6.33669927554
	valid_h0_col_norms_min: 3.9261697119
	valid_h0_max_x_max_u: 11.2880327606
	valid_h0_max_x_mean_u: 3.91097932087
	valid_h0_max_x_min_u: 0.604315111206
	valid_h0_mean_x_max_u: 4.49308367574
	valid_h0_mean_x_mean_u: 1.08868462919
	valid_h0_mean_x_min_u: 0.0755258455991
	valid_h0_min_x_max_u: 0.975326403512
	valid_h0_min_x_mean_u: 0.0663122809301
	valid_h0_min_x_min_u: 1.71519420225e-05
	valid_h0_range_x_max_u: 11.0868709497
	valid_h0_range_x_mean_u: 3.84466703994
	valid_h0_range_x_min_u: 0.602491866579
	valid_h0_row_norms_max: 9.17260496152
	valid_h0_row_norms_mean: 6.38221596831
	valid_h0_row_norms_min: 3.71216670841
	valid_h1_col_norms_max: 8.78310559421
	valid_h1_col_norms_mean: 6.28492595118
	valid_h1_col_norms_min: 4.30337178024
	valid_h1_max_x_max_u: 54.0563944344
	valid_h1_max_x_mean_u: 14.6220533434
	valid_h1_max_x_min_u: 7.90681366255e-06
	valid_h1_mean_x_max_u: 27.7509956073
	valid_h1_mean_x_mean_u: 4.04756524354
	valid_h1_mean_x_min_u: 1.26131739499e-07
	valid_h1_min_x_max_u: 11.6471258618
	valid_h1_min_x_mean_u: 0.220910279933
	valid_h1_min_x_min_u: 1.12228497596e-22
	valid_h1_range_x_max_u: 48.5262775798
	valid_h1_range_x_mean_u: 14.4011430635
	valid_h1_range_x_min_u: 7.9068136625e-06
	valid_h1_row_norms_max: 9.18014239095
	valid_h1_row_norms_mean: 6.26290467936
	valid_h1_row_norms_min: 3.7547616186
	valid_objective: 2.30258509299
	valid_y_col_norms_max: 0.0
	valid_y_col_norms_mean: 0.0
	valid_y_col_norms_min: 0.0
	valid_y_max_max_class: 0.1
	valid_y_mean_max_class: 0.1
	valid_y_min_max_class: 0.1
	valid_y_misclass: 0.9009
	valid_y_nll: 2.30258509299
	valid_y_row_norms_max: 0.0
	valid_y_row_norms_mean: 0.0
	valid_y_row_norms_min: 0.0
Saving to train_best.pkl...
Saving to train_best.pkl done. Time elapsed: 2.011521 seconds
/Users/dikien/anaconda/lib/python2.7/site-packages/pylearn2/monitor.py:572: UserWarning: Trained model saved without indicating yaml_src
  'indicating yaml_src')
Time this epoch: 17.459340 seconds
Monitoring step:
	Epochs seen: 1
	Batches seen: 500
	Examples seen: 50000
	learning_rate: 0.1
	total_seconds_last_epoch: 0.0
	training_seconds_this_epoch: 17.45934
	valid_h0_col_norms_max: 8.95849760275
	valid_h0_col_norms_mean: 6.31649684628
	valid_h0_col_norms_min: 4.02562489058
	valid_h0_max_x_max_u: 24.2454379314
	valid_h0_max_x_mean_u: 2.6806868412
	valid_h0_max_x_min_u: 0.0639223198311
	valid_h0_mean_x_max_u: 13.7350825503
	valid_h0_mean_x_mean_u: 0.771428219771
	valid_h0_mean_x_min_u: 0.00339624414824
	valid_h0_min_x_max_u: 6.21969609993
	valid_h0_min_x_mean_u: 0.132779166783
	valid_h0_min_x_min_u: 9.56576699942e-08
	valid_h0_range_x_max_u: 19.2563496543
	valid_h0_range_x_mean_u: 2.54790767442
	valid_h0_range_x_min_u: 0.0639189762871
	valid_h0_row_norms_max: 9.150629016
	valid_h0_row_norms_mean: 6.36265596668
	valid_h0_row_norms_min: 3.71222707762
	valid_h1_col_norms_max: 8.74417111975
	valid_h1_col_norms_mean: 6.27747148424
	valid_h1_col_norms_min: 4.29659534425
	valid_h1_max_x_max_u: 15.71128365
	valid_h1_max_x_mean_u: 0.164328735328
	valid_h1_max_x_min_u: 3.8361199635e-21
	valid_h1_mean_x_max_u: 1.78753826129
	valid_h1_mean_x_mean_u: 0.0100616711068
	valid_h1_mean_x_min_u: 4.21433997385e-23
	valid_h1_min_x_max_u: 2.51108434155e-05
	valid_h1_min_x_mean_u: 4.54982610212e-08
	valid_h1_min_x_min_u: 2.88794521799e-46
	valid_h1_range_x_max_u: 15.7112787846
	valid_h1_range_x_mean_u: 0.16432868983
	valid_h1_range_x_min_u: 3.8361199635e-21
	valid_h1_row_norms_max: 9.15606714587
	valid_h1_row_norms_mean: 6.25791639705
	valid_h1_row_norms_min: 3.80454518389
	valid_objective: 1.76324312786
	valid_y_col_norms_max: 13.6361504007
	valid_y_col_norms_mean: 5.0227263357
	valid_y_col_norms_min: 2.23812962117
	valid_y_max_max_class: 0.76799260714
	valid_y_mean_max_class: 0.246676656903
	valid_y_min_max_class: 0.135062604306
	valid_y_misclass: 0.525
	valid_y_nll: 1.70315619693
	valid_y_row_norms_max: 3.67912066621
	valid_y_row_norms_mean: 0.505085974326
	valid_y_row_norms_min: 0.0657063671215
Saving to train_best.pkl...
Saving to train_best.pkl done. Time elapsed: 1.332532 seconds
Saving to train.pkl...
Saving to train.pkl done. Time elapsed: 1.219754 seconds
Time this epoch: 15.362646 seconds
Monitoring step:
	Epochs seen: 2
	Batches seen: 1000
	Examples seen: 100000
	learning_rate: 0.1
	total_seconds_last_epoch: 24.823171
	training_seconds_this_epoch: 15.362646
	valid_h0_col_norms_max: 8.94964570584
	valid_h0_col_norms_mean: 6.31341674407
	valid_h0_col_norms_min: 4.02515291235
	valid_h0_max_x_max_u: 23.480693013
	valid_h0_max_x_mean_u: 2.68082596438
	valid_h0_max_x_min_u: 0.0600996045252
	valid_h0_mean_x_max_u: 13.1648148764
	valid_h0_mean_x_mean_u: 0.735634925797
	valid_h0_mean_x_min_u: 0.0030894677288
	valid_h0_min_x_max_u: 5.88444975048
	valid_h0_min_x_mean_u: 0.115056692188
	valid_h0_min_x_min_u: 8.07685612043e-08
	valid_h0_range_x_max_u: 19.0694519421
	valid_h0_range_x_mean_u: 2.5657692722
	valid_h0_range_x_min_u: 0.0600936742904
	valid_h0_row_norms_max: 9.14728204485
	valid_h0_row_norms_mean: 6.35969997987
	valid_h0_row_norms_min: 3.71238920555
	valid_h1_col_norms_max: 8.74408395459
	valid_h1_col_norms_mean: 6.2753224677
	valid_h1_col_norms_min: 4.2962251165
	valid_h1_max_x_max_u: 19.9239176859
	valid_h1_max_x_mean_u: 0.355807289819
	valid_h1_max_x_min_u: 1.10647304242e-20
	valid_h1_mean_x_max_u: 1.67045058867
	valid_h1_mean_x_mean_u: 0.0198046428681
	valid_h1_mean_x_min_u: 1.23106873503e-22
	valid_h1_min_x_max_u: 1.40489538715e-05
	valid_h1_min_x_mean_u: 3.17825115898e-08
	valid_h1_min_x_min_u: 1.29901137137e-47
	valid_h1_range_x_max_u: 19.9239173175
	valid_h1_range_x_mean_u: 0.355807258037
	valid_h1_range_x_min_u: 1.10647304242e-20
	valid_h1_row_norms_max: 9.15083481779
	valid_h1_row_norms_mean: 6.25597765414
	valid_h1_row_norms_min: 3.81549887796
	valid_objective: 1.46522884736
	valid_y_col_norms_max: 13.5570774514
	valid_y_col_norms_mean: 4.9869884625
	valid_y_col_norms_min: 2.23777764023
	valid_y_max_max_class: 0.990424379684
	valid_y_mean_max_class: 0.385708717634
	valid_y_min_max_class: 0.153763332938
	valid_y_misclass: 0.3721
	valid_y_nll: 1.28028400811
	valid_y_row_norms_max: 3.67912066621
	valid_y_row_norms_mean: 0.498620977652
	valid_y_row_norms_min: 0.0571743135467
Saving to train_best.pkl...
Saving to train_best.pkl done. Time elapsed: 1.104155 seconds
Saving to train.pkl...
Saving to train.pkl done. Time elapsed: 1.069278 seconds
Time this epoch: 15.450538 seconds
Monitoring step:
	Epochs seen: 3
	Batches seen: 1500
	Examples seen: 150000
	learning_rate: 0.1
	total_seconds_last_epoch: 22.073477
	training_seconds_this_epoch: 15.450538
	valid_h0_col_norms_max: 8.9473995979
	valid_h0_col_norms_mean: 6.31057590081
	valid_h0_col_norms_min: 4.02145131535
	valid_h0_max_x_max_u: 22.5169323095
	valid_h0_max_x_mean_u: 2.64821874296
	valid_h0_max_x_min_u: 0.0561663273396
	valid_h0_mean_x_max_u: 12.6002808926
	valid_h0_mean_x_mean_u: 0.693155551834
	valid_h0_mean_x_min_u: 0.0027718770734
	valid_h0_min_x_max_u: 5.58583974957
	valid_h0_min_x_mean_u: 0.0992259868775
	valid_h0_min_x_min_u: 6.62508749567e-08
	valid_h0_range_x_max_u: 18.5496593015
	valid_h0_range_x_mean_u: 2.54899275608
	valid_h0_range_x_min_u: 0.0561605070682
	valid_h0_row_norms_max: 9.14531155278
	valid_h0_row_norms_mean: 6.35696209661
	valid_h0_row_norms_min: 3.71262321652
	valid_h1_col_norms_max: 8.74306660486
	valid_h1_col_norms_mean: 6.27331710786
	valid_h1_col_norms_min: 4.29536307962
	valid_h1_max_x_max_u: 22.7105380016
	valid_h1_max_x_mean_u: 0.533216867948
	valid_h1_max_x_min_u: 5.88193764917e-20
	valid_h1_mean_x_max_u: 2.06416701724
	valid_h1_mean_x_mean_u: 0.0307771229594
	valid_h1_mean_x_min_u: 7.20527389124e-22
	valid_h1_min_x_max_u: 1.50017585919e-05
	valid_h1_min_x_mean_u: 3.01934204525e-08
	valid_h1_min_x_min_u: 2.85253780297e-46
	valid_h1_range_x_max_u: 22.7105378819
	valid_h1_range_x_mean_u: 0.533216837754
	valid_h1_range_x_min_u: 5.88193764917e-20
	valid_h1_row_norms_max: 9.14541168201
	valid_h1_row_norms_mean: 6.2541205978
	valid_h1_row_norms_min: 3.82082458202
	valid_objective: 1.25880090753
	valid_y_col_norms_max: 13.5037488164
	valid_y_col_norms_mean: 4.95916937096
	valid_y_col_norms_min: 2.25261205595
	valid_y_max_max_class: 0.997874151073
	valid_y_mean_max_class: 0.504086858385
	valid_y_min_max_class: 0.170070359037
	valid_y_misclass: 0.3318
	valid_y_nll: 0.987839035319
	valid_y_row_norms_max: 3.67912066621
	valid_y_row_norms_mean: 0.494875472274
	valid_y_row_norms_min: 0.0560972021726
Saving to train_best.pkl...
Saving to train_best.pkl done. Time elapsed: 1.336835 seconds
Saving to train.pkl...
Saving to train.pkl done. Time elapsed: 1.357832 seconds
Time this epoch: 16.521655 seconds
Monitoring step:
	Epochs seen: 4
	Batches seen: 2000
	Examples seen: 200000
	learning_rate: 0.1
	total_seconds_last_epoch: 22.817628
	training_seconds_this_epoch: 16.521655
	valid_h0_col_norms_max: 8.94733094855
	valid_h0_col_norms_mean: 6.30798356916
	valid_h0_col_norms_min: 4.01635629269
	valid_h0_max_x_max_u: 22.0878071846
	valid_h0_max_x_mean_u: 2.60734716112
	valid_h0_max_x_min_u: 0.052332228092
	valid_h0_mean_x_max_u: 12.2958491429
	valid_h0_mean_x_mean_u: 0.653912192914
	valid_h0_mean_x_min_u: 0.00262902882087
	valid_h0_min_x_max_u: 5.42165448469
	valid_h0_min_x_mean_u: 0.0860152966474
	valid_h0_min_x_min_u: 5.91864791671e-08
	valid_h0_range_x_max_u: 18.3620070711
	valid_h0_range_x_mean_u: 2.52133186447
	valid_h0_range_x_min_u: 0.052326451024
	valid_h0_row_norms_max: 9.13203493961
	valid_h0_row_norms_mean: 6.3544485815
	valid_h0_row_norms_min: 3.71263103049
	valid_h1_col_norms_max: 8.74209534375
	valid_h1_col_norms_mean: 6.27145146564
	valid_h1_col_norms_min: 4.29471577095
	valid_h1_max_x_max_u: 26.7042804462
	valid_h1_max_x_mean_u: 0.668208353467
	valid_h1_max_x_min_u: 3.21910027109e-19
	valid_h1_mean_x_max_u: 2.5866619552
	valid_h1_mean_x_mean_u: 0.0396556382812
	valid_h1_mean_x_min_u: 4.29525735762e-21
	valid_h1_min_x_max_u: 2.00733632335e-05
	valid_h1_min_x_mean_u: 3.249411804e-08
	valid_h1_min_x_min_u: 4.30058749588e-46
	valid_h1_range_x_max_u: 26.7042804294
	valid_h1_range_x_mean_u: 0.668208320973
	valid_h1_range_x_min_u: 3.21910027109e-19
	valid_h1_row_norms_max: 9.14193314975
	valid_h1_row_norms_mean: 6.25239690231
	valid_h1_row_norms_min: 3.82013098185
	valid_objective: 1.1406447193
	valid_y_col_norms_max: 13.4705011399
	valid_y_col_norms_mean: 4.93168540273
	valid_y_col_norms_min: 2.25843886103
	valid_y_max_max_class: 0.99947053196
	valid_y_mean_max_class: 0.571788934735
	valid_y_min_max_class: 0.185984015893
	valid_y_misclass: 0.2814
	valid_y_nll: 0.868200156724
	valid_y_row_norms_max: 3.67912066621
	valid_y_row_norms_mean: 0.491038695728
	valid_y_row_norms_min: 0.0523068288079
Saving to train_best.pkl...
Saving to train_best.pkl done. Time elapsed: 1.147775 seconds
Saving to train.pkl...
Saving to train.pkl done. Time elapsed: 1.122892 seconds

In [ ]:
# Build a symbolic forward pass through the model.
# X: a symbolic batch created from the model's input space.
# Y: the result of model.fprop(X).
# NOTE(review): presumably Y holds per-class softmax outputs of the 'y' layer -- confirm.
X = model.get_input_space().make_batch_theano()
Y = model.fprop(X)