In [30]:
import theano
from pylearn2.models import mlp
from pylearn2.training_algorithms import sgd, learning_rule
from pylearn2.termination_criteria import EpochCounter, MonitorBased
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.train_extensions import best_params
from pylearn2.utils import serial
from sklearn.externals import joblib
import numpy as np
from random import randint
from keras.utils import np_utils
import itertools

In [2]:
from pprint import PrettyPrinter
pp = PrettyPrinter(depth=6)

In [3]:
# the features and labels need to be wrapped in a DenseDesignMatrix
class dataset(DenseDesignMatrix):
    def __init__(self, features, labels):
        super(dataset, self).__init__(X=features, y=labels)
        
    def split(self, prop=.8):
        cutoff = int(len(self.y) * prop)
        X1, X2 = self.X[:cutoff], self.X[cutoff:]
        y1, y2 = self.y[:cutoff], self.y[cutoff:]
        return dataset(X1, y1), dataset(X2, y2)
    
    @property
    def nr_inputs(self):
        return len(self.X[0])
 
    def __len__(self):
        return self.X.shape[0]
 
    def __iter__(self):
        return itertools.izip_longest(self.X, self.y)
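A quick sanity check of the wrapper with a few random rows confirms that split(), len() and nr_inputs behave as expected (the shapes below are purely for illustration):

# hypothetical toy data, only to exercise the dataset wrapper
toy = dataset(np.random.rand(10, 784).astype("float32"),
              np_utils.to_categorical(np.random.randint(0, 9, 10), 9))
toy_train, toy_valid = toy.split(0.8)
print len(toy_train), len(toy_valid), toy_train.nr_inputs  # 8 2 784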

In [4]:
features = joblib.load("./mldata/features.mat")
features = features.astype("float32")
features /= 255.0
labels = joblib.load("./mldata/lables.mat")
labels = np_utils.to_categorical(labels, 9)

In [5]:
ds_train = dataset(features, labels)
ds_train, ds_valid = ds_train.split(0.6)
ds_valid, ds_test = ds_valid.split(0.5)

In [6]:
print ds_train.X[0].shape
print ds_train.y[0]


(784,)
[ 0.  0.  0.  0.  0.  0.  1.  0.  0.]
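Each feature vector has 784 dimensions and each label is a one-hot row over the 9 classes; np.argmax recovers the integer class index when needed:

print np.argmax(ds_train.y[0])  # 6, the position of the 1 in the one-hot row above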

In [24]:
hidden_layer = mlp.Tanh(layer_name='hidden', dim=20, irange=.1, init_bias=1.)
output_layer = mlp.Softmax(9, 'output', irange=.1)
layers = [hidden_layer, output_layer]

# termination criterion: stop after 2 consecutive monitoring steps (N=2)
# with no decrease in misclassification on the validation set
termination_criterion = MonitorBased(channel_name='output_misclass', N=2, prop_decrease=0.0)

# momentum
initial_momentum = .5
final_momentum = .99
start = 1
saturate = 4
momentum_adjustor = learning_rule.MomentumAdjustor(final_momentum, start, saturate)
momentum_rule = learning_rule.Momentum(initial_momentum)

# learning rate
start = 1
saturate = 4
decay_factor = .1
learning_rate_adjustor = sgd.LinearDecayOverEpoch(start, saturate, decay_factor)

# trainer = sgd.SGD(learning_rate=.05, batch_size=50, termination_criterion=EpochCounter(2))
trainer = sgd.SGD(learning_rate=.05, batch_size=50, monitoring_dataset=ds_valid,
                  termination_criterion=termination_criterion, learning_rule=momentum_rule)

ann = mlp.MLP(layers, nvis=784)
trainer.setup(ann, ds_train)


Parameter and initial learning rate summary:
	hidden_W: 0.05
	hidden_b: 0.05
	softmax_b: 0.05
	softmax_W: 0.05
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 0.409526 seconds
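The learning_rate and momentum values that appear in the monitor log below can be traced by hand. Assuming LinearDecayOverEpoch scales the base rate linearly from 1.0 down to decay_factor between start and saturate, and MomentumAdjustor ramps the momentum from its initial to its final value over the same window, a small sketch reproduces the reported schedule (the formulas are a hedged reading of the adjustors, not pylearn2 source):

# hedged reconstruction of the two schedules; t counts monitoring steps
base_lr, decay_factor, start, saturate = 0.05, 0.1, 1, 4
initial_momentum, final_momentum = 0.5, 0.99

def lr_at(t):
    # scale drops linearly from 1.0 at t = start - 1 to decay_factor at t = saturate
    alpha = min(max(float(t - start + 1) / (saturate - start + 1), 0.0), 1.0)
    return base_lr * (1.0 - (1.0 - decay_factor) * alpha)

def momentum_at(t):
    # momentum ramps linearly from initial to final between start and saturate
    alpha = min(max(float(t - start) / (saturate - start), 0.0), 1.0)
    return initial_momentum + (final_momentum - initial_momentum) * alpha

for t in range(5):
    print t, lr_at(t), momentum_at(t)
# 0 0.05    0.5
# 1 0.03875 0.5
# 2 0.0275  0.663...
# 3 0.01625 0.826...
# 4 0.005   0.99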

In [25]:
monitor_save_best = best_params.MonitorBasedSaveBest('output_misclass', '/tmp/best.pkl')

while True:
    trainer.train(dataset=ds_train)
    ann.monitor()
    monitor_save_best.on_monitor(ann, ds_valid, trainer)
    if not trainer.continue_learning(ann):
        break
    momentum_adjustor.on_monitor(ann, ds_valid, trainer)
    learning_rate_adjustor.on_monitor(ann, ds_valid, trainer)


compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.264366 seconds
Monitored channels: 
	hidden_col_norms_max
	hidden_col_norms_mean
	hidden_col_norms_min
	hidden_max_x_max_u
	hidden_max_x_mean_u
	hidden_max_x_min_u
	hidden_mean_x_max_u
	hidden_mean_x_mean_u
	hidden_mean_x_min_u
	hidden_min_x_max_u
	hidden_min_x_mean_u
	hidden_min_x_min_u
	hidden_range_x_max_u
	hidden_range_x_mean_u
	hidden_range_x_min_u
	hidden_row_norms_max
	hidden_row_norms_mean
	hidden_row_norms_min
	learning_rate
	momentum
	objective
	output_col_norms_max
	output_col_norms_mean
	output_col_norms_min
	output_max_max_class
	output_mean_max_class
	output_min_max_class
	output_misclass
	output_nll
	output_row_norms_max
	output_row_norms_mean
	output_row_norms_min
Compiling accum...
graph size: 115
Compiling accum done. Time elapsed: 1.271711 seconds
Monitoring step:
	Epochs seen: 0
	Batches seen: 72
	Examples seen: 3600
	hidden_col_norms_max: 1.73307880083
	hidden_col_norms_mean: 1.65759180639
	hidden_col_norms_min: 1.5888516511
	hidden_max_x_max_u: 0.999878065951
	hidden_max_x_mean_u: 0.983804547344
	hidden_max_x_min_u: 0.92032690769
	hidden_mean_x_max_u: 0.995071024127
	hidden_mean_x_mean_u: 0.240526743329
	hidden_mean_x_min_u: -0.37265344082
	hidden_min_x_max_u: 0.899351467457
	hidden_min_x_mean_u: -0.688276242231
	hidden_min_x_min_u: -0.999650266968
	hidden_range_x_max_u: 1.9940681076
	hidden_range_x_mean_u: 1.67208078957
	hidden_range_x_min_u: 0.100103476061
	hidden_row_norms_max: 0.347956359471
	hidden_row_norms_mean: 0.263240792505
	hidden_row_norms_min: 0.168948135163
	learning_rate: 0.05
	momentum: 0.5
	objective: 0.584023902956
	output_col_norms_max: 1.16219121224
	output_col_norms_mean: 0.946162896006
	output_col_norms_min: 0.591922780639
	output_max_max_class: 0.886036423998
	output_mean_max_class: 0.683171408494
	output_min_max_class: 0.248846058941
	output_misclass: 0.105833333333
	output_nll: 0.584023902956
	output_row_norms_max: 0.928268750688
	output_row_norms_mean: 0.601736931962
	output_row_norms_min: 0.17793975848
Saving to /tmp/best.pkl...
Saving to /tmp/best.pkl done. Time elapsed: 0.041406 seconds
/Users/dikien/anaconda/lib/python2.7/site-packages/pylearn2/monitor.py:572: UserWarning: Trained model saved without indicating yaml_src
  'indicating yaml_src')
Monitoring step:
	Epochs seen: 0
	Batches seen: 144
	Examples seen: 7200
	hidden_col_norms_max: 1.75770359376
	hidden_col_norms_mean: 1.6791520007
	hidden_col_norms_min: 1.58899865794
	hidden_max_x_max_u: 0.999938961429
	hidden_max_x_mean_u: 0.987281005939
	hidden_max_x_min_u: 0.901720543399
	hidden_mean_x_max_u: 0.995046261364
	hidden_mean_x_mean_u: 0.23844876583
	hidden_mean_x_min_u: -0.357851397849
	hidden_min_x_max_u: 0.899378592621
	hidden_min_x_mean_u: -0.700460355438
	hidden_min_x_min_u: -0.99984028989
	hidden_range_x_max_u: 1.99711322869
	hidden_range_x_mean_u: 1.68774136138
	hidden_range_x_min_u: 0.100059509355
	hidden_row_norms_max: 0.354238565915
	hidden_row_norms_mean: 0.26660138633
	hidden_row_norms_min: 0.169732484124
	learning_rate: 0.03875
	momentum: 0.5
	objective: 0.405878823986
	output_col_norms_max: 1.37480093028
	output_col_norms_mean: 1.19312306039
	output_col_norms_min: 0.875839548659
	output_max_max_class: 0.94240072391
	output_mean_max_class: 0.821162151159
	output_min_max_class: 0.315854785268
	output_misclass: 0.0775
	output_nll: 0.405878823986
	output_row_norms_max: 1.08299805825
	output_row_norms_mean: 0.752874362525
	output_row_norms_min: 0.169946343448
Saving to /tmp/best.pkl...
Saving to /tmp/best.pkl done. Time elapsed: 0.022042 seconds
Monitoring step:
	Epochs seen: 0
	Batches seen: 216
	Examples seen: 10800
	hidden_col_norms_max: 1.77603840045
	hidden_col_norms_mean: 1.69381573827
	hidden_col_norms_min: 1.58909937194
	hidden_max_x_max_u: 0.999948218155
	hidden_max_x_mean_u: 0.990506069044
	hidden_max_x_min_u: 0.939817662184
	hidden_mean_x_max_u: 0.995070377698
	hidden_mean_x_mean_u: 0.251990526145
	hidden_mean_x_min_u: -0.352612161602
	hidden_min_x_max_u: 0.899540286524
	hidden_min_x_mean_u: -0.695633814391
	hidden_min_x_min_u: -0.999966766907
	hidden_range_x_max_u: 1.99692848849
	hidden_range_x_mean_u: 1.68613988344
	hidden_range_x_min_u: 0.0998768319835
	hidden_row_norms_max: 0.354768408238
	hidden_row_norms_mean: 0.26886161766
	hidden_row_norms_min: 0.169639051226
	learning_rate: 0.0275
	momentum: 0.663333333333
	objective: 0.353901475122
	output_col_norms_max: 1.48608841125
	output_col_norms_mean: 1.32805504531
	output_col_norms_min: 1.04640343982
	output_max_max_class: 0.96687626586
	output_mean_max_class: 0.867545510353
	output_min_max_class: 0.356265389774
	output_misclass: 0.0733333333333
	output_nll: 0.353901475122
	output_row_norms_max: 1.20444158802
	output_row_norms_mean: 0.837568424695
	output_row_norms_min: 0.172164275663
Saving to /tmp/best.pkl...
Saving to /tmp/best.pkl done. Time elapsed: 0.027905 seconds
Monitoring step:
	Epochs seen: 0
	Batches seen: 288
	Examples seen: 14400
	hidden_col_norms_max: 1.79346659351
	hidden_col_norms_mean: 1.70747316188
	hidden_col_norms_min: 1.58915324285
	hidden_max_x_max_u: 0.999970647326
	hidden_max_x_mean_u: 0.990894001079
	hidden_max_x_min_u: 0.946154401173
	hidden_mean_x_max_u: 0.995106395991
	hidden_mean_x_mean_u: 0.271480455466
	hidden_mean_x_min_u: -0.337696340111
	hidden_min_x_max_u: 0.899688629114
	hidden_min_x_mean_u: -0.695793690147
	hidden_min_x_min_u: -0.999935453575
	hidden_range_x_max_u: 1.99913340497
	hidden_range_x_mean_u: 1.68668769123
	hidden_range_x_min_u: 0.0997248623409
	hidden_row_norms_max: 0.374635993536
	hidden_row_norms_mean: 0.270956351883
	hidden_row_norms_min: 0.17139904413
	learning_rate: 0.01625
	momentum: 0.826666666667
	objective: 0.332685736167
	output_col_norms_max: 1.5671546799
	output_col_norms_mean: 1.42381266029
	output_col_norms_min: 1.16863798557
	output_max_max_class: 0.970873528803
	output_mean_max_class: 0.889864436139
	output_min_max_class: 0.36806531933
	output_misclass: 0.075
	output_nll: 0.332685736167
	output_row_norms_max: 1.27943892208
	output_row_norms_mean: 0.897969689518
	output_row_norms_min: 0.174252258982
Monitoring step:
	Epochs seen: 0
	Batches seen: 360
	Examples seen: 18000
	hidden_col_norms_max: 1.87127695886
	hidden_col_norms_mean: 1.74976805933
	hidden_col_norms_min: 1.58931534378
	hidden_max_x_max_u: 0.999983309458
	hidden_max_x_mean_u: 0.986092540544
	hidden_max_x_min_u: 0.841729946351
	hidden_mean_x_max_u: 0.995153212097
	hidden_mean_x_mean_u: 0.25076111429
	hidden_mean_x_min_u: -0.355134762016
	hidden_min_x_max_u: 0.899712325461
	hidden_min_x_mean_u: -0.704576129616
	hidden_min_x_min_u: -0.99999830743
	hidden_range_x_max_u: 1.99870742419
	hidden_range_x_mean_u: 1.69066867016
	hidden_range_x_min_u: 0.0997991173654
	hidden_row_norms_max: 0.422079314934
	hidden_row_norms_mean: 0.277472072654
	hidden_row_norms_min: 0.177876847464
	learning_rate: 0.005
	momentum: 0.99
	objective: 0.332309377663
	output_col_norms_max: 1.78525740231
	output_col_norms_mean: 1.59815784259
	output_col_norms_min: 1.3749152195
	output_max_max_class: 0.983303277818
	output_mean_max_class: 0.906907125806
	output_min_max_class: 0.35054630308
	output_misclass: 0.07
	output_nll: 0.332309377663
	output_row_norms_max: 1.43910686273
	output_row_norms_mean: 1.00849978698
	output_row_norms_min: 0.185629471271
Saving to /tmp/best.pkl...
Saving to /tmp/best.pkl done. Time elapsed: 0.021165 seconds
Monitoring step:
	Epochs seen: 0
	Batches seen: 432
	Examples seen: 21600
	hidden_col_norms_max: 1.98566333312
	hidden_col_norms_mean: 1.82148234303
	hidden_col_norms_min: 1.59010708097
	hidden_max_x_max_u: 0.999992364764
	hidden_max_x_mean_u: 0.988125225049
	hidden_max_x_min_u: 0.881413088248
	hidden_mean_x_max_u: 0.995408909456
	hidden_mean_x_mean_u: 0.206924147956
	hidden_mean_x_min_u: -0.450246682027
	hidden_min_x_max_u: 0.900502304247
	hidden_min_x_mean_u: -0.718221186578
	hidden_min_x_min_u: -0.999998856172
	hidden_range_x_max_u: 1.99996027972
	hidden_range_x_mean_u: 1.70634641163
	hidden_range_x_min_u: 0.0993138608745
	hidden_row_norms_max: 0.493241673077
	hidden_row_norms_mean: 0.288544246361
	hidden_row_norms_min: 0.180893541479
	learning_rate: 0.005
	momentum: 0.99
	objective: 0.314436577804
	output_col_norms_max: 2.01452012974
	output_col_norms_mean: 1.778007174
	output_col_norms_min: 1.57071464689
	output_max_max_class: 0.991940455786
	output_mean_max_class: 0.925847184304
	output_min_max_class: 0.325649284734
	output_misclass: 0.0716666666667
	output_nll: 0.314436577804
	output_row_norms_max: 1.63583247009
	output_row_norms_mean: 1.12562391544
	output_row_norms_min: 0.229063007757
Monitoring step:
	Epochs seen: 0
	Batches seen: 504
	Examples seen: 25200
	hidden_col_norms_max: 2.08478762846
	hidden_col_norms_mean: 1.89823036379
	hidden_col_norms_min: 1.59195329591
	hidden_max_x_max_u: 0.999998408546
	hidden_max_x_mean_u: 0.990973517609
	hidden_max_x_min_u: 0.92613502511
	hidden_mean_x_max_u: 0.99579022193
	hidden_mean_x_mean_u: 0.198369883334
	hidden_mean_x_min_u: -0.447353353411
	hidden_min_x_max_u: 0.901711843804
	hidden_min_x_mean_u: -0.771104622486
	hidden_min_x_min_u: -0.999998907812
	hidden_range_x_max_u: 1.99999422625
	hidden_range_x_mean_u: 1.76207814009
	hidden_range_x_min_u: 0.0981373571972
	hidden_row_norms_max: 0.543368510498
	hidden_row_norms_mean: 0.300471987861
	hidden_row_norms_min: 0.189884953977
	learning_rate: 0.005
	momentum: 0.99
	objective: 0.318032096179
	output_col_norms_max: 2.18049384854
	output_col_norms_mean: 1.90093380741
	output_col_norms_min: 1.62592287357
	output_max_max_class: 0.994515339321
	output_mean_max_class: 0.926383294551
	output_min_max_class: 0.331516765795
	output_misclass: 0.0733333333333
	output_nll: 0.318032096179
	output_row_norms_max: 1.77675031341
	output_row_norms_mean: 1.20295752835
	output_row_norms_min: 0.237318885816
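Once the loop stops, the best-scoring snapshot written to /tmp/best.pkl can be reloaded with serial.load and scored on the held-out test split. A minimal evaluation sketch, assuming the standard pylearn2 fprop-based prediction recipe:

# reload the best model saved by MonitorBasedSaveBest and measure test error
best_ann = serial.load('/tmp/best.pkl')
X_sym = best_ann.get_input_space().make_theano_batch()
Y_hat = best_ann.fprop(X_sym)
predict = theano.function([X_sym], Y_hat)

pred_classes = np.argmax(predict(ds_test.X), axis=1)
true_classes = np.argmax(ds_test.y, axis=1)
print "test misclassification:", np.mean(pred_classes != true_classes)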