In [14]:
from pylearn2.config import yaml_parse

In [11]:
!ls /Users/dikien/Downloads/pylearn2/pylearn2/scripts/tutorials/stacked_autoencoders/
%cd /Users/dikien/Downloads/pylearn2/pylearn2/scripts/tutorials/stacked_autoencoders/


README                     dae_l2.yaml                stacked_autoencoders.ipynb
dae_l1.yaml                dae_mlp.yaml               tests
/Users/dikien/Downloads/pylearn2/pylearn2/scripts/tutorials/stacked_autoencoders

In [24]:
with open('dae_l1.yaml', 'r') as f:
    layer1_yaml = f.read()
print layer1_yaml


!obj:pylearn2.train.Train {
    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
        which_set: 'train',
        start: 0,
        stop: %(train_stop)i
    },
    model: !obj:pylearn2.models.autoencoder.DenoisingAutoencoder {
        nvis : 784,
        nhid : %(nhid)i,
        irange : 0.05,
        corruptor: !obj:pylearn2.corruption.BinomialCorruptor {
            corruption_level: .2,
        },
        act_enc: "tanh",
        act_dec: null,    # Linear activation on the decoder side.
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        learning_rate : 1e-3,
        batch_size : %(batch_size)i,
        monitoring_batches : %(monitoring_batches)i,
        monitoring_dataset : *train,
        cost : !obj:pylearn2.costs.autoencoder.MeanSquaredReconstructionError {},
        termination_criterion : !obj:pylearn2.termination_criteria.EpochCounter {
            max_epochs: %(max_epochs)i,
        },
    },
    save_path: "%(save_path)s/dae_l1.pkl",
    save_freq: 1
}


In [25]:
hyper_params_l1 = {'train_stop' : 50000,
                   'batch_size' : 100,
                   'monitoring_batches' : 5,
                   'nhid' : 500,
                   'max_epochs' : 10,
                   'save_path' : '.'}
layer1_yaml = layer1_yaml % (hyper_params_l1)
print layer1_yaml


!obj:pylearn2.train.Train {
    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
        which_set: 'train',
        start: 0,
        stop: 50000
    },
    model: !obj:pylearn2.models.autoencoder.DenoisingAutoencoder {
        nvis : 784,
        nhid : 500,
        irange : 0.05,
        corruptor: !obj:pylearn2.corruption.BinomialCorruptor {
            corruption_level: .2,
        },
        act_enc: "tanh",
        act_dec: null,    # Linear activation on the decoder side.
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        learning_rate : 1e-3,
        batch_size : 100,
        monitoring_batches : 5,
        monitoring_dataset : *train,
        cost : !obj:pylearn2.costs.autoencoder.MeanSquaredReconstructionError {},
        termination_criterion : !obj:pylearn2.termination_criteria.EpochCounter {
            max_epochs: 10,
        },
    },
    save_path: "./dae_l1.pkl",
    save_freq: 1
}


In [26]:
train = yaml_parse.load(layer1_yaml)
train.main_loop()


Parameter and initial learning rate summary:
	vb: 0.001
	hb: 0.001
	W: 0.001
	Wprime: 0.001
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 8.535399 seconds
compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.041074 seconds
Monitored channels: 
	learning_rate
	objective
	total_seconds_last_epoch
	training_seconds_this_epoch
Compiling accum...
graph size: 19
Compiling accum done. Time elapsed: 1.713480 seconds
Monitoring step:
	Epochs seen: 0
	Batches seen: 0
	Examples seen: 0
	learning_rate: 0.001
	objective: 92.111948471
	total_seconds_last_epoch: 0.0
	training_seconds_this_epoch: 0.0
Time this epoch: 10.570406 seconds
Monitoring step:
	Epochs seen: 1
	Batches seen: 500
	Examples seen: 50000
	learning_rate: 0.001
	objective: 26.4766793624
	total_seconds_last_epoch: 0.0
	training_seconds_this_epoch: 10.570406
Saving to ./dae_l1.pkl...
Saving to ./dae_l1.pkl done. Time elapsed: 1.046847 seconds
Time this epoch: 10.761591 seconds
Monitoring step:
	Epochs seen: 2
	Batches seen: 1000
	Examples seen: 100000
	learning_rate: 0.001
	objective: 20.0324420641
	total_seconds_last_epoch: 16.675167
	training_seconds_this_epoch: 10.761591
Saving to ./dae_l1.pkl...
Saving to ./dae_l1.pkl done. Time elapsed: 1.416973 seconds
Time this epoch: 10.619281 seconds
Monitoring step:
	Epochs seen: 3
	Batches seen: 1500
	Examples seen: 150000
	learning_rate: 0.001
	objective: 16.9526632246
	total_seconds_last_epoch: 18.241106
	training_seconds_this_epoch: 10.619281
Saving to ./dae_l1.pkl...
Saving to ./dae_l1.pkl done. Time elapsed: 0.904373 seconds
Time this epoch: 11.538486 seconds
Monitoring step:
	Epochs seen: 4
	Batches seen: 2000
	Examples seen: 200000
	learning_rate: 0.001
	objective: 15.1179329275
	total_seconds_last_epoch: 16.58631
	training_seconds_this_epoch: 11.538486
Saving to ./dae_l1.pkl...
Saving to ./dae_l1.pkl done. Time elapsed: 1.273636 seconds
Time this epoch: 10.053852 seconds
Monitoring step:
	Epochs seen: 5
	Batches seen: 2500
	Examples seen: 250000
	learning_rate: 0.001
	objective: 13.8935779578
	total_seconds_last_epoch: 19.899654
	training_seconds_this_epoch: 10.053852
Saving to ./dae_l1.pkl...
Saving to ./dae_l1.pkl done. Time elapsed: 1.236007 seconds
Time this epoch: 14.470846 seconds
Monitoring step:
	Epochs seen: 6
	Batches seen: 3000
	Examples seen: 300000
	learning_rate: 0.001
	objective: 13.0115734208
	total_seconds_last_epoch: 16.380132
	training_seconds_this_epoch: 14.470846
Saving to ./dae_l1.pkl...
Saving to ./dae_l1.pkl done. Time elapsed: 1.553399 seconds
Time this epoch: 11.414060 seconds
Monitoring step:
	Epochs seen: 7
	Batches seen: 3500
	Examples seen: 350000
	learning_rate: 0.001
	objective: 12.354534632
	total_seconds_last_epoch: 22.618751
	training_seconds_this_epoch: 11.41406
Saving to ./dae_l1.pkl...
Saving to ./dae_l1.pkl done. Time elapsed: 1.122237 seconds
Time this epoch: 10.201545 seconds
Monitoring step:
	Epochs seen: 8
	Batches seen: 4000
	Examples seen: 400000
	learning_rate: 0.001
	objective: 11.8151804812
	total_seconds_last_epoch: 17.347741
	training_seconds_this_epoch: 10.201545
Saving to ./dae_l1.pkl...
Saving to ./dae_l1.pkl done. Time elapsed: 1.305958 seconds
Time this epoch: 11.302759 seconds
Monitoring step:
	Epochs seen: 9
	Batches seen: 4500
	Examples seen: 450000
	learning_rate: 0.001
	objective: 11.3887492443
	total_seconds_last_epoch: 17.157322
	training_seconds_this_epoch: 11.302759
Saving to ./dae_l1.pkl...
Saving to ./dae_l1.pkl done. Time elapsed: 0.965640 seconds
Time this epoch: 10.481926 seconds
Monitoring step:
	Epochs seen: 10
	Batches seen: 5000
	Examples seen: 500000
	learning_rate: 0.001
	objective: 11.0457817733
	total_seconds_last_epoch: 17.000882
	training_seconds_this_epoch: 10.481926
Saving to ./dae_l1.pkl...
Saving to ./dae_l1.pkl done. Time elapsed: 0.981587 seconds
Saving to ./dae_l1.pkl...
Saving to ./dae_l1.pkl done. Time elapsed: 1.001528 seconds
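
The first layer is now pickled at ./dae_l1.pkl. As a quick sanity check, the file can be reloaded and the encoder weight matrix inspected. This is a minimal sketch, not part of the original tutorial; it assumes the autoencoder exposes get_weights(), which pylearn2 models generally do.

In [ ]:
from pylearn2.utils import serial

dae_l1 = serial.load('dae_l1.pkl')   # reload the pickled layer-1 autoencoder
W = dae_l1.get_weights()             # encoder weight matrix as a numpy array
print W.shape                        # expected: (784, 500), i.e. nvis x nhid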

In [27]:
with open('dae_l2.yaml', 'r') as f:
    layer2_yaml = f.read()
print layer2_yaml


!obj:pylearn2.train.Train {
    dataset: &train !obj:pylearn2.datasets.transformer_dataset.TransformerDataset {
        raw: !obj:pylearn2.datasets.mnist.MNIST {
            which_set: 'train',
            start: 0,
            stop: %(train_stop)i
        },
        transformer: !pkl: "%(save_path)s/dae_l1.pkl"
    },
    model: !obj:pylearn2.models.autoencoder.DenoisingAutoencoder {
        nvis : %(nvis)i,
        nhid : %(nhid)i,
        irange : 0.05,
        corruptor: !obj:pylearn2.corruption.BinomialCorruptor {
            corruption_level: .3,
        },
        act_enc: "tanh",
        act_dec: null,    # Linear activation on the decoder side.
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        learning_rate : 1e-3,
        batch_size : %(batch_size)i,
        monitoring_batches : %(monitoring_batches)i,
        monitoring_dataset : *train,
        cost : !obj:pylearn2.costs.autoencoder.MeanSquaredReconstructionError {},
        termination_criterion : !obj:pylearn2.termination_criteria.EpochCounter {
            max_epochs: %(max_epochs)i,
        },
    },
    save_path: "%(save_path)s/dae_l2.pkl",
    save_freq: 1
}


In [28]:
hyper_params_l2 = {'train_stop' : 50000,
                   'batch_size' : 100,
                   'monitoring_batches' : 5,
                   'nvis' : hyper_params_l1['nhid'],  # layer 2's input width must equal layer 1's hidden width
                   'nhid' : 500,
                   'max_epochs' : 10,
                   'save_path' : '.'}
layer2_yaml = layer2_yaml % (hyper_params_l2)
print layer2_yaml


!obj:pylearn2.train.Train {
    dataset: &train !obj:pylearn2.datasets.transformer_dataset.TransformerDataset {
        raw: !obj:pylearn2.datasets.mnist.MNIST {
            which_set: 'train',
            start: 0,
            stop: 50000
        },
        transformer: !pkl: "./dae_l1.pkl"
    },
    model: !obj:pylearn2.models.autoencoder.DenoisingAutoencoder {
        nvis : 500,
        nhid : 500,
        irange : 0.05,
        corruptor: !obj:pylearn2.corruption.BinomialCorruptor {
            corruption_level: .3,
        },
        act_enc: "tanh",
        act_dec: null,    # Linear activation on the decoder side.
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        learning_rate : 1e-3,
        batch_size : 100,
        monitoring_batches : 5,
        monitoring_dataset : *train,
        cost : !obj:pylearn2.costs.autoencoder.MeanSquaredReconstructionError {},
        termination_criterion : !obj:pylearn2.termination_criteria.EpochCounter {
            max_epochs: 10,
        },
    },
    save_path: "./dae_l2.pkl",
    save_freq: 1
}


In [29]:
train = yaml_parse.load(layer2_yaml)
train.main_loop()


Parameter and initial learning rate summary:
	vb: 0.001
	hb: 0.001
	W: 0.001
	Wprime: 0.001
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 0.424942 seconds
compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.037834 seconds
Monitored channels: 
	learning_rate
	objective
	total_seconds_last_epoch
	training_seconds_this_epoch
Compiling accum...
graph size: 19
Compiling accum done. Time elapsed: 0.308717 seconds
Monitoring step:
	Epochs seen: 0
	Batches seen: 0
	Examples seen: 0
	learning_rate: 0.001
	objective: 64.294005807
	total_seconds_last_epoch: 0.0
	training_seconds_this_epoch: 0.0
Time this epoch: 9.479954 seconds
Monitoring step:
	Epochs seen: 1
	Batches seen: 500
	Examples seen: 50000
	learning_rate: 0.001
	objective: 20.2072944723
	total_seconds_last_epoch: 0.0
	training_seconds_this_epoch: 9.479954
Saving to ./dae_l2.pkl...
Saving to ./dae_l2.pkl done. Time elapsed: 0.734012 seconds
Time this epoch: 9.795702 seconds
Monitoring step:
	Epochs seen: 2
	Batches seen: 1000
	Examples seen: 100000
	learning_rate: 0.001
	objective: 13.7861076018
	total_seconds_last_epoch: 15.329416
	training_seconds_this_epoch: 9.795702
Saving to ./dae_l2.pkl...
Saving to ./dae_l2.pkl done. Time elapsed: 0.663375 seconds
Time this epoch: 10.952850 seconds
Monitoring step:
	Epochs seen: 3
	Batches seen: 1500
	Examples seen: 150000
	learning_rate: 0.001
	objective: 10.906734586
	total_seconds_last_epoch: 15.883912
	training_seconds_this_epoch: 10.95285
Saving to ./dae_l2.pkl...
Saving to ./dae_l2.pkl done. Time elapsed: 0.973954 seconds
Time this epoch: 11.765567 seconds
Monitoring step:
	Epochs seen: 4
	Batches seen: 2000
	Examples seen: 200000
	learning_rate: 0.001
	objective: 9.32353813901
	total_seconds_last_epoch: 17.924495
	training_seconds_this_epoch: 11.765567
Saving to ./dae_l2.pkl...
Saving to ./dae_l2.pkl done. Time elapsed: 0.752700 seconds
Time this epoch: 10.033027 seconds
Monitoring step:
	Epochs seen: 5
	Batches seen: 2500
	Examples seen: 250000
	learning_rate: 0.001
	objective: 8.33108113852
	total_seconds_last_epoch: 18.197243
	training_seconds_this_epoch: 10.033027
Saving to ./dae_l2.pkl...
Saving to ./dae_l2.pkl done. Time elapsed: 0.859218 seconds
Time this epoch: 11.996140 seconds
Monitoring step:
	Epochs seen: 6
	Batches seen: 3000
	Examples seen: 300000
	learning_rate: 0.001
	objective: 7.67753543515
	total_seconds_last_epoch: 16.316979
	training_seconds_this_epoch: 11.99614
Saving to ./dae_l2.pkl...
Saving to ./dae_l2.pkl done. Time elapsed: 0.783568 seconds
Time this epoch: 9.533927 seconds
Monitoring step:
	Epochs seen: 7
	Batches seen: 3500
	Examples seen: 350000
	learning_rate: 0.001
	objective: 7.21219009382
	total_seconds_last_epoch: 18.45656
	training_seconds_this_epoch: 9.533927
Saving to ./dae_l2.pkl...
Saving to ./dae_l2.pkl done. Time elapsed: 0.547828 seconds
Time this epoch: 9.571927 seconds
Monitoring step:
	Epochs seen: 8
	Batches seen: 4000
	Examples seen: 400000
	learning_rate: 0.001
	objective: 6.86433605747
	total_seconds_last_epoch: 14.951676
	training_seconds_this_epoch: 9.571927
Saving to ./dae_l2.pkl...
Saving to ./dae_l2.pkl done. Time elapsed: 0.669331 seconds
Time this epoch: 9.597112 seconds
Monitoring step:
	Epochs seen: 9
	Batches seen: 4500
	Examples seen: 450000
	learning_rate: 0.001
	objective: 6.58669781282
	total_seconds_last_epoch: 15.541953
	training_seconds_this_epoch: 9.597112
Saving to ./dae_l2.pkl...
Saving to ./dae_l2.pkl done. Time elapsed: 0.636326 seconds
Time this epoch: 9.409799 seconds
Monitoring step:
	Epochs seen: 10
	Batches seen: 5000
	Examples seen: 500000
	learning_rate: 0.001
	objective: 6.36710589611
	total_seconds_last_epoch: 15.173992
	training_seconds_this_epoch: 9.409799
Saving to ./dae_l2.pkl...
Saving to ./dae_l2.pkl done. Time elapsed: 1.723690 seconds
Saving to ./dae_l2.pkl...
Saving to ./dae_l2.pkl done. Time elapsed: 1.487059 seconds
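
The second autoencoder was trained on the first layer's encodings, supplied by the TransformerDataset above. A rough shape check is to compose the two pretrained encoders on a small batch of raw digits. This sketch is an illustration only; it assumes the autoencoders' symbolic encode() method, which takes and returns Theano variables.

In [ ]:
import theano
from pylearn2.utils import serial
from pylearn2.datasets.mnist import MNIST

dae_l1 = serial.load('dae_l1.pkl')
dae_l2 = serial.load('dae_l2.pkl')

# Compile a function that pushes raw pixels through both encoders.
X = theano.tensor.matrix('X')
encode_fn = theano.function([X], dae_l2.encode(dae_l1.encode(X)))

batch = MNIST(which_set='train', start=0, stop=100).X.astype(theano.config.floatX)
print encode_fn(batch).shape    # expected: (100, 500)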

In [30]:
with open('dae_mlp.yaml', 'r') as f:
    mlp_yaml = f.read()
print mlp_yaml


!obj:pylearn2.train.Train {
    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
        which_set: 'train',
        start: 0,
        stop: %(train_stop)i
    },
    model: !obj:pylearn2.models.mlp.MLP {
        batch_size: %(batch_size)i,
        layers: [
                 !obj:pylearn2.models.mlp.PretrainedLayer {
                     layer_name: 'h1',
                     layer_content: !pkl: "%(save_path)s/dae_l1.pkl"
                 },
                 !obj:pylearn2.models.mlp.PretrainedLayer {
                     layer_name: 'h2',
                     layer_content: !pkl: "%(save_path)s/dae_l2.pkl"
                 },
                 !obj:pylearn2.models.mlp.Softmax {
                     max_col_norm: 1.9365,
                     layer_name: 'y',
                     n_classes: 10,
                     irange: .005
                 }
                ],
        nvis: 784
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        learning_rate: .05,
        learning_rule: !obj:pylearn2.training_algorithms.learning_rule.Momentum {
            init_momentum: .5,
        },
        monitoring_dataset:
            {
                'valid' : !obj:pylearn2.datasets.mnist.MNIST {
                              which_set: 'train',
                              start: 50000,
                              stop: %(valid_stop)i
                          },
            },
        cost: !obj:pylearn2.costs.mlp.Default {},
        termination_criterion: !obj:pylearn2.termination_criteria.And {
            criteria: [
                !obj:pylearn2.termination_criteria.MonitorBased {
                    channel_name: "valid_y_misclass",
                    prop_decrease: 0.,
                    N: 100
                },
                !obj:pylearn2.termination_criteria.EpochCounter {
                    max_epochs: %(max_epochs)i
                }
            ]
        },
        update_callbacks: !obj:pylearn2.training_algorithms.sgd.ExponentialDecay {
            decay_factor: 1.00004,
            min_lr: .000001
        }
    },
    extensions: [
        !obj:pylearn2.training_algorithms.learning_rule.MomentumAdjustor {
            start: 1,
            saturate: 250,
            final_momentum: .7
        }
    ]
}


In [31]:
hyper_params_mlp = {'train_stop' : 50000,
                    'valid_stop' : 60000,
                    'batch_size' : 100,
                    'max_epochs' : 50,
                    'save_path' : '.'}
mlp_yaml = mlp_yaml % (hyper_params_mlp)
print mlp_yaml


!obj:pylearn2.train.Train {
    dataset: &train !obj:pylearn2.datasets.mnist.MNIST {
        which_set: 'train',
        start: 0,
        stop: 50000
    },
    model: !obj:pylearn2.models.mlp.MLP {
        batch_size: 100,
        layers: [
                 !obj:pylearn2.models.mlp.PretrainedLayer {
                     layer_name: 'h1',
                     layer_content: !pkl: "./dae_l1.pkl"
                 },
                 !obj:pylearn2.models.mlp.PretrainedLayer {
                     layer_name: 'h2',
                     layer_content: !pkl: "./dae_l2.pkl"
                 },
                 !obj:pylearn2.models.mlp.Softmax {
                     max_col_norm: 1.9365,
                     layer_name: 'y',
                     n_classes: 10,
                     irange: .005
                 }
                ],
        nvis: 784
    },
    algorithm: !obj:pylearn2.training_algorithms.sgd.SGD {
        learning_rate: .05,
        learning_rule: !obj:pylearn2.training_algorithms.learning_rule.Momentum {
            init_momentum: .5,
        },
        monitoring_dataset:
            {
                'valid' : !obj:pylearn2.datasets.mnist.MNIST {
                              which_set: 'train',
                              start: 50000,
                              stop: 60000
                          },
            },
        cost: !obj:pylearn2.costs.mlp.Default {},
        termination_criterion: !obj:pylearn2.termination_criteria.And {
            criteria: [
                !obj:pylearn2.termination_criteria.MonitorBased {
                    channel_name: "valid_y_misclass",
                    prop_decrease: 0.,
                    N: 100
                },
                !obj:pylearn2.termination_criteria.EpochCounter {
                    max_epochs: 50
                }
            ]
        },
        update_callbacks: !obj:pylearn2.training_algorithms.sgd.ExponentialDecay {
            decay_factor: 1.00004,
            min_lr: .000001
        }
    },
    extensions: [
        !obj:pylearn2.training_algorithms.learning_rule.MomentumAdjustor {
            start: 1,
            saturate: 250,
            final_momentum: .7
        }
    ]
}


In [ ]:
train = yaml_parse.load(mlp_yaml)
train.main_loop()


Parameter and initial learning rate summary:
	vb: 0.05
	hb: 0.05
	W: 0.05
	Wprime: 0.05
	vb: 0.05
	hb: 0.05
	W: 0.05
	Wprime: 0.05
	softmax_b: 0.05
	softmax_W: 0.05
Compiling sgd_update...
Compiling sgd_update done. Time elapsed: 6.700774 seconds
compiling begin_record_entry...
compiling begin_record_entry done. Time elapsed: 0.062979 seconds
Monitored channels: 
	learning_rate
	momentum
	total_seconds_last_epoch
	training_seconds_this_epoch
	valid_objective
	valid_y_col_norms_max
	valid_y_col_norms_mean
	valid_y_col_norms_min
	valid_y_max_max_class
	valid_y_mean_max_class
	valid_y_min_max_class
	valid_y_misclass
	valid_y_nll
	valid_y_row_norms_max
	valid_y_row_norms_mean
	valid_y_row_norms_min
Compiling accum...
graph size: 63
Compiling accum done. Time elapsed: 0.931920 seconds
Monitoring step:
	Epochs seen: 0
	Batches seen: 0
	Examples seen: 0
	learning_rate: 0.05
	momentum: 0.5
	total_seconds_last_epoch: 0.0
	training_seconds_this_epoch: 0.0
	valid_objective: 2.30021407389
	valid_y_col_norms_max: 0.0650027130649
	valid_y_col_norms_mean: 0.064174585385
	valid_y_col_norms_min: 0.0624680393697
	valid_y_max_max_class: 0.106719216561
	valid_y_mean_max_class: 0.103229796303
	valid_y_min_max_class: 0.101238149481
	valid_y_misclass: 0.888
	valid_y_nll: 2.30021407389
	valid_y_row_norms_max: 0.0125483740983
	valid_y_row_norms_mean: 0.00897719438947
	valid_y_row_norms_min: 0.00411556576876
Time this epoch: 13.158766 seconds
Monitoring step:
	Epochs seen: 1
	Batches seen: 500
	Examples seen: 50000
	learning_rate: 0.0490099532688
	momentum: 0.5
	total_seconds_last_epoch: 0.0
	training_seconds_this_epoch: 13.158766
	valid_objective: 0.278234405165
	valid_y_col_norms_max: 1.36132717813
	valid_y_col_norms_mean: 1.24750458117
	valid_y_col_norms_min: 1.07665625258
	valid_y_max_max_class: 0.99967042039
	valid_y_mean_max_class: 0.89333202731
	valid_y_min_max_class: 0.372069926787
	valid_y_misclass: 0.0793
	valid_y_nll: 0.278234405165
	valid_y_row_norms_max: 0.319150639671
	valid_y_row_norms_mean: 0.171802208732
	valid_y_row_norms_min: 0.0399207885787
Time this epoch: 10.836096 seconds
Monitoring step:
	Epochs seen: 2
	Batches seen: 1000
	Examples seen: 100000
	learning_rate: 0.0480395103882
	momentum: 0.500803212851
	total_seconds_last_epoch: 14.150889
	training_seconds_this_epoch: 10.836096
	valid_objective: 0.23955092492
	valid_y_col_norms_max: 1.52508033886
	valid_y_col_norms_mean: 1.4035044461
	valid_y_col_norms_min: 1.23748044277
	valid_y_max_max_class: 0.99983407061
	valid_y_mean_max_class: 0.915776109011
	valid_y_min_max_class: 0.402919864137
	valid_y_misclass: 0.0682
	valid_y_nll: 0.23955092492
	valid_y_row_norms_max: 0.367833987248
	valid_y_row_norms_mean: 0.192808127063
	valid_y_row_norms_min: 0.050357193377
Time this epoch: 9.756829 seconds
Monitoring step:
	Epochs seen: 3
	Batches seen: 1500
	Examples seen: 150000
	learning_rate: 0.0470882831836
	momentum: 0.501606425703
	total_seconds_last_epoch: 11.831991
	training_seconds_this_epoch: 9.756829
	valid_objective: 0.200217975394
	valid_y_col_norms_max: 1.71952750842
	valid_y_col_norms_mean: 1.53547645863
	valid_y_col_norms_min: 1.42110335836
	valid_y_max_max_class: 0.999881443313
	valid_y_mean_max_class: 0.928128320653
	valid_y_min_max_class: 0.414282991949
	valid_y_misclass: 0.0576
	valid_y_nll: 0.200217975394
	valid_y_row_norms_max: 0.40888443791
	valid_y_row_norms_mean: 0.210184773429
	valid_y_row_norms_min: 0.0545657088062
Time this epoch: 11.896243 seconds
Monitoring step:
	Epochs seen: 4
	Batches seen: 2000
	Examples seen: 200000
	learning_rate: 0.0461558911667
	momentum: 0.502409638554
	total_seconds_last_epoch: 10.798661
	training_seconds_this_epoch: 11.896243
	valid_objective: 0.17415213553
	valid_y_col_norms_max: 1.9365
	valid_y_col_norms_mean: 1.65811471015
	valid_y_col_norms_min: 1.47460838036
	valid_y_max_max_class: 0.999914969629
	valid_y_mean_max_class: 0.936481031968
	valid_y_min_max_class: 0.414629329458
	valid_y_misclass: 0.0492
	valid_y_nll: 0.17415213553
	valid_y_row_norms_max: 0.456000087118
	valid_y_row_norms_mean: 0.226211775318
	valid_y_row_norms_min: 0.0622331039523
Time this epoch: 10.715199 seconds
Monitoring step:
	Epochs seen: 5
	Batches seen: 2500
	Examples seen: 250000
	learning_rate: 0.0452419613832
	momentum: 0.503212851406
	total_seconds_last_epoch: 13.013633
	training_seconds_this_epoch: 10.715199
	valid_objective: 0.152688460444
	valid_y_col_norms_max: 1.93623806534
	valid_y_col_norms_mean: 1.75543161873
	valid_y_col_norms_min: 1.505661121
	valid_y_max_max_class: 0.999913916954
	valid_y_mean_max_class: 0.942628733883
	valid_y_min_max_class: 0.431397100703
	valid_y_misclass: 0.0418
	valid_y_nll: 0.152688460444
	valid_y_row_norms_max: 0.505858250232
	valid_y_row_norms_mean: 0.238653730688
	valid_y_row_norms_min: 0.0713204364606
Time this epoch: 10.744419 seconds
Monitoring step:
	Epochs seen: 6
	Batches seen: 3000
	Examples seen: 300000
	learning_rate: 0.0443461282636
	momentum: 0.504016064257
	total_seconds_last_epoch: 11.791187
	training_seconds_this_epoch: 10.744419
	valid_objective: 0.136227230503
	valid_y_col_norms_max: 1.93504255675
	valid_y_col_norms_mean: 1.82769291328
	valid_y_col_norms_min: 1.55100667642
	valid_y_max_max_class: 0.99994637086
	valid_y_mean_max_class: 0.950290047894
	valid_y_min_max_class: 0.452767477444
	valid_y_misclass: 0.038
	valid_y_nll: 0.136227230503
	valid_y_row_norms_max: 0.535950736363
	valid_y_row_norms_mean: 0.247734854542
	valid_y_row_norms_min: 0.0761420530836
Time this epoch: 12.423846 seconds
Monitoring step:
	Epochs seen: 7
	Batches seen: 3500
	Examples seen: 350000
	learning_rate: 0.043468033477
	momentum: 0.504819277108
	total_seconds_last_epoch: 11.836817
	training_seconds_this_epoch: 12.423846
	valid_objective: 0.124043755506
	valid_y_col_norms_max: 1.9365
	valid_y_col_norms_mean: 1.86859504013
	valid_y_col_norms_min: 1.59509095666
	valid_y_max_max_class: 0.999940355648
	valid_y_mean_max_class: 0.953919201627
	valid_y_min_max_class: 0.460544308006
	valid_y_misclass: 0.0359
	valid_y_nll: 0.124043755506
	valid_y_row_norms_max: 0.539856465789
	valid_y_row_norms_mean: 0.252841057904
	valid_y_row_norms_min: 0.0774112465944
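
The output above is truncated mid-run; once main_loop finishes, the fine-tuned MLP is available as train.model. A minimal evaluation sketch follows, assuming the usual pylearn2 pattern of compiling fprop into a Theano function and comparing argmax predictions against the MNIST test labels; it is not part of the original notebook.

In [ ]:
import numpy as np
import theano
import theano.tensor as T
from pylearn2.datasets.mnist import MNIST

model = train.model                                   # the fine-tuned MLP
X = model.get_input_space().make_theano_batch()
predict = theano.function([X], T.argmax(model.fprop(X), axis=1))

test = MNIST(which_set='test')
y_hat = predict(test.X.astype(X.dtype))
print 'test error:', np.not_equal(y_hat, test.y.flatten()).mean()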