In [3]:
# import packages if you miss any, just install them
import csv
from dateutil import parser
from datetime import timedelta
from sklearn import svm
import numpy as np
import pandas as pd
import pdb
import pickle
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn import preprocessing
import sklearn
import scipy.stats as ss


import cPickle
import gzip
import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

class dA(object):
    r"""Denoising Auto-Encoder class (dA)

    A denoising autoencoder tries to reconstruct the input from a corrupted
    version of it by projecting it first into a latent space and reprojecting
    it afterwards back into the input space. Please refer to Vincent et al.,
    2008 for more details. If x is the input then equation (1) computes a
    partially destroyed version of x by means of a stochastic mapping q_D.
    Equation (2) computes the projection of the input into the latent space.
    Equation (3) computes the reconstruction of the input, while equation (4)
    computes the reconstruction error.

    .. math::

        \tilde{x} ~ q_D(\tilde{x}|x)                                     (1)

        y = s(W \tilde{x} + b)                                           (2)

        x = s(W' y  + b')                                                (3)

        L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)]      (4)

    """

    def __init__(self, numpy_rng, theano_rng=None, input=None,
                 n_visible=784, n_hidden=500,
                 W=None, bhid=None, bvis=None):
        """
        Initialize the dA class by specifying the number of visible units (the
        dimension d of the input), the number of hidden units (the dimension
        d' of the latent or hidden space) and the corruption level. The
        constructor also receives symbolic variables for the input, weights
        and bias. Such symbolic variables are useful when, for example, the
        input is the result of some computations, or when weights are shared
        between the dA and an MLP layer. When dealing with SdAs this always
        happens: the dA on layer 2 gets as input the output of the dA on
        layer 1, and the weights of the dA are used in the second stage of
        training to construct an MLP.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random generator used to generate weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                     generated based on a seed drawn from `numpy_rng`

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input or None for a
                      standalone dA

        :type n_visible: int
        :param n_visible: number of visible units

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should be
                  shared between the dA and another architecture; if the dA
                  should be standalone set this to None

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of bias values (for
                     hidden units) that should be shared between the dA and
                     another architecture; if the dA should be standalone set
                     this to None

        :type bvis: theano.tensor.TensorType
        :param bvis: Theano variable pointing to a set of bias values (for
                     visible units) that should be shared between the dA and
                     another architecture; if the dA should be standalone set
                     this to None
        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        # create a Theano random generator that gives symbolic random values
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # note : W' was written as `W_prime` and b' as `b_prime`
        if W is None:
            # W is initialized with `initial_W` which is uniformly sampled
            # from -4*sqrt(6./(n_visible+n_hidden)) and
            # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
            # converted using asarray to dtype theano.config.floatX so that
            # the code is runnable on GPU
            initial_W = numpy.asarray(numpy_rng.uniform(
                      low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                      high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                      size=(n_visible, n_hidden)), dtype=theano.config.floatX)
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if bvis is None:
            bvis = theano.shared(value=numpy.zeros(n_visible,
                                         dtype=theano.config.floatX),
                                 borrow=True)

        if bhid is None:
            bhid = theano.shared(value=numpy.zeros(n_hidden,
                                                   dtype=theano.config.floatX),
                                 name='b',
                                 borrow=True)

        self.W = W
        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        self.W_prime = self.W.T
        self.theano_rng = theano_rng
        # if no input is given, generate a variable representing the input.
        # NOTE: must be `is None` — `input == None` on a Theano tensor builds
        # a symbolic element-wise comparison instead of a plain boolean.
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b, self.b_prime]

    def get_corrupted_input(self, input, corruption_level):
        """This function keeps ``1-corruption_level`` entries of the inputs
        the same and zeroes out a randomly selected subset of size
        ``corruption_level``.

        Note : the first argument of theano_rng.binomial is the shape (size)
               of the random numbers that it should produce;
               the second argument is the number of trials;
               the third argument is the probability of success of any trial.

                This will produce an array of 0s and 1s where 1 has a
                probability of 1 - ``corruption_level`` and 0 with
                ``corruption_level``.

                The binomial function returns the int64 data type by
                default.  int64 multiplied by the input type (floatX)
                always returns float64.  To keep all data in floatX when
                floatX is float32, we set the dtype of the binomial to
                floatX. As in our case the value of the binomial is always
                0 or 1, this doesn't change the result. This is needed to
                allow the GPU to work correctly as it only supports
                float32 for now.
        """
        return  self.theano_rng.binomial(size=input.shape, n=1,
                                         p=1 - corruption_level,
                                         dtype=theano.config.floatX) * input

    def get_hidden_values(self, input):
        """ Computes the values of the hidden layer """
        return T.nnet.sigmoid(T.dot(input, self.W) + self.b)

    def get_reconstructed_input(self, hidden):
        """Computes the reconstructed input given the values of the
        hidden layer

        """
        return  T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)

    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one training
        step of the dA """

        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(L)

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates (plain SGD step per parameter)
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - learning_rate * gparam))

        return (cost, updates)
"""
 This tutorial introduces stacked denoising auto-encoders (SdA) using Theano.

 Denoising autoencoders are the building blocks for SdA.
 They are based on auto-encoders as the ones used in Bengio et al. 2007.
 An autoencoder takes an input x and first maps it to a hidden representation
 y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting
 latent representation y is then mapped back to a "reconstructed" vector
 z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b').  The weight
 matrix W' can optionally be constrained such that W' = W^T, in which case
 the autoencoder is said to have tied weights. The network is trained so as
 to minimize the reconstruction error (the error between x and z).

 For the denoising autoencoder, during training, first x is corrupted into
 \tilde{x}, where \tilde{x} is a partially destroyed version of x by means
 of a stochastic mapping. Afterwards y is computed as before (using
 \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction
 error is now measured between z and the uncorrupted input x, which is
 computed as the cross-entropy :
      - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]


 References :
   - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and
   Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
   2008
   - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
   Training of Deep Networks, Advances in Neural Information Processing
   Systems 19, 2007

"""
import cPickle
import gzip
import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
from dA import dA


class SdA(object):
    """Stacked denoising auto-encoder class (SdA)

    A stacked denoising autoencoder model is obtained by stacking several
    dAs. The hidden layer of the dA at layer `i` becomes the input of
    the dA at layer `i+1`. The first layer dA gets as input the input of
    the SdA, and the hidden layer of the last dA represents the output.
    Note that after pretraining, the SdA is dealt with as a normal MLP,
    the dAs are only used to initialize the weights.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 corruption_levels=[0.1, 0.1]):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the SdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers sizes, must contain
                                    at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoder.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # its arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shares weights (W and
            # the hidden bias) with this layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.sigmoid_layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.params.extend(self.logLayer.params)
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size):
        ''' Generates a list of functions, each of them implementing one
        step in training the dA corresponding to the layer with same index.
        The function will require as input the minibatch index, and to train
        a dA you just need to iterate, calling the corresponding function on
        all minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints used
                            for training the dA

        :type batch_size: int
        :param batch_size: size of a [mini]batch
        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        corruption_level = T.scalar('corruption')  # % of corruption to use
        learning_rate = T.scalar('lr')  # learning rate to use
        # begining of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for dA in self.dA_layers:
            # get the cost and the updates list
            cost, updates = dA.get_cost_updates(corruption_level,
                                                learning_rate)
            # compile the theano function; corruption level and learning
            # rate are exposed as optional named inputs with defaults
            fn = theano.function(inputs=[index,
                              theano.Param(corruption_level, default=0.2),
                              theano.Param(learning_rate, default=0.1)],
                                 outputs=cost,
                                 updates=updates,
                                 givens={self.x: train_set_x[batch_begin:
                                                             batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on
        a batch from the validation set, and a function `test` that
        computes the error on a batch from the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                         it has to contain three pairs, `train`,
                         `valid`, `test` in this order, where each pair
                         is formed of two Theano variables, one for the
                         datapoints, the other for the labels

        :type batch_size: int
        :param batch_size: size of a minibatch

        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for validation and testing
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches /= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates (plain SGD step)
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))

        train_fn = theano.function(inputs=[index],
              outputs=self.finetune_cost,
              updates=updates,
              givens={
                self.x: train_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:
                                    (index + 1) * batch_size]},
              name='train')

        test_score_i = theano.function([index], self.errors,
                 givens={
                   self.x: test_set_x[index * batch_size:
                                      (index + 1) * batch_size],
                   self.y: test_set_y[index * batch_size:
                                      (index + 1) * batch_size]},
                      name='test')

        valid_score_i = theano.function([index], self.errors,
              givens={
                 self.x: valid_set_x[index * batch_size:
                                     (index + 1) * batch_size],
                 self.y: valid_set_y[index * batch_size:
                                     (index + 1) * batch_size]},
                      name='valid')

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score
import scipy as sp
def shared_dataset(data_xy, borrow=True):
    """Load a (data, labels) pair into Theano shared variables.

    Storing the dataset in shared variables lets Theano copy it into GPU
    memory (when code is run on GPU). Since copying data into the GPU is
    slow, copying a minibatch every time one is needed (the default
    behaviour if the data is not in a shared variable) would lead to a
    large decrease in performance.
    """
    features, labels = data_xy
    shared_features = theano.shared(
        numpy.asarray(features, dtype=theano.config.floatX),
        borrow=borrow)
    shared_labels = theano.shared(
        numpy.asarray(labels, dtype=theano.config.floatX),
        borrow=borrow)
    # When storing data on the GPU it has to be stored as floats, so the
    # labels are stored as ``floatX`` as well. But during computation we
    # need them as ints (labels are used as indices, and floats wouldn't
    # make sense there), so instead of returning the float shared variable
    # directly we return a symbolic int32 cast of it.
    return shared_features, T.cast(shared_labels, 'int32')

######## testing SdA #############
# NOTE(review): this silences ALL warnings (deprecations, numeric warnings
# included); consider narrowing the filter to specific categories instead.
import warnings
warnings.filterwarnings('ignore')

"""
Demonstrates how to train and test a stochastic denoising autoencoder.

This is demonstrated on MNIST.

:type learning_rate: float
:param learning_rate: learning rate used in the finetune stage
(factor for the stochastic gradient)

:type pretraining_epochs: int
:param pretraining_epochs: number of epoch to do pretraining

:type pretrain_lr: float
:param pretrain_lr: learning rate to be used during pre-training

:type n_iter: int
:param n_iter: maximal number of iterations to run the optimizer

:type dataset: string
:param dataset: path to the pickled dataset

"""

##### create a function to train an Sda and return it.
def trainSda(hidden_layers_sizes = [100, 100, 100], corruption_levels = [0, 0, 0], batch_size = 30 , \
             training_epochs = 100, pretraining_epochs = 100, pretrain_lr = 0.001, finetune_lr=0.1, \
             X_train_minmax = X_train_minmax, y_train = y_train,
             X_validation_minmax = X_validation_minmax, y_validation = y_validation, 
             X_test_minmax = X_test_minmax, y_test = y_test
             ):
    n_visible = X_train_minmax.shape[1]
    # compute number of minibatches for training, validation and testing

    train_set_x, train_set_y = shared_dataset( (X_train_minmax,  y_train), borrow=True)
    valid_set_x, valid_set_y = shared_dataset( (X_validation_minmax,  y_validation), borrow=True)
    test_set_x, test_set_y = shared_dataset( (X_test_minmax,  y_test), borrow=True)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy_rng, n_ins=n_visible,
              hidden_layers_sizes= hidden_layers_sizes,
              n_outs=2)
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise

    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)

    end_time = time.clock()

    print >> sys.stderr, ('The pretraining code ran for %.2fm' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    datasets = [(train_set_x, train_set_y) , (valid_set_x, valid_set_y), (test_set_x, test_set_y)]
    train_fn, validate_model, test_model = sda.build_finetune_functions(
                datasets=datasets, batch_size=batch_size,
                learning_rate=finetune_lr)

    print '... finetunning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (this_validation_loss < best_validation_loss *
                        improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break
    return sda


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-2717b550ca3b> in <module>()
    560 
    561 ##### create a function to train an Sda and return it.
--> 562 def trainSda(hidden_layers_sizes = [100, 100, 100], corruption_levels = [0, 0, 0], batch_size = 30 ,              training_epochs = 100, pretraining_epochs = 100, pretrain_lr = 0.001, finetune_lr=0.1,              X_train_minmax = X_train_minmax, y_train = y_train,
    563              X_validation_minmax = X_validation_minmax, y_validation = y_validation,
    564              X_test_minmax = X_test_minmax, y_test = y_test

NameError: name 'X_train_minmax' is not defined

In [3]:


In [25]:
#prepare your data
import numpy as np
import pylab as pl


# transform data to the 0.1-0.9 or 0-1 range before training

# TODO: the original cell left these assignments empty, which is a syntax
# error. Assign your scaled feature matrices and label vectors here (e.g.
# via sklearn.preprocessing.MinMaxScaler + train/validation/test split),
# then pass them explicitly to trainSda(...).
X_train_minmax = None
y_train = None
X_validation_minmax = None
y_validation = None
X_test_minmax = None
y_test = None


['active_days', 'average_deaths', 'average_kills', 'average_actual_rating', 'average_duration', 'number_of_games', 'number_of_wins', 'number_of_losses', 'win_rate', 'period_1_df_active_days', 'period_1_df_average_deaths', 'period_1_df_average_kills', 'period_1_df_average_actual_rating', 'period_1_df_average_duration', 'period_1_df_number_of_games', 'period_1_df_number_of_wins', 'period_1_df_number_of_losses', 'period_2_df_active_days', 'period_2_df_average_deaths', 'period_2_df_average_kills', 'period_2_df_average_actual_rating', 'period_2_df_average_duration', 'period_2_df_number_of_games', 'period_2_df_number_of_wins', 'period_2_df_number_of_losses', 'diff_df_active_days', 'diff_df_average_deaths', 'diff_df_average_kills', 'diff_df_average_actual_rating', 'diff_df_average_duration', 'diff_df_number_of_losses', 'diff_df_active_days_percentge', 'diff_df_average_deaths_percentge', 'diff_df_average_kills_percentge', 'diff_df_average_actual_rating_percentge', 'diff_df_average_duration_percentge', 'diff_df_number_of_games_percentge', 'diff_df_number_of_wins_percentge', 'diff_df_number_of_losses_percentge', 'streak_loss', 'last_game_level', 'spent', 'spent_usd', 'log_number_of_games', 'log_number_of_wins', 'log_number_of_losses', 'log_period_1_df_number_of_games', 'log_period_1_df_number_of_wins', 'log_period_1_df_number_of_losses', 'log_period_2_df_number_of_games', 'log_period_2_df_number_of_wins', 'log_period_2_df_number_of_losses', 'log_last_game_level', 'log_diff_df_active_days_percentge', 'log_diff_df_average_deaths_percentge', 'log_diff_df_average_kills_percentge', 'log_diff_df_average_duration_percentge', 'log_diff_df_number_of_games_percentge', 'log_diff_df_number_of_wins_percentge', 'log_diff_df_number_of_losses_percentge']
number of features 60
24573 19658 4915 19658 4915
[ 0.37525215  0.38518842  0.42674664  0.47889133  0.4715159   0.15258374
  0.14640059  0.16274556  0.478348    0.37825076  0.33266711  0.26891371
  0.48797425  0.35599561  0.14730122  0.14380945  0.14015431  0.44721753
  0.34163419  0.36248406  0.47219492  0.43373915  0.1536422   0.14855672
  0.15363489  0.53448339  0.54903072  0.60982959  0.64609468  0.61763463
  0.55375362  0.29593048  0.16045319  0.13092468  0.73007912  0.14778462
  0.112204    0.12003625  0.11449644  0.17485692  0.26900674  0.64828946
  0.24050617  0.36694143  0.42953363  0.46364872  0.37132409  0.32467413
  0.32952524  0.43380445  0.375232    0.39399858  0.45277559  0.56664081
  0.48493057  0.43930199  0.37172646  0.37766699  0.37098112  0.36006259]
number of features 60
0.1 0.9
5971 5971
5971 5971
(11942L, 60L) [1 1 1 ..., 0 0 0]
(11942L, 60L) [0 1 0 ..., 1 0 1]

In [34]:
# set up paramaters

#finetune_lr=0.1
finetune_lr=0.0001
pretraining_epochs = 100
#pretrain_lr=0.001
pretrain_lr = 0.0000001
training_epochs = 100
batch_size = 30


hidden_layers_sizes= [100, 100, 100, 100, 100]
corruption_levels = [0, 0, 0, 0, 0]
#corruption_levels = [0, 0, 0]
sda = trainSda(hidden_layers_sizes = hidden_layers_sizes, corruption_levels = corruption_levels, batch_size = batch_size , \
             training_epochs = training_epochs, pretraining_epochs = pretraining_epochs, 
             pretrain_lr = pretrain_lr, finetune_lr=finetune_lr, \
             X_train_minmax = X_train_minmax, y_train = y_train,
             X_validation_minmax = X_validation_minmax, y_validation = y_validation, 
             X_test_minmax = X_test_minmax, y_test = y_test
             )


... building the model
... getting the pretraining functions
... pre-training the model
Pre-training layer 0, epoch 0, cost  86.0520980387
Pre-training layer 0, epoch 1, cost  86.0523170758
Pre-training layer 0, epoch 2, cost  86.0162159952
Pre-training layer 0, epoch 3, cost  86.0662843983
Pre-training layer 0, epoch 4, cost  86.0137633454
Pre-training layer 0, epoch 5, cost  85.9297987808
Pre-training layer 0, epoch 6, cost  85.9454868606
Pre-training layer 0, epoch 7, cost  85.9264792157
Pre-training layer 0, epoch 8, cost  85.8946182982
Pre-training layer 0, epoch 9, cost  85.854568457
Pre-training layer 0, epoch 10, cost  85.8967150755
Pre-training layer 0, epoch 11, cost  85.8689507294
Pre-training layer 0, epoch 12, cost  85.8917453961
Pre-training layer 0, epoch 13, cost  85.8479150506
Pre-training layer 0, epoch 14, cost  85.8541065198
Pre-training layer 0, epoch 15, cost  85.8010437595
Pre-training layer 0, epoch 16, cost  85.8270629378
Pre-training layer 0, epoch 17, cost  85.7005681278
Pre-training layer 0, epoch 18, cost  85.7885660205
Pre-training layer 0, epoch 19, cost  85.7078024802
Pre-training layer 0, epoch 20, cost  85.7350028976
Pre-training layer 0, epoch 21, cost  85.7116208276
Pre-training layer 0, epoch 22, cost  85.6338836351
Pre-training layer 0, epoch 23, cost  85.6841247653
Pre-training layer 0, epoch 24, cost  85.6541130891
Pre-training layer 0, epoch 25, cost  85.6100037843
Pre-training layer 0, epoch 26, cost  85.6143064814
Pre-training layer 0, epoch 27, cost  85.6826372886
Pre-training layer 0, epoch 28, cost  85.5537682375
Pre-training layer 0, epoch 29, cost  85.6135469916
Pre-training layer 0, epoch 30, cost  85.5280633111
Pre-training layer 0, epoch 31, cost  85.5401931601
Pre-training layer 0, epoch 32, cost  85.4934264824
Pre-training layer 0, epoch 33, cost  85.5291817998
Pre-training layer 0, epoch 34, cost  85.4592333992
Pre-training layer 0, epoch 35, cost  85.4355441848
Pre-training layer 0, epoch 36, cost  85.4008581035
Pre-training layer 0, epoch 37, cost  85.3860213851
Pre-training layer 0, epoch 38, cost  85.3317791144
Pre-training layer 0, epoch 39, cost  85.3777292693
Pre-training layer 0, epoch 40, cost  85.3333234443
Pre-training layer 0, epoch 41, cost  85.300681557
Pre-training layer 0, epoch 42, cost  85.3707914556
Pre-training layer 0, epoch 43, cost  85.3417986908
Pre-training layer 0, epoch 44, cost  85.2761745183
Pre-training layer 0, epoch 45, cost  85.2824954677
Pre-training layer 0, epoch 46, cost  85.2195448158
Pre-training layer 0, epoch 47, cost  85.1942038458
Pre-training layer 0, epoch 48, cost  85.2953181591
Pre-training layer 0, epoch 49, cost  85.2319294799
Pre-training layer 0, epoch 50, cost  85.1708835144
Pre-training layer 0, epoch 51, cost  85.1794292112
Pre-training layer 0, epoch 52, cost  85.1126228404
Pre-training layer 0, epoch 53, cost  85.1099453706
Pre-training layer 0, epoch 54, cost  85.1055937709
Pre-training layer 0, epoch 55, cost  85.0915509492
Pre-training layer 0, epoch 56, cost  85.0574736108
Pre-training layer 0, epoch 57, cost  85.0673709244
Pre-training layer 0, epoch 58, cost  85.0650668542
Pre-training layer 0, epoch 59, cost  84.9809744385
Pre-training layer 0, epoch 60, cost  85.0183512707
Pre-training layer 0, epoch 61, cost  85.1013381794
Pre-training layer 0, epoch 62, cost  84.9327026639
Pre-training layer 0, epoch 63, cost  84.9215115541
Pre-training layer 0, epoch 64, cost  84.924839114
Pre-training layer 0, epoch 65, cost  84.9154205107
Pre-training layer 0, epoch 66, cost  84.9295396994
Pre-training layer 0, epoch 67, cost  84.9061699823
Pre-training layer 0, epoch 68, cost  84.8826887442
Pre-training layer 0, epoch 69, cost  84.9261175394
Pre-training layer 0, epoch 70, cost  84.7915339804
Pre-training layer 0, epoch 71, cost  84.7871725524
Pre-training layer 0, epoch 72, cost  84.8115314523
Pre-training layer 0, epoch 73, cost  84.783434179
Pre-training layer 0, epoch 74, cost  84.8171982562
Pre-training layer 0, epoch 75, cost  84.7421891572
Pre-training layer 0, epoch 76, cost  84.7597433618
Pre-training layer 0, epoch 77, cost  84.7168563481
Pre-training layer 0, epoch 78, cost  84.7803854057
Pre-training layer 0, epoch 79, cost  84.6524016831
Pre-training layer 0, epoch 80, cost  84.6104171601
Pre-training layer 0, epoch 81, cost  84.6215239734
Pre-training layer 0, epoch 82, cost  84.6436763482
Pre-training layer 0, epoch 83, cost  84.6570200226
Pre-training layer 0, epoch 84, cost  84.6525154249
Pre-training layer 0, epoch 85, cost  84.561856436
Pre-training layer 0, epoch 86, cost  84.5595827971
Pre-training layer 0, epoch 87, cost  84.6039922567
Pre-training layer 0, epoch 88, cost  84.5583337421
Pre-training layer 0, epoch 89, cost  84.4810240365
Pre-training layer 0, epoch 90, cost  84.4854580998
Pre-training layer 0, epoch 91, cost  84.5071500267
Pre-training layer 0, epoch 92, cost  84.4339494876
Pre-training layer 0, epoch 93, cost  84.4726957958
Pre-training layer 0, epoch 94, cost  84.4772169649
Pre-training layer 0, epoch 95, cost  84.4160685565
Pre-training layer 0, epoch 96, cost  84.4373476289
Pre-training layer 0, epoch 97, cost  84.4774113049
Pre-training layer 0, epoch 98, cost  84.4243109608
Pre-training layer 0, epoch 99, cost  84.37756064
Pre-training layer 1, epoch 0, cost  120.04167726
Pre-training layer 1, epoch 1, cost  120.148723078
Pre-training layer 1, epoch 2, cost  120.201068361
Pre-training layer 1, epoch 3, cost  120.074753194
Pre-training layer 1, epoch 4, cost  120.010091612
Pre-training layer 1, epoch 5, cost  120.020362318
Pre-training layer 1, epoch 6, cost  119.957695663
Pre-training layer 1, epoch 7, cost  120.007070046
Pre-training layer 1, epoch 8, cost  119.929746771
Pre-training layer 1, epoch 9, cost  120.029097827
Pre-training layer 1, epoch 10, cost  119.986435286
Pre-training layer 1, epoch 11, cost  119.879922907
Pre-training layer 1, epoch 12, cost  119.807813486
Pre-training layer 1, epoch 13, cost  119.864403065
Pre-training layer 1, epoch 14, cost  119.793988531
Pre-training layer 1, epoch 15, cost  119.773958747
Pre-training layer 1, epoch 16, cost  119.798331474
Pre-training layer 1, epoch 17, cost  119.724980478
Pre-training layer 1, epoch 18, cost  119.704672539
Pre-training layer 1, epoch 19, cost  119.628568672
Pre-training layer 1, epoch 20, cost  119.785392794
Pre-training layer 1, epoch 21, cost  119.660154655
Pre-training layer 1, epoch 22, cost  119.616467335
Pre-training layer 1, epoch 23, cost  119.661672191
Pre-training layer 1, epoch 24, cost  119.605910913
Pre-training layer 1, epoch 25, cost  119.635648135
Pre-training layer 1, epoch 26, cost  119.556256933
Pre-training layer 1, epoch 27, cost  119.614929091
Pre-training layer 1, epoch 28, cost  119.607946746
Pre-training layer 1, epoch 29, cost  119.553416233
Pre-training layer 1, epoch 30, cost  119.424054115
Pre-training layer 1, epoch 31, cost  119.482372744
Pre-training layer 1, epoch 32, cost  119.463006855
Pre-training layer 1, epoch 33, cost  119.33868847
Pre-training layer 1, epoch 34, cost  119.46085678
Pre-training layer 1, epoch 35, cost  119.462766684
Pre-training layer 1, epoch 36, cost  119.387866887
Pre-training layer 1, epoch 37, cost  119.373084323
Pre-training layer 1, epoch 38, cost  119.34124663
Pre-training layer 1, epoch 39, cost  119.321528072
Pre-training layer 1, epoch 40, cost  119.308129376
Pre-training layer 1, epoch 41, cost  119.229814357
Pre-training layer 1, epoch 42, cost  119.219543976
Pre-training layer 1, epoch 43, cost  119.168439476
Pre-training layer 1, epoch 44, cost  119.273905914
Pre-training layer 1, epoch 45, cost  119.165972912
Pre-training layer 1, epoch 46, cost  119.113873458
Pre-training layer 1, epoch 47, cost  119.087058892
Pre-training layer 1, epoch 48, cost  119.12399458
Pre-training layer 1, epoch 49, cost  119.08475091
Pre-training layer 1, epoch 50, cost  119.064857159
Pre-training layer 1, epoch 51, cost  119.057661931
Pre-training layer 1, epoch 52, cost  119.117785648
Pre-training layer 1, epoch 53, cost  118.994027234
Pre-training layer 1, epoch 54, cost  118.964976498
Pre-training layer 1, epoch 55, cost  118.886614843
Pre-training layer 1, epoch 56, cost  119.027403449
Pre-training layer 1, epoch 57, cost  118.978050466
Pre-training layer 1, epoch 58, cost  118.907292937
Pre-training layer 1, epoch 59, cost  118.87535125
Pre-training layer 1, epoch 60, cost  118.889270152
Pre-training layer 1, epoch 61, cost  118.856566795
Pre-training layer 1, epoch 62, cost  118.815618666
Pre-training layer 1, epoch 63, cost  118.858050304
Pre-training layer 1, epoch 64, cost  118.723289586
Pre-training layer 1, epoch 65, cost  118.710097307
Pre-training layer 1, epoch 66, cost  118.689242235
Pre-training layer 1, epoch 67, cost  118.647859081
Pre-training layer 1, epoch 68, cost  118.78559318
Pre-training layer 1, epoch 69, cost  118.69127503
Pre-training layer 1, epoch 70, cost  118.677101418
Pre-training layer 1, epoch 71, cost  118.7314598
Pre-training layer 1, epoch 72, cost  118.627939149
Pre-training layer 1, epoch 73, cost  118.558374323
Pre-training layer 1, epoch 74, cost  118.595796915
Pre-training layer 1, epoch 75, cost  118.581273628
Pre-training layer 1, epoch 76, cost  118.514124838
Pre-training layer 1, epoch 77, cost  118.484696894
Pre-training layer 1, epoch 78, cost  118.544423055
Pre-training layer 1, epoch 79, cost  118.409903046
Pre-training layer 1, epoch 80, cost  118.453221296
Pre-training layer 1, epoch 81, cost  118.44961203
Pre-training layer 1, epoch 82, cost  118.355619345
Pre-training layer 1, epoch 83, cost  118.419266687
Pre-training layer 1, epoch 84, cost  118.430297674
Pre-training layer 1, epoch 85, cost  118.477134445
Pre-training layer 1, epoch 86, cost  118.342147413
Pre-training layer 1, epoch 87, cost  118.294058598
Pre-training layer 1, epoch 88, cost  118.179361545
Pre-training layer 1, epoch 89, cost  118.312090322
Pre-training layer 1, epoch 90, cost  118.178962624
Pre-training layer 1, epoch 91, cost  118.310129348
Pre-training layer 1, epoch 92, cost  118.173836028
Pre-training layer 1, epoch 93, cost  118.197473948
Pre-training layer 1, epoch 94, cost  118.149395684
Pre-training layer 1, epoch 95, cost  118.01145935
Pre-training layer 1, epoch 96, cost  118.036062277
Pre-training layer 1, epoch 97, cost  118.099029316
Pre-training layer 1, epoch 98, cost  118.013782374
Pre-training layer 1, epoch 99, cost  118.050258786
Pre-training layer 2, epoch 0, cost  102.979979273
Pre-training layer 2, epoch 1, cost  102.892730568
Pre-training layer 2, epoch 2, cost  102.906896775
Pre-training layer 2, epoch 3, cost  102.865913454
Pre-training layer 2, epoch 4, cost  102.939482234
Pre-training layer 2, epoch 5, cost  102.847458466
Pre-training layer 2, epoch 6, cost  102.825651969
Pre-training layer 2, epoch 7, cost  102.877913707
Pre-training layer 2, epoch 8, cost  102.709122203
Pre-training layer 2, epoch 9, cost  102.791562728
Pre-training layer 2, epoch 10, cost  102.782213288
Pre-training layer 2, epoch 11, cost  102.70174152
Pre-training layer 2, epoch 12, cost  102.672946028
Pre-training layer 2, epoch 13, cost  102.687665487
Pre-training layer 2, epoch 14, cost  102.711737335
Pre-training layer 2, epoch 15, cost  102.676239452
Pre-training layer 2, epoch 16, cost  102.648815571
Pre-training layer 2, epoch 17, cost  102.61848983
Pre-training layer 2, epoch 18, cost  102.626025569
Pre-training layer 2, epoch 19, cost  102.625346877
Pre-training layer 2, epoch 20, cost  102.485976575
Pre-training layer 2, epoch 21, cost  102.557377807
Pre-training layer 2, epoch 22, cost  102.499268792
Pre-training layer 2, epoch 23, cost  102.479950913
Pre-training layer 2, epoch 24, cost  102.466992735
Pre-training layer 2, epoch 25, cost  102.565918624
Pre-training layer 2, epoch 26, cost  102.361639197
Pre-training layer 2, epoch 27, cost  102.370552028
Pre-training layer 2, epoch 28, cost  102.428720084
Pre-training layer 2, epoch 29, cost  102.238098155
Pre-training layer 2, epoch 30, cost  102.360795465
Pre-training layer 2, epoch 31, cost  102.340347275
Pre-training layer 2, epoch 32, cost  102.284721811
Pre-training layer 2, epoch 33, cost  102.254998098
Pre-training layer 2, epoch 34, cost  102.306892034
Pre-training layer 2, epoch 35, cost  102.33319181
Pre-training layer 2, epoch 36, cost  102.2830892
Pre-training layer 2, epoch 37, cost  102.295431314
Pre-training layer 2, epoch 38, cost  102.191311241
Pre-training layer 2, epoch 39, cost  102.181744975
Pre-training layer 2, epoch 40, cost  102.166452736
Pre-training layer 2, epoch 41, cost  102.195142692
Pre-training layer 2, epoch 42, cost  102.245311625
Pre-training layer 2, epoch 43, cost  102.126320234
Pre-training layer 2, epoch 44, cost  102.198833069
Pre-training layer 2, epoch 45, cost  102.201394413
Pre-training layer 2, epoch 46, cost  101.982644842
Pre-training layer 2, epoch 47, cost  102.101559166
Pre-training layer 2, epoch 48, cost  102.048907284
Pre-training layer 2, epoch 49, cost  102.031120414
Pre-training layer 2, epoch 50, cost  101.892038285
Pre-training layer 2, epoch 51, cost  101.919592537
Pre-training layer 2, epoch 52, cost  102.00326011
Pre-training layer 2, epoch 53, cost  101.914971577
Pre-training layer 2, epoch 54, cost  101.966360073
Pre-training layer 2, epoch 55, cost  101.887149883
Pre-training layer 2, epoch 56, cost  101.784564734
Pre-training layer 2, epoch 57, cost  101.853794427
Pre-training layer 2, epoch 58, cost  101.83142531
Pre-training layer 2, epoch 59, cost  101.727871073
Pre-training layer 2, epoch 60, cost  101.795362497
Pre-training layer 2, epoch 61, cost  101.927931175
Pre-training layer 2, epoch 62, cost  101.874952621
Pre-training layer 2, epoch 63, cost  101.756708163
Pre-training layer 2, epoch 64, cost  101.686725244
Pre-training layer 2, epoch 65, cost  101.801214619
Pre-training layer 2, epoch 66, cost  101.744759692
Pre-training layer 2, epoch 67, cost  101.684334884
Pre-training layer 2, epoch 68, cost  101.537072477
Pre-training layer 2, epoch 69, cost  101.662950279
Pre-training layer 2, epoch 70, cost  101.609808846
Pre-training layer 2, epoch 71, cost  101.569662138
Pre-training layer 2, epoch 72, cost  101.68527935
Pre-training layer 2, epoch 73, cost  101.589143595
Pre-training layer 2, epoch 74, cost  101.583977233
Pre-training layer 2, epoch 75, cost  101.60282173
Pre-training layer 2, epoch 76, cost  101.539694669
Pre-training layer 2, epoch 77, cost  101.476755032
Pre-training layer 2, epoch 78, cost  101.45348654
Pre-training layer 2, epoch 79, cost  101.463147326
Pre-training layer 2, epoch 80, cost  101.488831085
Pre-training layer 2, epoch 81, cost  101.412811409
Pre-training layer 2, epoch 82, cost  101.442730499
Pre-training layer 2, epoch 83, cost  101.478483516
Pre-training layer 2, epoch 84, cost  101.282793848
Pre-training layer 2, epoch 85, cost  101.437331139
Pre-training layer 2, epoch 86, cost  101.40659022
Pre-training layer 2, epoch 87, cost  101.27879983
Pre-training layer 2, epoch 88, cost  101.313355863
Pre-training layer 2, epoch 89, cost  101.317645969
Pre-training layer 2, epoch 90, cost  101.280986435
Pre-training layer 2, epoch 91, cost  101.264908092
Pre-training layer 2, epoch 92, cost  101.137971854
Pre-training layer 2, epoch 93, cost  101.218142022
Pre-training layer 2, epoch 94, cost  101.149898054
Pre-training layer 2, epoch 95, cost  101.107399823
Pre-training layer 2, epoch 96, cost  101.174127588
Pre-training layer 2, epoch 97, cost  101.01567959
Pre-training layer 2, epoch 98, cost  101.066595353
Pre-training layer 2, epoch 99, cost  101.075358856
Pre-training layer 3, epoch 0, cost  110.348460663
Pre-training layer 3, epoch 1, cost  110.294194496
Pre-training layer 3, epoch 2, cost  110.326489924
Pre-training layer 3, epoch 3, cost  110.310193089
Pre-training layer 3, epoch 4, cost  110.301588029
Pre-training layer 3, epoch 5, cost  110.334155325
Pre-training layer 3, epoch 6, cost  110.178313016
Pre-training layer 3, epoch 7, cost  110.144553675
Pre-training layer 3, epoch 8, cost  110.207346421
Pre-training layer 3, epoch 9, cost  110.215699381
Pre-training layer 3, epoch 10, cost  110.134934838
Pre-training layer 3, epoch 11, cost  110.066433092
Pre-training layer 3, epoch 12, cost  110.154097934
Pre-training layer 3, epoch 13, cost  110.027759614
Pre-training layer 3, epoch 14, cost  110.053959416
Pre-training layer 3, epoch 15, cost  109.946707297
Pre-training layer 3, epoch 16, cost  110.035649561
Pre-training layer 3, epoch 17, cost  110.022459898
Pre-training layer 3, epoch 18, cost  109.916518166
Pre-training layer 3, epoch 19, cost  109.973208919
Pre-training layer 3, epoch 20, cost  109.824002656
Pre-training layer 3, epoch 21, cost  109.906485526
Pre-training layer 3, epoch 22, cost  109.831259092
Pre-training layer 3, epoch 23, cost  109.681390328
Pre-training layer 3, epoch 24, cost  109.895433208
Pre-training layer 3, epoch 25, cost  109.625666825
Pre-training layer 3, epoch 26, cost  109.817549253
Pre-training layer 3, epoch 27, cost  109.631081468
Pre-training layer 3, epoch 28, cost  109.695176209
Pre-training layer 3, epoch 29, cost  109.66549844
Pre-training layer 3, epoch 30, cost  109.635546594
Pre-training layer 3, epoch 31, cost  109.678730934
Pre-training layer 3, epoch 32, cost  109.537792194
Pre-training layer 3, epoch 33, cost  109.532256284
Pre-training layer 3, epoch 34, cost  109.522995101
Pre-training layer 3, epoch 35, cost  109.509123495
Pre-training layer 3, epoch 36, cost  109.538502273
Pre-training layer 3, epoch 37, cost  109.49120824
Pre-training layer 3, epoch 38, cost  109.461571629
Pre-training layer 3, epoch 39, cost  109.429419538
Pre-training layer 3, epoch 40, cost  109.367305338
Pre-training layer 3, epoch 41, cost  109.457630785
Pre-training layer 3, epoch 42, cost  109.356018439
Pre-training layer 3, epoch 43, cost  109.391652002
Pre-training layer 3, epoch 44, cost  109.20634512
Pre-training layer 3, epoch 45, cost  109.226661543
Pre-training layer 3, epoch 46, cost  109.33515243
Pre-training layer 3, epoch 47, cost  109.20625608
Pre-training layer 3, epoch 48, cost  109.244118012
Pre-training layer 3, epoch 49, cost  109.16080083
Pre-training layer 3, epoch 50, cost  109.137173847
Pre-training layer 3, epoch 51, cost  109.138755158
Pre-training layer 3, epoch 52, cost  109.156806288
Pre-training layer 3, epoch 53, cost  109.061392866
Pre-training layer 3, epoch 54, cost  109.056655401
Pre-training layer 3, epoch 55, cost  109.10132618
Pre-training layer 3, epoch 56, cost  109.02561974
Pre-training layer 3, epoch 57, cost  109.105363076
Pre-training layer 3, epoch 58, cost  108.979783275
Pre-training layer 3, epoch 59, cost  109.030972048
Pre-training layer 3, epoch 60, cost  108.927918428
Pre-training layer 3, epoch 61, cost  108.949427083
Pre-training layer 3, epoch 62, cost  108.929426993
Pre-training layer 3, epoch 63, cost  108.939812575
Pre-training layer 3, epoch 64, cost  108.802513247
Pre-training layer 3, epoch 65, cost  108.852627513
Pre-training layer 3, epoch 66, cost  108.773732345
Pre-training layer 3, epoch 67, cost  108.875693422
Pre-training layer 3, epoch 68, cost  108.744702491
Pre-training layer 3, epoch 69, cost  108.689969512
Pre-training layer 3, epoch 70, cost  108.78405862
Pre-training layer 3, epoch 71, cost  108.682696046
Pre-training layer 3, epoch 72, cost  108.66014117
Pre-training layer 3, epoch 73, cost  108.589529015
Pre-training layer 3, epoch 74, cost  108.771185156
Pre-training layer 3, epoch 75, cost  108.592983857
Pre-training layer 3, epoch 76, cost  108.67859886
Pre-training layer 3, epoch 77, cost  108.50891392
Pre-training layer 3, epoch 78, cost  108.46324131
Pre-training layer 3, epoch 79, cost  108.500191687
Pre-training layer 3, epoch 80, cost  108.471373386
Pre-training layer 3, epoch 81, cost  108.485863119
Pre-training layer 3, epoch 82, cost  108.410880535
Pre-training layer 3, epoch 83, cost  108.36804948
Pre-training layer 3, epoch 84, cost  108.411404926
Pre-training layer 3, epoch 85, cost  108.283414503
Pre-training layer 3, epoch 86, cost  108.314309942
Pre-training layer 3, epoch 87, cost  108.268094023
Pre-training layer 3, epoch 88, cost  108.368275519
Pre-training layer 3, epoch 89, cost  108.30904627
Pre-training layer 3, epoch 90, cost  108.254233832
Pre-training layer 3, epoch 91, cost  108.157572649
Pre-training layer 3, epoch 92, cost  108.162751739
Pre-training layer 3, epoch 93, cost  108.103830401
Pre-training layer 3, epoch 94, cost  108.180339451
Pre-training layer 3, epoch 95, cost  108.112519442
Pre-training layer 3, epoch 96, cost  108.22976855
Pre-training layer 3, epoch 97, cost  108.033264521
Pre-training layer 3, epoch 98, cost  108.146053671
Pre-training layer 3, epoch 99, cost  108.175624168
Pre-training layer 4, epoch 0, cost  109.760780629
Pre-training layer 4, epoch 1, cost  109.791027262
Pre-training layer 4, epoch 2, cost  109.677193921
Pre-training layer 4, epoch 3, cost  109.704580394
Pre-training layer 4, epoch 4, cost  109.663741379
Pre-training layer 4, epoch 5, cost  109.613051455
Pre-training layer 4, epoch 6, cost  109.625968721
Pre-training layer 4, epoch 7, cost  109.571784608
Pre-training layer 4, epoch 8, cost  109.545691508
Pre-training layer 4, epoch 9, cost  109.580372801
Pre-training layer 4, epoch 10, cost  109.526637117
Pre-training layer 4, epoch 11, cost  109.538231386
Pre-training layer 4, epoch 12, cost  109.421155315
Pre-training layer 4, epoch 13, cost  109.495692336
Pre-training layer 4, epoch 14, cost  109.400172079
Pre-training layer 4, epoch 15, cost  109.412470755
Pre-training layer 4, epoch 16, cost  109.371260189
Pre-training layer 4, epoch 17, cost  109.347343732
Pre-training layer 4, epoch 18, cost  109.298175733
Pre-training layer 4, epoch 19, cost  109.276424429
Pre-training layer 4, epoch 20, cost  109.226823922
Pre-training layer 4, epoch 21, cost  109.186827868
Pre-training layer 4, epoch 22, cost  109.216785402
Pre-training layer 4, epoch 23, cost  109.183965817
Pre-training layer 4, epoch 24, cost  109.17756837
Pre-training layer 4, epoch 25, cost  109.096073267
Pre-training layer 4, epoch 26, cost  109.145484324
Pre-training layer 4, epoch 27, cost  109.084040561
Pre-training layer 4, epoch 28, cost  109.02922001
Pre-training layer 4, epoch 29, cost  109.080981958
Pre-training layer 4, epoch 30, cost  108.996289147
Pre-training layer 4, epoch 31, cost  108.925253089
Pre-training layer 4, epoch 32, cost  108.863912645
Pre-training layer 4, epoch 33, cost  108.958547688
Pre-training layer 4, epoch 34, cost  108.865032913
Pre-training layer 4, epoch 35, cost  108.843842637
Pre-training layer 4, epoch 36, cost  108.885427896
Pre-training layer 4, epoch 37, cost  108.735075102
Pre-training layer 4, epoch 38, cost  108.816705907
Pre-training layer 4, epoch 39, cost  108.806894995
Pre-training layer 4, epoch 40, cost  108.697422482
Pre-training layer 4, epoch 41, cost  108.769003965
Pre-training layer 4, epoch 42, cost  108.736860115
Pre-training layer 4, epoch 43, cost  108.57022538
Pre-training layer 4, epoch 44, cost  108.659185951
Pre-training layer 4, epoch 45, cost  108.552317356
Pre-training layer 4, epoch 46, cost  108.606588947
Pre-training layer 4, epoch 47, cost  108.508753607
Pre-training layer 4, epoch 48, cost  108.485403062
Pre-training layer 4, epoch 49, cost  108.559900149
Pre-training layer 4, epoch 50, cost  108.50862432
Pre-training layer 4, epoch 51, cost  108.550065976
Pre-training layer 4, epoch 52, cost  108.427117725
Pre-training layer 4, epoch 53, cost  108.279620628
Pre-training layer 4, epoch 54, cost  108.31948951
Pre-training layer 4, epoch 55, cost  108.280761847
Pre-training layer 4, epoch 56, cost  108.301432451
Pre-training layer 4, epoch 57, cost  108.218429469
Pre-training layer 4, epoch 58, cost  108.259552598
Pre-training layer 4, epoch 59, cost  108.250671579
Pre-training layer 4, epoch 60, cost  108.168835767
Pre-training layer 4, epoch 61, cost  108.140077522
Pre-training layer 4, epoch 62, cost  108.157146486
Pre-training layer 4, epoch 63, cost  108.107220836
Pre-training layer 4, epoch 64, cost  108.119040316
Pre-training layer 4, epoch 65, cost  108.084520941
Pre-training layer 4, epoch 66, cost  108.182092202
Pre-training layer 4, epoch 67, cost  107.951214969
Pre-training layer 4, epoch 68, cost  108.051689108
Pre-training layer 4, epoch 69, cost  107.9852174
Pre-training layer 4, epoch 70, cost  107.925179954
Pre-training layer 4, epoch 71, cost  107.862862042
Pre-training layer 4, epoch 72, cost  107.87824142
Pre-training layer 4, epoch 73, cost  107.824514162
Pre-training layer 4, epoch 74, cost  107.911320678
Pre-training layer 4, epoch 75, cost  107.851154202
Pre-training layer 4, epoch 76, cost  107.712329676
Pre-training layer 4, epoch 77, cost  107.83576585
Pre-training layer 4, epoch 78, cost  107.706395344
Pre-training layer 4, epoch 79, cost  107.744800783
Pre-training layer 4, epoch 80, cost  107.622367578
Pre-training layer 4, epoch 81, cost  107.638130366
Pre-training layer 4, epoch 82, cost  107.679877254
Pre-training layer 4, epoch 83, cost  107.572324653
Pre-training layer 4, epoch 84, cost  107.544645642
Pre-training layer 4, epoch 85, cost  107.568201794
Pre-training layer 4, epoch 86, cost  107.491871692
Pre-training layer 4, epoch 87, cost  107.468087302
Pre-training layer 4, epoch 88, cost  107.439382372
Pre-training layer 4, epoch 89, cost  107.448598929
Pre-training layer 4, epoch 90, cost  107.420723818
Pre-training layer 4, epoch 91, cost  107.401588828
Pre-training layer 4, epoch 92, cost  107.291857629
Pre-training layer 4, epoch 93, cost  107.321131567
Pre-training layer 4, epoch 94, cost  107.364135082
Pre-training layer 4, epoch 95, cost  107.287676024
Pre-training layer 4, epoch 96, cost  107.319986969
Pre-training layer 4, epoch 97, cost  107.198214596
Pre-training layer 4, epoch 98, cost  107.271867616
Pre-training layer 4, epoch 99, cost  107.256513676
... getting the finetuning functions
... finetunning the model
epoch 1, minibatch 398/398, validation error 63.358779 %
 epoch 1, minibatch 398/398, test error of best model 62.351738 %
epoch 2, minibatch 398/398, validation error 63.358779 %
epoch 3, minibatch 398/398, validation error 63.358779 %
epoch 4, minibatch 398/398, validation error 63.358779 %
epoch 5, minibatch 398/398, validation error 63.358779 %
epoch 6, minibatch 398/398, validation error 63.358779 %
epoch 7, minibatch 398/398, validation error 63.358779 %
epoch 8, minibatch 398/398, validation error 63.358779 %
epoch 9, minibatch 398/398, validation error 63.358779 %
epoch 10, minibatch 398/398, validation error 63.358779 %
The pretraining code ran for 4.48m

In [ ]:
# this part is for testing the Sda model and report performance.

train_set_x, train_set_y = shared_dataset( (X_train_minmax,  y_train), borrow=True)
valid_set_x, valid_set_y = shared_dataset( (X_validation_minmax,  y_validation), borrow=True)
test_set_x, test_set_y = shared_dataset( (X_test_minmax,  y_test), borrow=True)

print 'hidden_layers_sizes:', hidden_layers_sizes
print 'corruption_levels:', corruption_levels


predict_train = theano.function([], sda.logLayer.y_pred,
                   givens={sda.x: train_set_x})
training_predicted = predict_train()

print 'train accuracy: ', '{percent:.1%}'.format(percent=sklearn.metrics.accuracy_score(y_train, training_predicted)) 
print 'precision: ', '{percent:.1%}'.format(percent=sklearn.metrics.precision_score(y_train, training_predicted, pos_label=1)) 
print 'recall: ', '{percent:.1%}'.format( percent= sklearn.metrics.recall_score(y_train, training_predicted, pos_label=1))

predict = theano.function([], sda.logLayer.y_pred,
                   givens={sda.x: test_set_x})
test_predicted = predict()
print 'testing accuracy: ', '{percent:.1%}'.format(percent=sklearn.metrics.accuracy_score(y_test, test_predicted)) 
print 'precision: ', '{percent:.1%}'.format(percent=sklearn.metrics.precision_score(y_test, test_predicted, pos_label=1)) 
print 'recall: ', '{percent:.1%}'.format( percent= sklearn.metrics.recall_score(y_test, test_predicted, pos_label=1))