In [1]:
% load SdA.py
In [ ]:
"""
This tutorial introduces stacked denoising auto-encoders (SdA) using Theano.
Denoising autoencoders are the building blocks for SdA.
They are based on auto-encoders as the ones used in Bengio et al. 2007.
An autoencoder takes an input x and first maps it to a hidden representation
y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting
latent representation y is then mapped back to a "reconstructed" vector
z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b'). The weight
matrix W' can optionally be constrained such that W' = W^T, in which case
the autoencoder is said to have tied weights. The network is trained such
that to minimize the reconstruction error (the error between x and z).
For the denosing autoencoder, during training, first x is corrupted into
\tilde{x}, where \tilde{x} is a partially destroyed version of x by means
of a stochastic mapping. Afterwards y is computed as before (using
\tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction
error is now measured between z and the uncorrupted input x, which is
computed as the cross-entropy :
- \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]
References :
- P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and
Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
2008
- Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
Training of Deep Networks, Advances in Neural Information Processing
Systems 19, 2007
"""
import cPickle
import gzip
import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
from dA import dA
class SdA(object):
"""Stacked denoising auto-encoder class (SdA)
A stacked denoising autoencoder model is obtained by stacking several
dAs. The hidden layer of the dA at layer `i` becomes the input of
the dA at layer `i+1`. The first layer dA gets as input the input of
the SdA, and the hidden layer of the last dA represents the output.
Note that after pretraining, the SdA is dealt with as a normal MLP,
the dAs are only used to initialize the weights.
"""
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
hidden_layers_sizes=[500, 500], n_outs=10,
corruption_levels=[0.1, 0.1]):
""" This class is made to support a variable number of layers.
:type numpy_rng: numpy.random.RandomState
:param numpy_rng: numpy random number generator used to draw initial
weights
:type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
:param theano_rng: Theano random generator; if None is given one is
generated based on a seed drawn from `rng`
:type n_ins: int
:param n_ins: dimension of the input to the sdA
:type n_layers_sizes: list of ints
:param n_layers_sizes: intermediate layers size, must contain
at least one value
:type n_outs: int
:param n_outs: dimension of the output of the network
:type corruption_levels: list of float
:param corruption_levels: amount of corruption to use for each
layer
"""
self.sigmoid_layers = []
self.dA_layers = []
self.params = []
self.n_layers = len(hidden_layers_sizes)
assert self.n_layers > 0
if not theano_rng:
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
# allocate symbolic variables for the data
self.x = T.matrix('x') # the data is presented as rasterized images
self.y = T.ivector('y') # the labels are presented as 1D vector of
# [int] labels
# The SdA is an MLP, for which all weights of intermediate layers
# are shared with a different denoising autoencoders
# We will first construct the SdA as a deep multilayer perceptron,
# and when constructing each sigmoidal layer we also construct a
# denoising autoencoder that shares weights with that layer
# During pretraining we will train these autoencoders (which will
# lead to chainging the weights of the MLP as well)
# During finetunining we will finish training the SdA by doing
# stochastich gradient descent on the MLP
for i in xrange(self.n_layers):
# construct the sigmoidal layer
# the size of the input is either the number of hidden units of
# the layer below or the input size if we are on the first layer
if i == 0:
input_size = n_ins
else:
input_size = hidden_layers_sizes[i - 1]
# the input to this layer is either the activation of the hidden
# layer below or the input of the SdA if you are on the first
# layer
if i == 0:
layer_input = self.x
else:
layer_input = self.sigmoid_layers[-1].output
sigmoid_layer = HiddenLayer(rng=numpy_rng,
input=layer_input,
n_in=input_size,
n_out=hidden_layers_sizes[i],
activation=T.nnet.sigmoid)
# add the layer to our list of layers
self.sigmoid_layers.append(sigmoid_layer)
# its arguably a philosophical question...
# but we are going to only declare that the parameters of the
# sigmoid_layers are parameters of the StackedDAA
# the visible biases in the dA are parameters of those
# dA, but not the SdA
self.params.extend(sigmoid_layer.params)
# Construct a denoising autoencoder that shared weights with this
# layer
dA_layer = dA(numpy_rng=numpy_rng,
theano_rng=theano_rng,
input=layer_input,
n_visible=input_size,
n_hidden=hidden_layers_sizes[i],
W=sigmoid_layer.W,
bhid=sigmoid_layer.b)
self.dA_layers.append(dA_layer)
# We now need to add a logistic layer on top of the MLP
self.logLayer = LogisticRegression(
input=self.sigmoid_layers[-1].output,
n_in=hidden_layers_sizes[-1], n_out=n_outs)
self.params.extend(self.logLayer.params)
# construct a function that implements one step of finetunining
# compute the cost for second phase of training,
# defined as the negative log likelihood
self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
# compute the gradients with respect to the model parameters
# symbolic variable that points to the number of errors made on the
# minibatch given by self.x and self.y
self.errors = self.logLayer.errors(self.y)
def pretraining_functions(self, train_set_x, batch_size):
''' Generates a list of functions, each of them implementing one
step in trainnig the dA corresponding to the layer with same index.
The function will require as input the minibatch index, and to train
a dA you just need to iterate, calling the corresponding function on
all minibatch indexes.
:type train_set_x: theano.tensor.TensorType
:param train_set_x: Shared variable that contains all datapoints used
for training the dA
:type batch_size: int
:param batch_size: size of a [mini]batch
:type learning_rate: float
:param learning_rate: learning rate used during training for any of
the dA layers
'''
# index to a [mini]batch
index = T.lscalar('index') # index to a minibatch
corruption_level = T.scalar('corruption') # % of corruption to use
learning_rate = T.scalar('lr') # learning rate to use
# number of batches
n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
# begining of a batch, given `index`
batch_begin = index * batch_size
# ending of a batch given `index`
batch_end = batch_begin + batch_size
pretrain_fns = []
for dA in self.dA_layers:
# get the cost and the updates list
cost, updates = dA.get_cost_updates(corruption_level,
learning_rate)
# compile the theano function
fn = theano.function(inputs=[index,
theano.Param(corruption_level, default=0.2),
theano.Param(learning_rate, default=0.1)],
outputs=cost,
updates=updates,
givens={self.x: train_set_x[batch_begin:
batch_end]})
# append `fn` to the list of functions
pretrain_fns.append(fn)
return pretrain_fns
def build_finetune_functions(self, datasets, batch_size, learning_rate):
'''Generates a function `train` that implements one step of
finetuning, a function `validate` that computes the error on
a batch from the validation set, and a function `test` that
computes the error on a batch from the testing set
:type datasets: list of pairs of theano.tensor.TensorType
:param datasets: It is a list that contain all the datasets;
the has to contain three pairs, `train`,
`valid`, `test` in this order, where each pair
is formed of two Theano variables, one for the
datapoints, the other for the labels
:type batch_size: int
:param batch_size: size of a minibatch
:type learning_rate: float
:param learning_rate: learning rate used during finetune stage
'''
(train_set_x, train_set_y) = datasets[0]
(valid_set_x, valid_set_y) = datasets[1]
(test_set_x, test_set_y) = datasets[2]
# compute number of minibatches for training, validation and testing
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
n_valid_batches /= batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0]
n_test_batches /= batch_size
index = T.lscalar('index') # index to a [mini]batch
# compute the gradients with respect to the model parameters
gparams = T.grad(self.finetune_cost, self.params)
# compute list of fine-tuning updates
updates = []
for param, gparam in zip(self.params, gparams):
updates.append((param, param - gparam * learning_rate))
train_fn = theano.function(inputs=[index],
outputs=self.finetune_cost,
updates=updates,
givens={
self.x: train_set_x[index * batch_size:
(index + 1) * batch_size],
self.y: train_set_y[index * batch_size:
(index + 1) * batch_size]},
name='train')
test_score_i = theano.function([index], self.errors,
givens={
self.x: test_set_x[index * batch_size:
(index + 1) * batch_size],
self.y: test_set_y[index * batch_size:
(index + 1) * batch_size]},
name='test')
valid_score_i = theano.function([index], self.errors,
givens={
self.x: valid_set_x[index * batch_size:
(index + 1) * batch_size],
self.y: valid_set_y[index * batch_size:
(index + 1) * batch_size]},
name='valid')
# Create a function that scans the entire validation set
def valid_score():
return [valid_score_i(i) for i in xrange(n_valid_batches)]
# Create a function that scans the entire test set
def test_score():
return [test_score_i(i) for i in xrange(n_test_batches)]
return train_fn, valid_score, test_score
def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
pretrain_lr=0.001, training_epochs=1000,
dataset='mnist.pkl.gz', batch_size=1):
"""
Demonstrates how to train and test a stochastic denoising autoencoder.
This is demonstrated on MNIST.
:type learning_rate: float
:param learning_rate: learning rate used in the finetune stage
(factor for the stochastic gradient)
:type pretraining_epochs: int
:param pretraining_epochs: number of epoch to do pretraining
:type pretrain_lr: float
:param pretrain_lr: learning rate to be used during pre-training
:type n_iter: int
:param n_iter: maximal number of iterations ot run the optimizer
:type dataset: string
:param dataset: path the the pickled dataset
"""
datasets = load_data(dataset)
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]
# compute number of minibatches for training, validation and testing
n_train_batches = train_set_x.get_value(borrow=True).shape[0]
n_train_batches /= batch_size
# numpy random generator
numpy_rng = numpy.random.RandomState(89677)
print '... building the model'
# construct the stacked denoising autoencoder class
sda = SdA(numpy_rng=numpy_rng, n_ins=28 * 28,
hidden_layers_sizes=[1000, 1000, 1000],
n_outs=10)
#########################
# PRETRAINING THE MODEL #
#########################
print '... getting the pretraining functions'
pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
batch_size=batch_size)
print '... pre-training the model'
start_time = time.clock()
## Pre-train layer-wise
corruption_levels = [.1, .2, .3]
for i in xrange(sda.n_layers):
# go through pretraining epochs
for epoch in xrange(pretraining_epochs):
# go through the training set
c = []
for batch_index in xrange(n_train_batches):
c.append(pretraining_fns[i](index=batch_index,
corruption=corruption_levels[i],
lr=pretrain_lr))
print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
print numpy.mean(c)
end_time = time.clock()
print >> sys.stderr, ('The pretraining code for file ' +
os.path.split(__file__)[1] +
' ran for %.2fm' % ((end_time - start_time) / 60.))
########################
# FINETUNING THE MODEL #
########################
# get the training, validation and testing function for the model
print '... getting the finetuning functions'
train_fn, validate_model, test_model = sda.build_finetune_functions(
datasets=datasets, batch_size=batch_size,
learning_rate=finetune_lr)
print '... finetunning the model'
# early-stopping parameters
patience = 10 * n_train_batches # look as this many examples regardless
patience_increase = 2. # wait this much longer when a new best is
# found
improvement_threshold = 0.995 # a relative improvement of this much is
# considered significant
validation_frequency = min(n_train_batches, patience / 2)
# go through this many
# minibatche before checking the network
# on the validation set; in this case we
# check every epoch
best_params = None
best_validation_loss = numpy.inf
test_score = 0.
start_time = time.clock()
done_looping = False
epoch = 0
while (epoch < training_epochs) and (not done_looping):
epoch = epoch + 1
for minibatch_index in xrange(n_train_batches):
minibatch_avg_cost = train_fn(minibatch_index)
iter = (epoch - 1) * n_train_batches + minibatch_index
if (iter + 1) % validation_frequency == 0:
validation_losses = validate_model()
this_validation_loss = numpy.mean(validation_losses)
print('epoch %i, minibatch %i/%i, validation error %f %%' %
(epoch, minibatch_index + 1, n_train_batches,
this_validation_loss * 100.))
# if we got the best validation score until now
if this_validation_loss < best_validation_loss:
#improve patience if loss improvement is good enough
if (this_validation_loss < best_validation_loss *
improvement_threshold):
patience = max(patience, iter * patience_increase)
# save best validation score and iteration number
best_validation_loss = this_validation_loss
best_iter = iter
# test it on the test set
test_losses = test_model()
test_score = numpy.mean(test_losses)
print((' epoch %i, minibatch %i/%i, test error of '
'best model %f %%') %
(epoch, minibatch_index + 1, n_train_batches,
test_score * 100.))
if patience <= iter:
done_looping = True
break
end_time = time.clock()
print(('Optimization complete with best validation score of %f %%,'
'with test performance %f %%') %
(best_validation_loss * 100., test_score * 100.))
print >> sys.stderr, ('The training code for file ' +
os.path.split(__file__)[1] +
' ran for %.2fm' % ((end_time - start_time) / 60.))
if __name__ == '__main__':
test_SdA()
In [1]:
"""
This tutorial introduces denoising auto-encoders (dA) using Theano.
Denoising autoencoders are the building blocks for SdA.
They are based on auto-encoders as the ones used in Bengio et al. 2007.
An autoencoder takes an input x and first maps it to a hidden representation
y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting
latent representation y is then mapped back to a "reconstructed" vector
z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b'). The weight
matrix W' can optionally be constrained such that W' = W^T, in which case
the autoencoder is said to have tied weights. The network is trained such
that to minimize the reconstruction error (the error between x and z).
For the denosing autoencoder, during training, first x is corrupted into
\tilde{x}, where \tilde{x} is a partially destroyed version of x by means
of a stochastic mapping. Afterwards y is computed as before (using
\tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction
error is now measured between z and the uncorrupted input x, which is
computed as the cross-entropy :
- \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]
References :
- P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and
Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
2008
- Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
Training of Deep Networks, Advances in Neural Information Processing
Systems 19, 2007
"""
import cPickle
import gzip
import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from logistic_sgd import load_data
from utils import tile_raster_images
import PIL.Image
class dA(object):
"""Denoising Auto-Encoder class (dA)
A denoising autoencoders tries to reconstruct the input from a corrupted
version of it by projecting it first in a latent space and reprojecting
it afterwards back in the input space. Please refer to Vincent et al.,2008
for more details. If x is the input then equation (1) computes a partially
destroyed version of x by means of a stochastic mapping q_D. Equation (2)
computes the projection of the input into the latent space. Equation (3)
computes the reconstruction of the input, while equation (4) computes the
reconstruction error.
.. math::
\tilde{x} ~ q_D(\tilde{x}|x) (1)
y = s(W \tilde{x} + b) (2)
x = s(W' y + b') (3)
L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)] (4)
"""
def __init__(self, numpy_rng, theano_rng=None, input=None,
n_visible=784, n_hidden=500,
W=None, bhid=None, bvis=None):
"""
Initialize the dA class by specifying the number of visible units (the
dimension d of the input ), the number of hidden units ( the dimension
d' of the latent or hidden space ) and the corruption level. The
constructor also receives symbolic variables for the input, weights and
bias. Such a symbolic variables are useful when, for example the input
is the result of some computations, or when weights are shared between
the dA and an MLP layer. When dealing with SdAs this always happens,
the dA on layer 2 gets as input the output of the dA on layer 1,
and the weights of the dA are used in the second stage of training
to construct an MLP.
:type numpy_rng: numpy.random.RandomState
:param numpy_rng: number random generator used to generate weights
:type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
:param theano_rng: Theano random generator; if None is given one is
generated based on a seed drawn from `rng`
:type input: theano.tensor.TensorType
:param input: a symbolic description of the input or None for
standalone dA
:type n_visible: int
:param n_visible: number of visible units
:type n_hidden: int
:param n_hidden: number of hidden units
:type W: theano.tensor.TensorType
:param W: Theano variable pointing to a set of weights that should be
shared belong the dA and another architecture; if dA should
be standalone set this to None
:type bhid: theano.tensor.TensorType
:param bhid: Theano variable pointing to a set of biases values (for
hidden units) that should be shared belong dA and another
architecture; if dA should be standalone set this to None
:type bvis: theano.tensor.TensorType
:param bvis: Theano variable pointing to a set of biases values (for
visible units) that should be shared belong dA and another
architecture; if dA should be standalone set this to None
"""
self.n_visible = n_visible
self.n_hidden = n_hidden
# create a Theano random generator that gives symbolic random values
if not theano_rng:
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
# note : W' was written as `W_prime` and b' as `b_prime`
if not W:
# W is initialized with `initial_W` which is uniformely sampled
# from -4*sqrt(6./(n_visible+n_hidden)) and
# 4*sqrt(6./(n_hidden+n_visible))the output of uniform if
# converted using asarray to dtype
# theano.config.floatX so that the code is runable on GPU
initial_W = numpy.asarray(numpy_rng.uniform(
low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
size=(n_visible, n_hidden)), dtype=theano.config.floatX)
W = theano.shared(value=initial_W, name='W', borrow=True)
if not bvis:
bvis = theano.shared(value=numpy.zeros(n_visible,
dtype=theano.config.floatX),
borrow=True)
if not bhid:
bhid = theano.shared(value=numpy.zeros(n_hidden,
dtype=theano.config.floatX),
name='b',
borrow=True)
self.W = W
# b corresponds to the bias of the hidden
self.b = bhid
# b_prime corresponds to the bias of the visible
self.b_prime = bvis
# tied weights, therefore W_prime is W transpose
self.W_prime = self.W.T
self.theano_rng = theano_rng
# if no input is given, generate a variable representing the input
if input == None:
# we use a matrix because we expect a minibatch of several
# examples, each example being a row
self.x = T.dmatrix(name='input')
else:
self.x = input
self.params = [self.W, self.b, self.b_prime]
def get_corrupted_input(self, input, corruption_level):
"""This function keeps ``1-corruption_level`` entries of the inputs the
same and zero-out randomly selected subset of size ``coruption_level``
Note : first argument of theano.rng.binomial is the shape(size) of
random numbers that it should produce
second argument is the number of trials
third argument is the probability of success of any trial
this will produce an array of 0s and 1s where 1 has a
probability of 1 - ``corruption_level`` and 0 with
``corruption_level``
The binomial function return int64 data type by
default. int64 multiplicated by the input
type(floatX) always return float64. To keep all data
in floatX when floatX is float32, we set the dtype of
the binomial to floatX. As in our case the value of
the binomial is always 0 or 1, this don't change the
result. This is needed to allow the gpu to work
correctly as it only support float32 for now.
"""
return self.theano_rng.binomial(size=input.shape, n=1,
p=1 - corruption_level,
dtype=theano.config.floatX) * input
def get_hidden_values(self, input):
""" Computes the values of the hidden layer """
return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
def get_reconstructed_input(self, hidden):
"""Computes the reconstructed input given the values of the
hidden layer
"""
return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
def get_cost_updates(self, corruption_level, learning_rate):
""" This function computes the cost and the updates for one trainng
step of the dA """
tilde_x = self.get_corrupted_input(self.x, corruption_level)
y = self.get_hidden_values(tilde_x)
z = self.get_reconstructed_input(y)
# note : we sum over the size of a datapoint; if we are using
# minibatches, L will be a vector, with one entry per
# example in minibatch
L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
# note : L is now a vector, where each element is the
# cross-entropy cost of the reconstruction of the
# corresponding example of the minibatch. We need to
# compute the average of all these to get the cost of
# the minibatch
cost = T.mean(L)
# compute the gradients of the cost of the `dA` with respect
# to its parameters
gparams = T.grad(cost, self.params)
# generate the list of updates
updates = []
for param, gparam in zip(self.params, gparams):
updates.append((param, param - learning_rate * gparam))
return (cost, updates)
def test_dA(learning_rate=0.1, training_epochs=15,
dataset='mnist.pkl.gz',
batch_size=20, output_folder='dA_plots'):
"""
This demo is tested on MNIST
:type learning_rate: float
:param learning_rate: learning rate used for training the DeNosing
AutoEncoder
:type training_epochs: int
:param training_epochs: number of epochs used for training
:type dataset: string
:param dataset: path to the picked dataset
"""
datasets = load_data(dataset)
train_set_x, train_set_y = datasets[0]
# compute number of minibatches for training, validation and testing
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
# allocate symbolic variables for the data
index = T.lscalar() # index to a [mini]batch
x = T.matrix('x') # the data is presented as rasterized images
if not os.path.isdir(output_folder):
os.makedirs(output_folder)
os.chdir(output_folder)
####################################
# BUILDING THE MODEL NO CORRUPTION #
####################################
rng = numpy.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2 ** 30))
da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
n_visible=28 * 28, n_hidden=500)
cost, updates = da.get_cost_updates(corruption_level=0.,
learning_rate=learning_rate)
train_da = theano.function([index], cost, updates=updates,
givens={x: train_set_x[index * batch_size:
(index + 1) * batch_size]})
start_time = time.clock()
############
# TRAINING #
############
# go through training epochs
for epoch in xrange(training_epochs):
# go through trainng set
c = []
for batch_index in xrange(n_train_batches):
c.append(train_da(batch_index))
print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
end_time = time.clock()
training_time = (end_time - start_time)
print >> sys.stderr, ('The no corruption code for file ' +
os.path.split(__file__)[1] +
' ran for %.2fm' % ((training_time) / 60.))
image = PIL.Image.fromarray(
tile_raster_images(X=da.W.get_value(borrow=True).T,
img_shape=(28, 28), tile_shape=(10, 10),
tile_spacing=(1, 1)))
image.save('filters_corruption_0.png')
#####################################
# BUILDING THE MODEL CORRUPTION 30% #
#####################################
rng = numpy.random.RandomState(123)
theano_rng = RandomStreams(rng.randint(2 ** 30))
da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
n_visible=28 * 28, n_hidden=500)
cost, updates = da.get_cost_updates(corruption_level=0.3,
learning_rate=learning_rate)
train_da = theano.function([index], cost, updates=updates,
givens={x: train_set_x[index * batch_size:
(index + 1) * batch_size]})
start_time = time.clock()
############
# TRAINING #
############
# go through training epochs
for epoch in xrange(training_epochs):
# go through trainng set
c = []
for batch_index in xrange(n_train_batches):
c.append(train_da(batch_index))
print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
end_time = time.clock()
training_time = (end_time - start_time)
print >> sys.stderr, ('The 30% corruption code for file ' +
os.path.split(__file__)[1] +
' ran for %.2fm' % (training_time / 60.))
image = PIL.Image.fromarray(tile_raster_images(
X=da.W.get_value(borrow=True).T,
img_shape=(28, 28), tile_shape=(10, 10),
tile_spacing=(1, 1)))
image.save('filters_corruption_30.png')
os.chdir('../')
if __name__ == '__main__':
test_dA()
In [9]:
import gzip
import cPickle
import numpy as np
f = gzip.open('../data/mnist.pkl.gz', 'rb')
train_set, valid_set, test_set = cPickle.load(f)
In [34]:
X_train,y_train = train_set
X_valid,y_valid = valid_set
X_total=np.vstack((X_train, X_valid))
print(X_total.shape)
y_total = np.concatenate([y_train, y_valid])
print y_total.shape
In [41]:
array_A =[]
array_B =[]
for i in range(1000000):
array_A.append(np.random.random_integers(0, 59999))
array_B.append(np.random.random_integers(0, 59999))
In [43]:
no=0
pos_index = []
neg_index = []
for index in xrange(1000000):
if y_total[array_A[index]] == y_total[array_B[index]]:
no+=1
pos_index.append(index)
else:
neg_index.append(index)
print no
In [45]:
len(neg_index)
Out[45]:
In [47]:
import pickle
with open('array_A.pickle', 'wb') as handle:
pickle.dump(array_A, handle)
with open('array_B.pickle', 'wb') as handle:
pickle.dump(array_B, handle)
with open('pos_index.pickle', 'wb') as handle:
pickle.dump(pos_index, handle)
with open('neg_index.pickle', 'wb') as handle:
pickle.dump(neg_index, handle)