In this case we will work with the same data used in the Logistic Regression example, but this time using a multi-layer perceptron.
We load the data in the same way as before:
In [1]:
import numpy
import theano
import theano.tensor as T
import scipy.io as io

print '... loading data'
data = io.loadmat('dataLR.mat', squeeze_me=True)
# The first two columns are the input features, the third column is the class label
dataIn = data['data'][:, 0:2].astype(theano.config.floatX)
dataOut = data['data'][:, 2].astype(int)
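As a quick sanity check (a small sketch, assuming dataLR.mat keeps the same layout as in the Logistic Regression example: two feature columns plus a binary label column), we can print the shapes and the label values:
In [ ]:
print 'dataIn shape :', dataIn.shape          # expected (n_examples, 2)
print 'dataOut shape:', dataOut.shape         # expected (n_examples,)
print 'labels       :', numpy.unique(dataOut) # expected [0 1]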
In [2]:
class CapaOculta(object):
    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """
        Typical hidden layer of an MLP: the units are fully connected and use
        a sigmoidal activation function.
        The weight matrix "W" has shape (n_in, n_out)
        and the bias vector "b" has shape (n_out,).

        Note: here we use TANH.
        The activation is given by: tanh(dot(input, W) + b)

        :type rng: numpy.random.RandomState
        :param rng: random number generator used to initialize the weights

        :type input: theano.tensor.dmatrix
        :param input: symbolic tensor describing the input data (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of the input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: theano.Op or function
        :param activation: function used in the hidden layer
        """
        self.input = input
        if W is None:
            # Initialize the weights uniformly in +-sqrt(6 / (n_in + n_out))
            W_values = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            )
            if activation == T.nnet.sigmoid:
                W_values *= 4
            W = theano.shared(value=W_values, name='W', borrow=True)
        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )
        # Parameters of the model
        self.params = [self.W, self.b]
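As a small usage sketch (not part of the original exercise; the toy values are only illustrative), we can instantiate the hidden layer on a symbolic matrix and compile a Theano function to check that it produces n_out values per example:
In [ ]:
rng_demo = numpy.random.RandomState(0)
x_demo = T.matrix('x_demo')
capa = CapaOculta(rng=rng_demo, input=x_demo, n_in=2, n_out=3)
f_capa = theano.function([x_demo], capa.output)
# One example with two features -> three hidden activations
out = f_capa(numpy.asarray([[0.0, 1.0]], dtype=theano.config.floatX))
print out.shape  # (1, 3)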
In [3]:
class LogisticRegression(object):
    """Multi-class Logistic Regression Class

    The logistic regression is fully described by a weight matrix :math:`W`
    and bias vector :math:`b`. Classification is done by projecting data
    points onto a set of hyperplanes, the distance to which is used to
    determine a class membership probability.
    """

    def __init__(self, input, n_in, n_out):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie
        """
        # start-snippet-1
        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        self.W = theano.shared(
            value=numpy.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(
            value=numpy.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        # symbolic expression for computing the matrix of class-membership
        # probabilities, where:
        # W is a matrix where column-k represents the separation hyperplane
        # for class-k
        # x is a matrix where row-j represents input training sample-j
        # b is a vector where element-k represents the free parameter of
        # hyperplane-k
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # symbolic description of how to compute prediction as class whose
        # probability is maximal
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # end-snippet-1

        # parameters of the model
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        """Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
            \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # start-snippet-2
        # y.shape[0] is (symbolically) the number of rows in y, i.e.,
        # number of examples (call it n) in the minibatch
        # T.arange(y.shape[0]) is a symbolic vector which will contain
        # [0,1,2,... n-1] T.log(self.p_y_given_x) is a matrix of
        # Log-Probabilities (call it LP) with one row per example and
        # one column per class LP[T.arange(y.shape[0]),y] is a vector
        # v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
        # LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
        # the mean (across minibatch examples) of the elements in v,
        # i.e., the mean log-likelihood across the minibatch.
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
        # end-snippet-2

    def errors(self, y):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch; zero one
        loss over the size of the minibatch

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """
        # check if y has same dimension of y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # check if y is of the correct datatype
        if y.dtype.startswith('int'):
            # the T.neq operator returns a vector of 0s and 1s, where 1
            # represents a mistake in prediction
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()
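The comment inside negative_log_likelihood describes the fancy-indexing trick LP[T.arange(y.shape[0]), y]. A small illustrative NumPy example (values chosen only for the demonstration) makes it concrete: it picks, for each row, the log-probability of that row's correct class and then averages them:
In [ ]:
LP = numpy.log(numpy.array([[0.9, 0.1],
                            [0.2, 0.8],
                            [0.6, 0.4]]))
y_demo = numpy.array([0, 1, 0])
# picks LP[0, 0], LP[1, 1], LP[2, 0]
print LP[numpy.arange(3), y_demo]
# mean negative log-likelihood over the three examples
print -LP[numpy.arange(3), y_demo].mean()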
In [4]:
class MLP(object):
    """Multi-layer perceptron class

    We define a single hidden layer using the CapaOculta class created above,
    and a softmax output layer for which we reuse the LogisticRegression class.
    """

    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: random number generator used to initialize the weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable for the input of the MLP

        :type n_in: int
        :param n_in: number of input units

        :type n_hidden: int
        :param n_hidden: number of units in the hidden layer

        :type n_out: int
        :param n_out: number of units in the output layer
        """
        # Create the hidden layer
        self.hiddenLayer = CapaOculta(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )
        # The logistic regression layer takes the hidden units as its input
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out
        )
        # end-snippet-2 start-snippet-3
        # L1 norm: one way of regularizing the model
        self.L1 = (
            abs(self.hiddenLayer.W).sum()
            + abs(self.logRegressionLayer.W).sum()
        )
        # square of the L2 norm: another way of regularizing
        self.L2_sqr = (
            (self.hiddenLayer.W ** 2).sum()
            + (self.logRegressionLayer.W ** 2).sum()
        )
        # Negative log-likelihood of the MLP is the one computed in the
        # output (logistic regression) layer
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood
        )
        # Same for the errors
        self.errors = self.logRegressionLayer.errors
        # The parameters of the model are the parameters of both layers
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params
In [ ]:
learning_rate=0.1
L1_reg=0.00
L2_reg=0.0001
n_epochs=10000
batch_size=20
n_hidden=10
train_set_x = theano.shared(dataIn)
train_set_y = theano.shared(dataOut)
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
print '... building the model'
index = T.iscalar()  # minibatch index
x = T.matrix('x')    # input data
y = T.lvector('y')   # expected output labels
rng = numpy.random.RandomState(1234)
# Build the MLP object
classifier = MLP(
    rng=rng,
    input=x,
    n_in=2,
    n_hidden=n_hidden,
    n_out=2  # two classes (labels 0/1), so the softmax output layer needs two units
)
# Cost function to minimize
cost = (
    classifier.negative_log_likelihood(y)
    + L1_reg * classifier.L1
    + L2_reg * classifier.L2_sqr
)
# Gradient of the cost with respect to the parameters of both layers
gparams = [T.grad(cost, param) for param in classifier.params]
# And the corresponding parameter updates
updates = [
    (param, param - learning_rate * gparam)
    for param, gparam in zip(classifier.params, gparams)
]
print train_set_x.dtype
print train_set_y.dtype
print index.dtype
# Compile the training function
train_model = theano.function(
    inputs=[index],
    outputs=cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size],
        y: train_set_y[index * batch_size: (index + 1) * batch_size]
    }
)
print '... training'
epoch = 0
while (epoch < n_epochs):
    epoch = epoch + 1
    minibatch_avg_cost = 0
    for minibatch_index in xrange(n_train_batches):
        minibatch_avg_cost = minibatch_avg_cost + train_model(minibatch_index)
    print "Epoch: " + repr(epoch) + " - Average cost: " + repr(minibatch_avg_cost / n_train_batches)
# Compile a prediction function and run it over the training minibatches
predict = theano.function(
    inputs=[index],
    outputs=classifier.logRegressionLayer.y_pred,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size]
    }
)
test = [predict(i) for i in xrange(n_train_batches)]
print test
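To get a single accuracy figure instead of the raw per-batch predictions, we can compare them against the stored labels (a small sketch; only the examples covered by complete minibatches are counted):
In [ ]:
pred_all = numpy.concatenate(test)
true_all = dataOut[:len(pred_all)]  # labels for the examples actually predicted
print 'Training accuracy: %.2f%%' % (100.0 * numpy.mean(pred_all == true_all))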
Exercise: use the "digits.mat" database and configure our MLP for that dataset.
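A possible starting point is sketched below. It is only a sketch, not a tested solution: it assumes digits.mat stores the examples in a matrix 'X' and the labels in a vector 'y', one class per digit; the actual variable names and sizes should be checked with io.loadmat before building the model.
In [ ]:
digits = io.loadmat('digits.mat', squeeze_me=True)
print digits.keys()  # check the actual variable names first
# Assumed names: 'X' (n_examples x n_features) and 'y' (n_examples,)
digitsIn = digits['X'].astype(theano.config.floatX)
digitsOut = digits['y'].astype(int)
n_features = digitsIn.shape[1]
n_classes = len(numpy.unique(digitsOut))
classifier_digits = MLP(
    rng=numpy.random.RandomState(1234),
    input=x,
    n_in=n_features,  # one input unit per pixel/feature
    n_hidden=25,      # assumed hidden layer size; adjust as needed
    n_out=n_classes   # one softmax output unit per digit class
)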
In [ ]: