In [1]:
import numpy
import theano
import theano.tensor as T
import scipy.io as io
print '... loading data'
data=io.loadmat('dataLR.mat',squeeze_me=True)
dataIn=data['data'][:,0:2]
dataOut = data['data'][:,2]
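A quick sanity check of what loadmat returned is cheap and catches shape mistakes early; the sketch below assumes, as the slicing above implies, that data['data'] is an (N, 3) array with two feature columns and one label column.
# Sanity check of the loaded arrays (assumes an (N, 3) 'data' matrix, as above).
print 'dataIn shape :', dataIn.shape
print 'dataOut shape:', dataOut.shape
print 'class labels :', numpy.unique(dataOut)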
In [2]:
class CapaOculta(object):
    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """
        Typical hidden layer of an MLP: the units are fully connected and use a
        sigmoidal activation function. The weight matrix "W" has shape (n_in, n_out)
        and the bias vector "b" has shape (n_out,).

        Note: we use tanh.

        The activation is given by: tanh(dot(input, W) + b)

        :type rng: numpy.random.RandomState
        :param rng: random number generator used to initialize the weights

        :type input: theano.tensor.dmatrix
        :param input: symbolic tensor describing the input data (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of the input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: theano.Op or function
        :param activation: non-linearity applied in the hidden layer
        """
        self.input = input
        self.activation = activation
        if W is None:
            W_values = numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX  # @UndefinedVariable
            )
            if activation == T.nnet.sigmoid:
                W_values *= 4
            W = theano.shared(value=W_values, name='W', borrow=True)
        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)
        self.W = W
        self.b = b
        lin_output = T.dot(input, self.W) + self.b
        self.output = (
            lin_output if activation is None
            else activation(lin_output)
        )
        # Model parameters
        self.params = [self.W, self.b]
    def output_for(self, x):
        # Symbolic output of this layer for an arbitrary input x. A distinct
        # name is used because self.output (set in __init__) already holds the
        # graph built on the layer's own `input` variable.
        lin = T.dot(x, self.W) + self.b
        return lin if self.activation is None else self.activation(lin)
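A minimal usage sketch for the layer above (the sizes and inputs are illustrative, not from dataLR.mat): build the symbolic graph on a matrix variable and compile it to inspect the activations.
# Illustrative only: a 2-input, 3-unit hidden layer evaluated on two rows.
rng_demo = numpy.random.RandomState(0)
x_demo = T.matrix('x_demo')
capa = CapaOculta(rng=rng_demo, input=x_demo, n_in=2, n_out=3, activation=T.tanh)
f_capa = theano.function([x_demo], capa.output, allow_input_downcast=True)
print f_capa([[0.0, 1.0], [1.0, -1.0]])   # one row of tanh activations per sample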
In [56]:
class LogisticRegression(object):
"""Multi-class Logistic Regression Class
The logistic regression is fully described by a weight matrix :math:`W`
and bias vector :math:`b`. Classification is done by projecting data
points onto a set of hyperplanes, the distance to which is used to
determine a class membership probability.
"""
def __init__(self, input, n_in, n_out):
""" Initialize the parameters of the logistic regression
:type input: theano.tensor.TensorType
:param input: symbolic variable that describes the input of the
architecture (one minibatch)
:type n_in: int
:param n_in: number of input units, the dimension of the space in
which the datapoints lie
:type n_out: int
:param n_out: number of output units, the dimension of the space in
which the labels lie
"""
# start-snippet-1
# initialize with 0 the weights W as a matrix of shape (n_in, n_out)
self.W = theano.shared(
value=numpy.zeros(
(n_in, n_out),
dtype=theano.config.floatX
),
name='W',
borrow=True
)
        # initialize the biases b as a vector of n_out 0s
self.b = theano.shared(
value=numpy.zeros(
(n_out,),
dtype=theano.config.floatX
),
name='b',
borrow=True
)
# symbolic expression for computing the matrix of class-membership
# probabilities
        # Where:
        # W is a matrix where column-k represents the separating hyperplane for
        # class-k
        # x is a matrix where row-j represents input training sample-j
        # b is a vector where element-k represents the free parameter of
        # hyperplane-k
self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)
# symbolic description of how to compute prediction as class whose
# probability is maximal
self.y_pred = T.argmax(self.p_y_given_x, axis=1)
# end-snippet-1
# parameters of the model
self.params = [self.W, self.b]
def negative_log_likelihood(self, y):
"""Return the mean of the negative log-likelihood of the prediction
of this model under a given target distribution.
.. math::
\frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
\frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
\log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
\ell (\theta=\{W,b\}, \mathcal{D})
:type y: theano.tensor.TensorType
:param y: corresponds to a vector that gives for each example the
correct label
Note: we use the mean instead of the sum so that
the learning rate is less dependent on the batch size
"""
# start-snippet-2
# y.shape[0] is (symbolically) the number of rows in y, i.e.,
# number of examples (call it n) in the minibatch
# T.arange(y.shape[0]) is a symbolic vector which will contain
# [0,1,2,... n-1] T.log(self.p_y_given_x) is a matrix of
# Log-Probabilities (call it LP) with one row per example and
# one column per class LP[T.arange(y.shape[0]),y] is a vector
# v containing [LP[0,y[0]], LP[1,y[1]], LP[2,y[2]], ...,
# LP[n-1,y[n-1]]] and T.mean(LP[T.arange(y.shape[0]),y]) is
# the mean (across minibatch examples) of the elements in v,
# i.e., the mean log-likelihood across the minibatch.
return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])
# end-snippet-2
def errors(self, y):
"""Return a float representing the number of errors in the minibatch
over the total number of examples of the minibatch ; zero one
loss over the size of the minibatch
:type y: theano.tensor.TensorType
:param y: corresponds to a vector that gives for each example the
correct label
"""
# check if y has same dimension of y_pred
if y.ndim != self.y_pred.ndim:
raise TypeError(
'y should have the same shape as self.y_pred',
('y', y.type, 'y_pred', self.y_pred.type)
)
# check if y is of the correct datatype
if y.dtype.startswith('int'):
# the T.neq operator returns a vector of 0s and 1s, where 1
# represents a mistake in prediction
return T.mean(T.neq(self.y_pred, y))
else:
raise NotImplementedError()
def output(self,x):
pred=T.nnet.softmax(T.dot(x, self.W) + self.b)
return T.argmax(pred, axis=1)
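As a rough sketch of how this class can be exercised on its own (toy numbers, not the dataLR.mat data): compile the loss and the error rate of an untrained, zero-initialised model on a tiny batch.
# Illustrative only: with W and b at zero, every class gets probability 0.5,
# so the mean negative log-likelihood is log(2) ~ 0.693.
x_lr = T.matrix('x_lr')
y_lr = T.ivector('y_lr')
clf = LogisticRegression(input=x_lr, n_in=2, n_out=2)
nll = theano.function([x_lr, y_lr], clf.negative_log_likelihood(y_lr),
                      allow_input_downcast=True)
err = theano.function([x_lr, y_lr], clf.errors(y_lr), allow_input_downcast=True)
print nll([[0., 1.], [1., 0.]], [0, 1])
print err([[0., 1.], [1., 0.]], [0, 1])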
In [59]:
class MLP(object):
"""Clase Perceptrón multicapa
Vamos a definir una sola capa oculta usando la clase CapaOculta que hemos creado anteriormente, y usaremos una capa de
salida tipo softmax para la que usaremos la clase LogisticRegression.
"""
def __init__(self, rng, input, n_in, n_hidden, n_out):
"""Initialize the parameters for the multilayer perceptron
:type rng: numpy.random.RandomState
:param rng: Generador de número aleatorios para la inicialización de los pesos
:type input: theano.tensor.TensorType
:param input: Variable simbólica para la entrada al MLP
:type n_in: int
:param n_in: Número de neuronas de entrada
:type n_hidden: int
:param n_hidden: Número de neuronas en la capa oculta
:type n_out: int
:param n_out: Número de neuronas en la capa de salida
"""
        # Create the hidden layer
        self.hiddenLayer = CapaOculta(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )
        # The logistic regression layer takes the hidden units as its input
        self.logRegressionLayer = LogisticRegression(
            input=self.hiddenLayer.output,
            n_in=n_hidden,
            n_out=n_out
        )
        # end-snippet-2 start-snippet-3
        # L1 norm: one way of regularising the model.
        self.L1 = (
            abs(self.hiddenLayer.W).sum()
            + abs(self.logRegressionLayer.W).sum()
        )
        # square of L2 norm: another way of regularising.
        self.L2_sqr = (
            (self.hiddenLayer.W ** 2).sum()
            + (self.logRegressionLayer.W ** 2).sum()
        )
        # Return the mean of the negative log-likelihood of the prediction
        # of this model under a given target distribution.
        self.negative_log_likelihood = (
            self.logRegressionLayer.negative_log_likelihood
        )
        # Keep the error function of the output layer
        self.errors = self.logRegressionLayer.errors
        # The model parameters are the parameters of the two layers
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params
    def output(self, x):
        # Feed x through the hidden layer and then through the softmax layer.
        hidden = self.hiddenLayer.output_for(x)
        return self.logRegressionLayer.output(hidden)
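A quick structural check of the class above (the layer sizes are illustrative): instantiate an MLP and print the shapes of its shared parameters before any training.
# Illustrative only: hidden W should be (n_in, n_hidden), output W (n_hidden, n_out).
rng_mlp = numpy.random.RandomState(1234)
x_mlp = T.matrix('x_mlp')
mlp_demo = MLP(rng=rng_mlp, input=x_mlp, n_in=2, n_hidden=5, n_out=2)
for p in mlp_demo.params:
    print p.name, p.get_value(borrow=True).shape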
In [60]:
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
batch_size=20, n_hidden=30):
train_set_x = theano.shared(numpy.asarray(dataIn,
dtype=theano.config.floatX),borrow=True)
train_set_y = T.cast(theano.shared(numpy.asarray(dataOut,
dtype=theano.config.floatX),borrow=True),'int32')
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    print '... building the model'
    index = T.lscalar()  # minibatch index
    x = T.matrix('x')    # input data
    y = T.ivector('y')   # expected output labels
rng = numpy.random.RandomState(1234)
    # Build the MLP object
classifier = MLP(
rng=rng,
input=x,
n_in=2,
n_hidden=n_hidden,
n_out=2
)
    # Cost function to minimize
cost = (
classifier.negative_log_likelihood(y)
+ L1_reg * classifier.L1
+ L2_reg * classifier.L2_sqr
)
    # Compute the gradient of the cost with respect to the parameters of both layers
    gparams = [T.grad(cost, param) for param in classifier.params]
    # And define the parameter updates
updates = [
(param, param - learning_rate * gparam)
for param, gparam in zip(classifier.params, gparams)
]
    # Compile the training function
train_model = theano.function(
inputs=[index],
outputs=cost,
updates=updates,
givens={
x: train_set_x[index * batch_size: (index + 1) * batch_size],
y: train_set_y[index * batch_size: (index + 1) * batch_size]
}
)
    # Train the network
    print '... training'
    epoch = 0
    while (epoch < n_epochs):
        epoch = epoch + 1
        minibatch_avg_cost = 0
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = minibatch_avg_cost + train_model(minibatch_index)
        #print "Epoch: " + repr(epoch) + " - Mean cost: " + repr(minibatch_avg_cost / n_train_batches)
    # Print the results. classifier.output(x) only builds a symbolic graph,
    # so we compile it into a prediction function before evaluating it.
    predict_model = theano.function(inputs=[x], outputs=classifier.output(x),
                                    allow_input_downcast=True)
    print 'Expected values: ', dataOut
    print 'Predicted values:', predict_model(dataIn)

if __name__ == '__main__':
    test_mlp()
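Because test_mlp exposes its hyperparameters as keyword arguments, alternative settings only need a different call; the values below are illustrative, not tuned recommendations (the line is left commented so the cell does not retrain twice).
# Illustrative only: a shorter run with fewer hidden units and stronger L2 regularisation.
# test_mlp(learning_rate=0.05, n_epochs=200, n_hidden=10, L2_reg=0.001)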
In [3]:
import theano.tensor as T
from theano import function
x = T.dmatrix('x')
s = 1 / (1 + T.exp(-x))
logistic = function([x], s)
logistic([[0, 1], [-1, -2]])
Out[3]:
array([[ 0.5       ,  0.73105858],
       [ 0.26894142,  0.11920292]])
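The same element-wise logistic function is also available as a built-in op, which should give identical values to the hand-written expression above.
# T.nnet.sigmoid computes the same element-wise logistic function.
logistic2 = function([x], T.nnet.sigmoid(x))
print logistic2([[0, 1], [-1, -2]])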
In [1]:
import scipy.io as io
print '... loading data'
data=io.loadmat('dataLR.mat',squeeze_me=True)
dataIn=data['data'][:,0:2].astype(float)
dataOut = data['data'][:,2].astype(int)
In [14]:
import matplotlib.pyplot as plt
import numpy as np
# Split the samples by class; rows of the other class stay at (0, 0), which
# lies outside the axis range set below and so is not visible in the plot.
x = np.zeros((100, 2))
y = np.zeros((100, 2))
for i in range(100):
    if (dataOut[i] == 1):
        x[i, :] = dataIn[i, :]
    else:
        y[i, :] = dataIn[i, :]
plt.plot(x[:,0],x[:,1],'ro', y[:,0],y[:,1],'go')
plt.axis([25,100,25,100])
plt.show()
In [15]:
import numpy
import theano
import theano.tensor as T
rng = numpy.random
steps=100000
feats=2
x = T.dmatrix("x")   # the features are real-valued, so use a float matrix
y = T.lvector("y")
w = theano.shared(rng.randn(feats))
b = theano.shared(0.)
print "Initial model:"
print "W (shape): " + repr(w.get_value().shape)
print "b (value): " + repr(b.get_value())
import scipy.io as io
print '... loading data'
data = io.loadmat('dataLR.mat', squeeze_me=True)
dataIn = data['data'][:, 0:2].astype(float)
dataOut = data['data'][:, 2].astype(int)
'''N = 400
feats = 2
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
#dataIn=D[0]
#dataOut=D[1]
'''
# Construct Theano expression graph
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b)) # Probability that target = 1
prediction = p_1 > 0.5 # The prediction thresholded
xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function
cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize
gw, gb = T.grad(cost, [w, b])             # Compute the gradient of the cost
                                          # (we shall return to this in a
                                          # following section of this tutorial)
# Compile
train = theano.function(
inputs=[x,y],
outputs=[prediction, xent],
updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)),allow_input_downcast=True)
predict = theano.function(inputs=[x], outputs=prediction,allow_input_downcast=True)
# Train
for i in range(steps):
    pred, err = train(dataIn, dataOut)
print "Expected values:  ", dataOut
pp = predict(dataIn)
print "Predicted values: ", pp
print "Correct predictions (out of 100): ", map(lambda x, y: x == y, dataOut, predict(dataIn)).count(True)
In [16]:
import matplotlib.pyplot as plt
import numpy as np
x=np.zeros((100,2))
y=np.zeros((100,2))
for i in range(100):
if (pp[i]==1):
x[i,:]=dataIn[i,:]
else:
y[i,:]=dataIn[i,:]
plt.plot(x[:,0],x[:,1],'ro', y[:,0],y[:,1],'go')
plt.axis([25,100,25,100])
plt.show()
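Since this model is linear, its decision boundary is the line w[0]*x1 + w[1]*x2 + b = 0; the sketch below (assuming w and b still hold the trained values and that the labels are 0/1) overlays that line on the data.
# Overlay the learned decision boundary w[0]*x1 + w[1]*x2 + b = 0 on the data.
w_val = w.get_value()
b_val = b.get_value()
x1 = np.linspace(25, 100, 100)
x2 = -(w_val[0] * x1 + b_val) / w_val[1]   # solve the boundary equation for x2
plt.plot(dataIn[dataOut == 1, 0], dataIn[dataOut == 1, 1], 'ro',
         dataIn[dataOut == 0, 0], dataIn[dataOut == 0, 1], 'go')
plt.plot(x1, x2, 'b-')
plt.axis([25, 100, 25, 100])
plt.show()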
In [ ]: