and why they matter
Install the bleeding edge version from here: http://lasagne.readthedocs.org/en/latest/user/installation.html
In [ ]:
import numpy as np

def sum_squares(N):
    return <student.Implement_me()>
In [ ]:
%%time
sum_squares(10**8)
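For comparison, here is a plain NumPy sketch of the same computation (sum_squares_numpy is just an illustrative name, not part of the assignment):
In [ ]:
# a possible NumPy baseline: sum of squares of 0..N-1
# (float64 is used to avoid int64 overflow of the sum for large N)
def sum_squares_numpy(N):
    return (np.arange(N, dtype=np.float64) ** 2).sum()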
In [ ]:
import theano
import theano.tensor as T
In [ ]:
# This will be a function parameter
N = T.scalar("a dimension", dtype='int32')

# This is a symbolic recipe for the sum of squares of arange(N), given N
result = (T.arange(N)**2).sum()

# Compile the recipe that computes "result" given N
sum_function = theano.function(inputs=[N], outputs=result)
In [ ]:
%%time
sum_function(10**8)
Still confused? Let's fix that.
In [ ]:
# Inputs
example_input_integer = T.scalar("scalar input", dtype='float32')

example_input_tensor = T.tensor4("four dimensional tensor input")  # dtype = theano.config.floatX by default
# don't worry, we won't actually need the tensor

input_vector = T.vector("my vector", dtype='int32')  # vector of integers
In [ ]:
# Transformations

# transformation: elementwise multiplication
double_the_vector = input_vector * 2

# elementwise cosine
elementwise_cosine = T.cos(input_vector)

# difference between the squared vector and the vector itself
vector_squares = input_vector**2 - input_vector
In [ ]:
# Practice time:
# create two vectors of dtype float32
my_vector = student.init_float32_vector()
my_vector2 = student.init_one_more_such_vector()
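A hedged sketch of what those two lines might look like (the names are just the ones used below):
In [ ]:
# sketch: two symbolic float32 vectors
my_vector = T.vector("my vector", dtype='float32')
my_vector2 = T.vector("my second vector", dtype='float32')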
In [ ]:
# Write a transformation (recipe):
# (vec1 * vec2) / (sin(vec1) + 1)
my_transformation = student.implementwhatwaswrittenabove()
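One possible way to write that recipe (a sketch, not the only correct answer):
In [ ]:
# sketch: (vec1 * vec2) / (sin(vec1) + 1), elementwise
my_transformation = my_vector * my_vector2 / (T.sin(my_vector) + 1)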
In [ ]:
print(my_transformation)
# it's okay that it isn't a number: it's a symbolic expression
In [ ]:
#What's inside the transformation
theano.printing.debugprint(my_transformation)
In [ ]:
inputs = [<the two vectors that my_transformation depends on>]
outputs = [<what we compute (can be a list of several transformations)>]

# The next lines compile a function that takes two vectors and computes your transformation
my_function = theano.function(
    inputs, outputs,
    allow_input_downcast=True  # automatic type casting for input parameters (e.g. float64 -> float32)
)
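If you are unsure what to put into the placeholders, one plausible filling (assuming the my_vector, my_vector2 and my_transformation names from above) is:
In [ ]:
# sketch: plausible values for the placeholders above
inputs = [my_vector, my_vector2]
outputs = [my_transformation]
my_function = theano.function(inputs, outputs, allow_input_downcast=True)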
In [ ]:
# using the function with python lists:
print("using python lists:")
print(my_function([1, 2, 3], [4, 5, 6]))
print()

# or using numpy arrays:
# note: the 'float' (float64) array is downcast to the second parameter's dtype, which is float32
print("using numpy arrays:")
print(my_function(np.arange(10),
                  np.linspace(5, 6, 10, dtype='float')))
In [ ]:
# a dictionary of inputs
my_function_inputs = {
    my_vector: [1, 2, 3],
    my_vector2: [4, 5, 6]
}

# evaluate my_transformation
# (the result should match the output of the compiled function)
print(my_transformation.eval(my_function_inputs))

# you can also compute transformations on the fly
print("add 2 vectors", (my_vector + my_vector2).eval(my_function_inputs))

# !WARNING! if your transformation only depends on some of the inputs,
# do not provide the rest of them
print("vector's shape:", my_vector.shape.eval({
    my_vector: [1, 2, 3]
}))
In [ ]:
# Quest #1 - implement a function that computes the mean squared error of two input vectors
# Your function has to take 2 vectors and return a single number
<student.define_inputs_and_transformations()>
compute_mse = <student.compile_function()>
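A minimal sketch of Quest #1 (two float vectors, a symbolic MSE, and a compiled function; the variable names are arbitrary):
In [ ]:
# sketch: mean squared error of two input vectors
v1 = T.vector("v1", dtype='float64')
v2 = T.vector("v2", dtype='float64')
mse = T.mean((v1 - v2) ** 2)
compute_mse = theano.function([v1, v2], mse, allow_input_downcast=True)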
In [ ]:
# Tests
from sklearn.metrics import mean_squared_error

for n in [1, 5, 10, 10**3]:
    elems = [np.arange(n), np.arange(n, 0, -1), np.zeros(n),
             np.ones(n), np.random.random(n), np.random.randint(100, size=n)]
    for el in elems:
        for el_2 in elems:
            true_mse = np.array(mean_squared_error(el, el_2))
            my_mse = compute_mse(el, el_2)
            if not np.allclose(true_mse, my_mse):
                print('Wrong result:')
                print('mse(%s,%s)' % (el, el_2))
                print("should be: %f, but your function returned %f" % (true_mse, my_mse))
                raise ValueError("Something is wrong")

print("All tests passed")
The inputs and transformations only exist when the function is called.
Shared variables always stay in memory, like global variables.
In [ ]:
#creating shared variable
shared_vector_1 = theano.shared(np.ones(10,dtype='float64'))
In [ ]:
# evaluating a shared variable (outside the symbolic graph)
print("initial value", shared_vector_1.get_value())

# within the symbolic graph you use it just like any other input or transformation; no get_value() needed
In [ ]:
#setting new value
shared_vector_1.set_value( np.arange(5) )
#getting that new value
print ("new value", shared_vector_1.get_value())
#Note that the vector changed shape
#This is entirely allowed... unless your graph is hard-wired to work with some fixed shape
In [ ]:
# Write a recipe (transformation) that computes the elementwise product of shared_vector_1 and input_scalar
# Compile it as a function of input_scalar
input_scalar = T.scalar('coefficient', dtype='float32')
scalar_times_shared = <student.write_recipe()>
shared_times_n = <student.compile_function()>
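A minimal sketch of that cell (one of several valid ways to write it):
In [ ]:
# sketch: elementwise product of the shared vector and the input scalar
scalar_times_shared = shared_vector_1 * input_scalar
shared_times_n = theano.function([input_scalar], scalar_times_shared)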
In [ ]:
print "shared:", shared_vector_1.get_value()
print "shared_times_n(5)",shared_times_n(5)
print "shared_times_n(-0.5)",shared_times_n(-0.5)
In [ ]:
# Change the value of the shared vector (the output should change)
shared_vector_1.set_value([-1, 0, 1])

print("shared:", shared_vector_1.get_value())
print("shared_times_n(5)", shared_times_n(5))
print("shared_times_n(-0.5)", shared_times_n(-0.5))
Limitations:
In [ ]:
my_scalar = T.scalar(name='input', dtype='float64')

scalar_squared = T.sum(my_scalar**2)

# the derivative of scalar_squared with respect to my_scalar
derivative = T.grad(scalar_squared, my_scalar)

fun = theano.function([my_scalar], scalar_squared)
grad = theano.function([my_scalar], derivative)
In [ ]:
import matplotlib.pyplot as plt
%matplotlib inline
x = np.linspace(-3,3)
x_squared = list(map(fun,x))
x_squared_der = list(map(grad,x))
plt.plot(x, x_squared,label="x^2")
plt.plot(x, x_squared_der, label="derivative")
plt.legend()
In [ ]:
my_vector = T.vector("my_vector", dtype='float64')
#Compute the gradient of the next weird function over my_scalar and my_vector
#warning! Trying to understand the meaning of that function may result in permanent brain damage
weird_psychotic_function = ((my_vector+my_scalar)**(1+T.var(my_vector)) +1./T.arcsinh(my_scalar)).mean()/(my_scalar**2 +1) + 0.01*T.sin(2*my_scalar**1.5)*(T.sum(my_vector)* my_scalar**2)*T.exp((my_scalar-4)**2)/(1+T.exp((my_scalar-4)**2))*(1.-(T.exp(-(my_scalar-4)**2))/(1+T.exp(-(my_scalar-4)**2)))**2
der_by_scalar,der_by_vector = <student.compute_grad_over_scalar_and_vector()>
compute_weird_function = theano.function([my_scalar,my_vector],weird_psychotic_function)
compute_der_by_scalar = theano.function([my_scalar,my_vector],der_by_scalar)
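For reference, both gradients above can be obtained in one call, since T.grad accepts a list of variables to differentiate with respect to (this is a sketch of the missing line, not the only way to write it):
In [ ]:
# sketch: gradients of the weird function w.r.t. both the scalar and the vector
der_by_scalar, der_by_vector = T.grad(weird_psychotic_function, [my_scalar, my_vector])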
In [ ]:
#Plotting your derivative
vector_0 = [1,2,3]
scalar_space = np.linspace(0,7)
y = [compute_weird_function(x,vector_0) for x in scalar_space]
plt.plot(scalar_space,y,label='function')
y_der_by_scalar = [compute_der_by_scalar(x,vector_0) for x in scalar_space]
plt.plot(scalar_space,y_der_by_scalar,label='derivative')
plt.grid();plt.legend()
In [ ]:
# Multiply the shared vector by a number and save the product back into the shared vector
inputs = [input_scalar]
outputs = [scalar_times_shared]  # return the vector times the scalar

my_updates = {
    shared_vector_1: scalar_times_shared  # ...and write that same result back into shared_vector_1
}

compute_and_save = theano.function(inputs, outputs, updates=my_updates)
In [ ]:
shared_vector_1.set_value(np.arange(5))

# initial shared_vector_1
print("initial shared value:", shared_vector_1.get_value())

# evaluating the function (shared_vector_1 will be changed)
print("compute_and_save(2) returns", compute_and_save(2))

# evaluate the new shared_vector_1
print("new shared value:", shared_vector_1.get_value())
Implement the regular logistic regression training algorithm.
Tips:
We will train on a two-class digits dataset (sklearn's load_digits with classes 0 and 1).
In [ ]:
from sklearn.datasets import load_digits
mnist = load_digits(2)
X,y = mnist.data, mnist.target
print ("y [shape - %s]:"%(str(y.shape)),y[:10])
print ("X [shape - %s]:"%(str(X.shape)))
print (X[:3])
print (y[:10])
In [ ]:
# inputs and shareds
shared_weights = <student.code_me()>
input_X = <student.code_me()>
input_y = <student.code_me()>
In [ ]:
predicted_y = <predicted probabilities for input_X>
loss = <logistic loss (scalar, averaged over samples)>
grad = <gradient of the loss w.r.t. the model weights>

updates = {
    shared_weights: <new weights after a gradient step>
}
In [ ]:
train_function = <compile function that takes X and y, returns log loss and updates weights>
predict_function = <compile function that takes X and computes probabilities of y>
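A hedged end-to-end sketch of the three cells above: plain gradient descent with a sigmoid link, no bias term, and an arbitrary learning rate of 0.01 (all of these are simplifying choices, not requirements):
In [ ]:
# sketch: logistic regression in raw theano
shared_weights = theano.shared(np.zeros(X.shape[1], dtype='float64'))  # one weight per pixel, no bias
input_X = T.matrix("X")
input_y = T.vector("y")

predicted_y = T.nnet.sigmoid(T.dot(input_X, shared_weights))           # probability of class 1
loss = -T.mean(input_y * T.log(predicted_y) +
               (1 - input_y) * T.log(1 - predicted_y))                 # binary cross-entropy
grad = T.grad(loss, shared_weights)

updates = {shared_weights: shared_weights - 0.01 * grad}               # one gradient-descent step

train_function = theano.function([input_X, input_y], loss,
                                 updates=updates, allow_input_downcast=True)
predict_function = theano.function([input_X], predicted_y,
                                   allow_input_downcast=True)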
In [ ]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y)
In [ ]:
from sklearn.metrics import roc_auc_score

for i in range(5):
    loss_i = train_function(X_train, y_train)
    print("loss at iter %i: %.4f" % (i, loss_i))
    print("train auc:", roc_auc_score(y_train, predict_function(X_train)))
    print("test auc:", roc_auc_score(y_test, predict_function(X_test)))

print("resulting weights:")
plt.imshow(shared_weights.get_value().reshape(8, -1))
plt.colorbar()
In [ ]:
from mnist import load_dataset
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
print(X_train.shape, y_train.shape)
In [ ]:
import lasagne

input_X = T.tensor4("X")

# input dimensions (None means "arbitrary" and only works for the first axis [samples])
input_shape = [None, 1, 28, 28]

target_y = T.vector("target Y integer", dtype='int32')
Defining network architecture
In [ ]:
# Input layer (auxiliary)
input_layer = lasagne.layers.InputLayer(shape=input_shape, input_var=input_X)

# fully connected layer that takes the input layer and applies 50 neurons to it
# the nonlinearity here is sigmoid, as in logistic regression
# you can give a name to each layer (optional)
dense_1 = lasagne.layers.DenseLayer(input_layer, num_units=50,
                                    nonlinearity=lasagne.nonlinearities.sigmoid,
                                    name="hidden_dense_layer")

# fully connected output layer that takes dense_1 as input and has 10 neurons (1 for each digit)
# we use a softmax nonlinearity to make the probabilities add up to 1
dense_output = lasagne.layers.DenseLayer(dense_1, num_units=10,
                                         nonlinearity=lasagne.nonlinearities.softmax,
                                         name='output')
In [ ]:
#network prediction (theano-transformation)
y_predicted = lasagne.layers.get_output(dense_output)
In [ ]:
#all network weights (shared variables)
all_weights = lasagne.layers.get_all_params(dense_output)
print (all_weights)
In [ ]:
#Mean categorical crossentropy as a loss function - similar to logistic loss but for multiclass targets
loss = lasagne.objectives.categorical_crossentropy(y_predicted,target_y).mean()
#prediction accuracy
accuracy = lasagne.objectives.categorical_accuracy(y_predicted,target_y).mean()
# this function computes the gradients AND composes the weight updates, just like you did earlier
updates_sgd = lasagne.updates.sgd(loss, all_weights, learning_rate=0.01)
In [ ]:
#function that computes loss and updates weights
train_fun = theano.function([input_X,target_y],[loss,accuracy],updates= updates_sgd)
#function that just computes accuracy
accuracy_fun = theano.function([input_X,target_y],accuracy)
In [ ]:
# An auxiliary function that returns mini-batches for neural network training
# Parameters:
#   X - a tensor of images with shape (many, 1, 28, 28), e.g. X_train
#   y - a vector of answers for the corresponding images, e.g. y_train
#   batchsize - a single number, the intended size of each batch
# What you need to implement:
# 1) Shuffle the data
#    - shuffle X and y the same way, so as not to break the correspondence between X_i and y_i
# 2) Split the data into minibatches of batchsize
#    - if the data size is not a multiple of batchsize, make the last batch smaller
# 3) Return a list (or an iterator) of pairs
#    - (a sub-batch of images, the answers from y for that sub-batch)
def iterate_minibatches(X, y, batchsize):
    <return an iterable of (X_batch, y_batch) batches of images and answers for them>
#
#
#
#
#
# You feel lost and wish you stayed home tonight?
# Go search for a similar function at
# https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py
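A hedged sketch of one way to write it, in the spirit of the Lasagne example linked above (shuffle once with a random permutation, then slice):
In [ ]:
# sketch of iterate_minibatches
def iterate_minibatches(X, y, batchsize):
    # shuffle X and y with the same permutation to keep (X_i, y_i) pairs aligned
    indices = np.random.permutation(len(X))
    # walk over the shuffled indices in chunks of batchsize;
    # the last chunk is simply smaller if len(X) is not a multiple of batchsize
    for start in range(0, len(X), batchsize):
        batch_idx = indices[start:start + batchsize]
        yield X[batch_idx], y[batch_idx]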
In [ ]:
import time

num_epochs = 100  # number of full passes over the training data
batch_size = 50   # number of samples processed at each function call

for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_acc = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train, y_train, batch_size):
        inputs, targets = batch
        train_err_batch, train_acc_batch = train_fun(inputs, targets)
        train_err += train_err_batch
        train_acc += train_acc_batch
        train_batches += 1

    # And a full pass over the validation data:
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, batch_size):
        inputs, targets = batch
        val_acc += accuracy_fun(inputs, targets)
        val_batches += 1

    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
    print("  train accuracy:\t\t{:.2f} %".format(
        train_acc / train_batches * 100))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))
In [ ]:
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500):
    inputs, targets = batch
    acc = accuracy_fun(inputs, targets)
    test_acc += acc
    test_batches += 1

print("Final results:")
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))

if test_acc / test_batches * 100 > 99:
    print("Achievement unlocked: 80lvl Warlock!")
else:
    print("We need more magic!")
There is a mini-report at the end that you will have to fill in. We recommend reading it first and filling it in as you iterate.
Dropout layers:
lasagne.layers.DropoutLayer(prev_layer, p=probability_to_zero_out)
Convolution layers:
network = lasagne.layers.Conv2DLayer(prev_layer,
                                     num_filters=n_neurons,
                                     filter_size=(filter_width, filter_height),
                                     nonlinearity=some_nonlinearity)
There are plenty of other layers and architectures to try.
There is a template for your solution below; you can use it or throw it away and write it your own way.
In [ ]:
from mnist import load_dataset
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
print(X_train.shape, y_train.shape)
In [ ]:
import lasagne

input_X = T.tensor4("X")

# input dimensions (None means "arbitrary" and only works for the first axis [samples])
input_shape = [None, 1, 28, 28]

target_y = T.vector("target Y integer", dtype='int32')
In [ ]:
# Input layer (auxiliary)
input_layer = lasagne.layers.InputLayer(shape=input_shape, input_var=input_X)

<student.code_neural_network_architecture()>

dense_output = <your network output>
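A hedged sketch of one possible architecture, using the layers listed earlier (the filter counts and unit counts are arbitrary starting points, not recommendations):
In [ ]:
# sketch: conv -> pool -> dropout -> dense -> softmax output
conv_1 = lasagne.layers.Conv2DLayer(input_layer, num_filters=16, filter_size=(3, 3),
                                    nonlinearity=lasagne.nonlinearities.rectify)
pool_1 = lasagne.layers.MaxPool2DLayer(conv_1, pool_size=(2, 2))
drop_1 = lasagne.layers.DropoutLayer(pool_1, p=0.25)
dense_1 = lasagne.layers.DenseLayer(drop_1, num_units=128,
                                    nonlinearity=lasagne.nonlinearities.rectify)
dense_output = lasagne.layers.DenseLayer(dense_1, num_units=10,
                                         nonlinearity=lasagne.nonlinearities.softmax)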
In [ ]:
# Network predictions (theano-transformation)
y_predicted = lasagne.layers.get_output(dense_output)
In [ ]:
# All network weights (shared variables)
# the "trainable" flag means we do not return auxiliary params such as the batch mean (for batch normalization)
all_weights = lasagne.layers.get_all_params(dense_output, trainable=True)
print (all_weights)
In [ ]:
#loss function
loss = <loss function>
#<optionally add regularization>
accuracy = <mean accuracy score for evaluation>
#weight updates
updates = <try different update methods>
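One plausible filling of that cell (categorical cross-entropy, mean accuracy, and Adam updates; Adam is just one of the lasagne.updates options you could try):
In [ ]:
# sketch: loss, accuracy and updates for the network above
loss = lasagne.objectives.categorical_crossentropy(y_predicted, target_y).mean()
accuracy = lasagne.objectives.categorical_accuracy(y_predicted, target_y).mean()
updates = lasagne.updates.adam(loss, all_weights, learning_rate=1e-3)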
In [ ]:
# A function that accepts X and y, returns the loss and accuracy, and performs the weight updates
train_fun = theano.function([input_X, target_y], [loss, accuracy], updates=updates)

# A function that just computes accuracy given X and y
accuracy_fun = theano.function([input_X, target_y], accuracy)
In [ ]:
# training iterations
num_epochs = <how many times to iterate over the entire training set>
batch_size = <how many samples are processed at a single function call>

for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_acc = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train, y_train, batch_size):
        inputs, targets = batch
        train_err_batch, train_acc_batch = train_fun(inputs, targets)
        train_err += train_err_batch
        train_acc += train_acc_batch
        train_batches += 1

    # And a full pass over the validation data:
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, batch_size):
        inputs, targets = batch
        val_acc += accuracy_fun(inputs, targets)
        val_batches += 1

    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
    print("  train accuracy:\t\t{:.2f} %".format(
        train_acc / train_batches * 100))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))
In [ ]:
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500):
    inputs, targets = batch
    acc = accuracy_fun(inputs, targets)
    test_acc += acc
    test_batches += 1

print("Final results:")
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))

if test_acc / test_batches * 100 > 99:
    print("Achievement unlocked: 80lvl Warlock!")
else:
    print("We need more magic!")
Report
All creative approaches are highly welcome, but at the very least it would be great to mention
There is no need to write strict mathematical proofs (unless you want to).
___ ___, and here's my story.
A long time ago in a galaxy far, far away, when it was still more than an hour before the deadline, I got an idea:
How could I be so naive?!
This thing has finally converged and
That, having wasted __ [minutes, hours or days] of my life training, got
[an optional afterword and mortal curses on assignment authors]
In [ ]: