# CIFAR Conv Net

И так, в этом ноутбуке Вы сделаете превую в своей жизни сверточную сеть! На сложном датасете. Cкачайте его кстати,

!mkdir cifar10 !sudo curl -o cifar-10-python.tar.gz https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz !tar -xvzf cifar-10-python.tar.gz -C cifar10

``````

In :

import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

``````
``````

In :

plt.rcParams['figure.figsize'] = (10.0, 8.0)

cifar10_dir = './cifar10/cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

``````
``````

In :

classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cls in enumerate(classes):
idxs = np.flatnonzero(y_train == y)
idxs = np.random.choice(idxs, samples_per_class, replace=False)
for i, idx in enumerate(idxs):
plt_idx = i * num_classes + y + 1
plt.subplot(samples_per_class, num_classes, plt_idx)
plt.imshow(X_train[idx].astype('uint8').transpose(1, 2, 0))
plt.axis('off')
if i == 0:
plt.title(cls)
plt.show()

``````
``````

``````

# First of all -- Checking Questions

Вопрос 1: Чем отличаются современные сверточные сети от сетей 5 летней давности?

``````Больше, сложнее, глубже.

``````

Вопрос 2: Какие неприятности могут возникнуть во время обучения современных нейросетей?

``````Проблема инициализации весов (матриц). Затухание градиентов из-за слишком большой глубины. Слишком большое время работы.

``````

Вопрос 3: У вас есть очень маленький датасет из 100 картинок, классификация, но вы очень хотите использовать нейросеть, какие неприятности вас ждут и как их решить? что делать если первый вариант решения не заработает?

``````Слишком мало данных. Возможно быстрое переобучение. С помощью data augmentation, т.е. поворотов, кропов, флипов и других преобразований исходных картинок увеличить трейн.

``````

Вопрос 4: Как сделать стайл трансфер для музыки? oO

``Взять спектрограмму музыки (Фурье преобразование) - 2D матрица, применить к ней style transfer как к картинке, например VGG19, и затем восстановить обратно сигнал по измененной спектрограмме.``
``````

In :

import lasagne
from theano import tensor as T
from lasagne.nonlinearities import *

input_X = T.tensor4("X")
target_y = T.vector("target Y integer",dtype='int32')

``````
``````

Using cuDNN version 5110 on context None
Mapped name None to device cuda: Tesla K80 (0000:00:04.0)

``````

### Соберите нейронку:

• Many times x (Conv+Pool)
• Many small convolutions like 3x3
• Batch Norm
• Residual Connection
• Data Augmentation
• Learning rate Schedule
• ...

### Самое интересное

• Для сдачи задания нужно набрать на точность тесте > 92.5% (это займет много времени, торопитесь :) )
• Для получения бонусных баллов > 95.0%
• Будет очень хорошо если вы придумаете свою архитектуру или сможете обучить что-то из вышеперечисленного :)
• А для обучения всего этого добра вам будет куда удобнее использовать GPU на Amazon

Отнормируем данные (поделим на максимум и вычтем среднее). Затем добавим флипы - разворот каждого изображения. Таким образом увеличим размер трейна.

``````

In :

X_train = X_train / np.float32(255)
X_test = X_test / np.float32(255)
m = np.mean(X_train, axis=0)
X_train = X_train - m
X_test = X_test - m

X_train_flip = X_train[:,:,:,::-1]
y_train_flip = y_train
X_train = np.concatenate((X_train, X_train_flip), axis=0)
y_train = np.concatenate((y_train, y_train_flip), axis=0)

``````
``````

In :

from lasagne.layers import Conv2DLayer as ConvLayer
from lasagne.layers import ElemwiseSumLayer
from lasagne.layers import InputLayer
from lasagne.layers import DenseLayer
from lasagne.layers import GlobalPoolLayer
from lasagne.layers import ExpressionLayer
from lasagne.layers import NonlinearityLayer
from lasagne.nonlinearities import softmax, rectify
from lasagne.layers import batch_norm

def build_cnn(input_var=None, n=5):

# create a residual learning building block with two stacked 3x3 convlayers as in paper
def residual_block(l, increase_dim=False, projection=False):
input_num_filters = l.output_shape
if increase_dim:
first_stride = (2,2)
out_num_filters = input_num_filters*2
else:
first_stride = (1,1)
out_num_filters = input_num_filters

stack_1 = batch_norm(ConvLayer(l, num_filters=out_num_filters, filter_size=(3,3),
W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))
stack_2 = batch_norm(ConvLayer(stack_1, num_filters=out_num_filters, filter_size=(3,3),
W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))

if increase_dim:
if projection:
# projection shortcut, as option B in paper
projection = batch_norm(ConvLayer(l, num_filters=out_num_filters, filter_size=(1,1),
b=None, flip_filters=False))
block = NonlinearityLayer(ElemwiseSumLayer([stack_2, projection]),nonlinearity=rectify)
else:
# identity shortcut, as option A in paper
identity = ExpressionLayer(l, lambda X: X[:, :, ::2, ::2],
lambda s: (s, s, s//2, s//2))
else:
block = NonlinearityLayer(ElemwiseSumLayer([stack_2, l]),nonlinearity=rectify)

return block

# Building the network
l_in = InputLayer(shape=(None, 3, 32, 32), input_var=input_var)

# first layer, output is 16 x 32 x 32
l = batch_norm(ConvLayer(l_in, num_filters=16, filter_size=(3,3), stride=(1,1),
flip_filters=False))

# first stack of residual blocks, output is 16 x 32 x 32
for _ in range(n):
l = residual_block(l)

# second stack of residual blocks, output is 32 x 16 x 16
l = residual_block(l, increase_dim=True)
for _ in range(1,n):
l = residual_block(l)

# third stack of residual blocks, output is 64 x 8 x 8
l = residual_block(l, increase_dim=True)
for _ in range(1,n):
l = residual_block(l)

# average pooling
l = GlobalPoolLayer(l)

# fully connected layer
network = DenseLayer(
l, num_units=10,
W=lasagne.init.HeNormal(),
nonlinearity=softmax)

return network

``````

При \$n=9\$ получаем ResNet54

``````

In :

net2 = build_cnn(input_var=input_X, n=9)

``````

Добавим к функции потерь L2 регуляризатор

``````

In :

y_predicted = lasagne.layers.get_output(net2)
loss = lasagne.objectives.categorical_crossentropy(y_predicted, target_y)
loss = loss.mean()
all_layers = lasagne.layers.get_all_layers(net2)
l2_penalty = lasagne.regularization.regularize_layer_params(all_layers, lasagne.regularization.l2) * 0.0001
loss = loss + l2_penalty

accuracy = lasagne.objectives.categorical_accuracy(y_predicted, target_y).mean()

``````

Будем использовать моментум, так как с ним вышло лучше. Learning_rate делаем shared переменной, чтобы уменьшать его в процессе обучения

``````

In :

params = lasagne.layers.get_all_params(net2, trainable=True)
lr = 0.1
sh_lr = theano.shared(lasagne.utils.floatX(lr))
loss, params, learning_rate=sh_lr, momentum=0.9)

accuracy_fun = theano.function([input_X,target_y],accuracy, allow_input_downcast=True)

``````
``````

In :

all_weights = lasagne.layers.get_all_params(net2)
print all_weights

``````
``````

[W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, beta, gamma, mean, inv_std, W, b]

``````

### Вот и всё, пошли её учить

Добавим data augmentation, так как это сильно улучшает качество. Флипы мы добавили в трейн, теперь для каждого минибатча будет делать произвольный crop - обрезание картинки, и паддинг.

``````

In :

def iterate_minibatches(inputs, targets, batchsize, shuffle=False, augment=False):
assert len(inputs) == len(targets)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
if shuffle:
excerpt = indices[start_idx:start_idx + batchsize]
else:
excerpt = slice(start_idx, start_idx + batchsize)
if augment:
# as in paper :
# pad feature arrays with 4 pixels on each side
# and do random cropping of 32x32
random_cropped = np.zeros(inputs[excerpt].shape, dtype=np.float32)
crops = np.random.random_integers(0,high=8,size=(batchsize,2))
for r in range(batchsize):
inp_exc = random_cropped
else:
inp_exc = inputs[excerpt]

yield inp_exc, targets[excerpt]

``````
``````

In :

import time

num_epochs = 70 #количество проходов по данным

batch_size = 128 #размер мини-батча

for epoch in range(num_epochs):
# In each epoch, we do a full pass over the training data:
train_err = 0
train_acc = 0
train_batches = 0
if epoch == 40 or epoch == 60: # наилучший момент уменьшия l_r
new_lr = sh_lr.get_value() * 0.1
sh_lr.set_value(lasagne.utils.floatX(new_lr))
start_time = time.time()
for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True, augment=True):
inputs, targets = batch
train_err_batch, train_acc_batch= train_fun(inputs, targets)
train_err += train_err_batch
train_acc += train_acc_batch
train_batches += 1

# And a full pass over the validation data:
val_acc = 0
val_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
inputs, targets = batch
val_acc += accuracy_fun(inputs, targets)
val_batches += 1

# Then we print the results for this epoch:
print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
print("  train accuracy:\t\t{:.2f} %".format(train_acc / train_batches * 100))
print("  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))

``````
``````

/home/xdr007/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:17: DeprecationWarning: This function is deprecated. Please call randint(0, 8 + 1) instead

Epoch 1 of 70 took 248.943s
training loss (in-iteration):		2.570409
train accuracy:		31.23 %
validation accuracy:		51.05 %
Epoch 2 of 70 took 248.718s
training loss (in-iteration):		1.738468
train accuracy:		56.57 %
validation accuracy:		63.64 %
Epoch 3 of 70 took 248.866s
training loss (in-iteration):		1.345167
train accuracy:		68.48 %
validation accuracy:		73.61 %
Epoch 4 of 70 took 249.274s
training loss (in-iteration):		1.121178
train accuracy:		74.76 %
validation accuracy:		77.43 %
Epoch 5 of 70 took 249.401s
training loss (in-iteration):		0.983958
train accuracy:		77.98 %
validation accuracy:		78.76 %
Epoch 6 of 70 took 249.566s
training loss (in-iteration):		0.898225
train accuracy:		80.18 %
validation accuracy:		80.12 %
Epoch 7 of 70 took 249.477s
training loss (in-iteration):		0.835352
train accuracy:		81.70 %
validation accuracy:		81.26 %
Epoch 8 of 70 took 248.984s
training loss (in-iteration):		0.793521
train accuracy:		82.99 %
validation accuracy:		83.42 %
Epoch 9 of 70 took 248.959s
training loss (in-iteration):		0.759635
train accuracy:		83.93 %
validation accuracy:		83.20 %
Epoch 10 of 70 took 249.173s
training loss (in-iteration):		0.732706
train accuracy:		84.69 %
validation accuracy:		84.11 %
Epoch 11 of 70 took 249.277s
training loss (in-iteration):		0.711621
train accuracy:		85.39 %
validation accuracy:		83.79 %
Epoch 12 of 70 took 249.290s
training loss (in-iteration):		0.695190
train accuracy:		86.12 %
validation accuracy:		84.10 %
Epoch 13 of 70 took 249.255s
training loss (in-iteration):		0.686486
train accuracy:		86.44 %
validation accuracy:		84.63 %
Epoch 14 of 70 took 249.509s
training loss (in-iteration):		0.674746
train accuracy:		86.99 %
validation accuracy:		85.88 %
Epoch 15 of 70 took 249.137s
training loss (in-iteration):		0.664110
train accuracy:		87.27 %
validation accuracy:		86.25 %
Epoch 16 of 70 took 249.223s
training loss (in-iteration):		0.658030
train accuracy:		87.62 %
validation accuracy:		85.50 %
Epoch 17 of 70 took 248.108s
training loss (in-iteration):		0.648895
train accuracy:		87.90 %
validation accuracy:		85.94 %
Epoch 18 of 70 took 248.607s
training loss (in-iteration):		0.644557
train accuracy:		88.18 %
validation accuracy:		86.51 %
Epoch 19 of 70 took 248.116s
training loss (in-iteration):		0.637330
train accuracy:		88.56 %
validation accuracy:		86.45 %
Epoch 20 of 70 took 248.156s
training loss (in-iteration):		0.631456
train accuracy:		88.84 %
validation accuracy:		86.39 %
Epoch 21 of 70 took 248.650s
training loss (in-iteration):		0.625758
train accuracy:		89.24 %
validation accuracy:		87.72 %
Epoch 22 of 70 took 248.222s
training loss (in-iteration):		0.623292
train accuracy:		89.25 %
validation accuracy:		87.25 %
Epoch 23 of 70 took 248.811s
training loss (in-iteration):		0.614412
train accuracy:		89.59 %
validation accuracy:		86.57 %
Epoch 24 of 70 took 248.307s
training loss (in-iteration):		0.614621
train accuracy:		89.65 %
validation accuracy:		86.98 %
Epoch 25 of 70 took 247.985s
training loss (in-iteration):		0.612944
train accuracy:		89.78 %
validation accuracy:		86.51 %
Epoch 26 of 70 took 247.964s
training loss (in-iteration):		0.605423
train accuracy:		90.03 %
validation accuracy:		87.76 %
Epoch 27 of 70 took 248.043s
training loss (in-iteration):		0.606387
train accuracy:		89.96 %
validation accuracy:		88.56 %
Epoch 28 of 70 took 247.982s
training loss (in-iteration):		0.602207
train accuracy:		90.05 %
validation accuracy:		87.29 %
Epoch 29 of 70 took 247.984s
training loss (in-iteration):		0.601979
train accuracy:		90.17 %
validation accuracy:		88.21 %
Epoch 30 of 70 took 248.040s
training loss (in-iteration):		0.596241
train accuracy:		90.46 %
validation accuracy:		87.16 %
Epoch 31 of 70 took 248.112s
training loss (in-iteration):		0.594715
train accuracy:		90.42 %
validation accuracy:		87.05 %
Epoch 32 of 70 took 248.259s
training loss (in-iteration):		0.591966
train accuracy:		90.58 %
validation accuracy:		88.02 %
Epoch 33 of 70 took 248.252s
training loss (in-iteration):		0.595033
train accuracy:		90.46 %
validation accuracy:		88.24 %
Epoch 34 of 70 took 248.305s
training loss (in-iteration):		0.589867
train accuracy:		90.67 %
validation accuracy:		88.72 %
Epoch 35 of 70 took 248.230s
training loss (in-iteration):		0.584997
train accuracy:		90.94 %
validation accuracy:		87.34 %
Epoch 36 of 70 took 248.134s
training loss (in-iteration):		0.591500
train accuracy:		90.66 %
validation accuracy:		87.90 %
Epoch 37 of 70 took 247.928s
training loss (in-iteration):		0.581217
train accuracy:		91.06 %
validation accuracy:		88.17 %
Epoch 38 of 70 took 248.145s
training loss (in-iteration):		0.582172
train accuracy:		91.03 %
validation accuracy:		87.96 %
Epoch 39 of 70 took 247.632s
training loss (in-iteration):		0.582201
train accuracy:		91.03 %
validation accuracy:		87.03 %
Epoch 40 of 70 took 247.856s
training loss (in-iteration):		0.579509
train accuracy:		91.13 %
validation accuracy:		88.04 %
Epoch 41 of 70 took 248.350s
training loss (in-iteration):		0.456510
train accuracy:		95.44 %
validation accuracy:		92.08 %
Epoch 42 of 70 took 248.203s
training loss (in-iteration):		0.402562
train accuracy:		97.02 %
validation accuracy:		92.61 %
Epoch 43 of 70 took 248.331s
training loss (in-iteration):		0.380056
train accuracy:		97.49 %
validation accuracy:		92.47 %
Epoch 44 of 70 took 248.239s
training loss (in-iteration):		0.361978
train accuracy:		97.88 %
validation accuracy:		92.87 %
Epoch 45 of 70 took 248.155s
training loss (in-iteration):		0.345604
train accuracy:		98.15 %
validation accuracy:		92.81 %
Epoch 46 of 70 took 248.755s
training loss (in-iteration):		0.332810
train accuracy:		98.34 %
validation accuracy:		92.52 %
Epoch 47 of 70 took 248.808s
training loss (in-iteration):		0.320478
train accuracy:		98.54 %
validation accuracy:		92.30 %
Epoch 48 of 70 took 248.043s
training loss (in-iteration):		0.308249
train accuracy:		98.73 %
validation accuracy:		92.94 %
Epoch 49 of 70 took 248.136s
training loss (in-iteration):		0.298398
train accuracy:		98.84 %
validation accuracy:		92.42 %
Epoch 50 of 70 took 248.167s
training loss (in-iteration):		0.288857
train accuracy:		98.96 %
validation accuracy:		92.47 %
Epoch 51 of 70 took 248.211s
training loss (in-iteration):		0.280645
train accuracy:		98.96 %
validation accuracy:		92.22 %
Epoch 52 of 70 took 248.229s
training loss (in-iteration):		0.271196
train accuracy:		99.12 %
validation accuracy:		92.34 %
Epoch 53 of 70 took 248.464s
training loss (in-iteration):		0.264401
train accuracy:		99.17 %
validation accuracy:		92.32 %
Epoch 54 of 70 took 248.517s
training loss (in-iteration):		0.256491
train accuracy:		99.25 %
validation accuracy:		92.50 %

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-38-d2a9865aee7f> in <module>()
16     for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True, augment=True):
17         inputs, targets = batch
---> 18         train_err_batch, train_acc_batch= train_fun(inputs, targets)
19         train_err += train_err_batch
20         train_acc += train_acc_batch

/home/xdr007/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
882         try:
883             outputs =\
--> 884                 self.fn() if output_subset is None else\
885                 self.fn(output_subset=output_subset)
886         except Exception:

KeyboardInterrupt:

``````

Так как нужное качество достиглось раньше, прервем обучение, так как выше качество поднимется не сильно (проверял), а учится не быстро.

Сохраним веса модели в файл

``````

In :

np.savez('modelresnet54.npz', *lasagne.layers.get_all_param_values(net2))

``````
``````

In :

test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500):
inputs, targets = batch
acc = accuracy_fun(inputs, targets)
test_acc += acc
test_batches += 1
print("Final results:")
print("  test accuracy:\t\t{:.2f} %".format(
test_acc / test_batches * 100))

if test_acc / test_batches * 100 > 92.5:
print "Achievement unlocked: колдун 80 уровня"
else:
print "Нужно больше магии!"

``````
``````

Final results:
test accuracy:		92.79 %
Achievement unlocked: колдун 80 уровня

``````

# Дополнительно - попытка обучить VGG до хорошего качества

## VGG

``````

In :

def create_vgg(input_var, input_shape=(3, 32, 32), **junk):
# input layer
network = lasagne.layers.InputLayer(shape=(None,) + input_shape,
input_var=input_var)
# conv-relu-conv-relu-pool layers
for num_filters in ccp_num_filters:
network = lasagne.layers.Conv2DLayer(
network, num_filters=num_filters,
filter_size=(ccp_filter_size, ccp_filter_size),
nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
W=lasagne.init.GlorotUniform(gain='relu'))
network = lasagne.layers.Conv2DLayer(
network, num_filters=num_filters,
filter_size=(ccp_filter_size, ccp_filter_size),
nonlinearity=lasagne.nonlinearities.very_leaky_rectify,
W=lasagne.init.GlorotUniform(gain='relu'))
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
# fc-relu
for num_units in fc_num_units:
network = lasagne.layers.DenseLayer(
lasagne.layers.dropout(network, p=.5),
num_units=num_units,
nonlinearity=lasagne.nonlinearities.rectify,
W=lasagne.init.GlorotUniform(gain='relu'))
# output layer
network = lasagne.layers.DenseLayer(
lasagne.layers.dropout(network, p=.5),
num_units=num_classes,
nonlinearity=lasagne.nonlinearities.softmax)
return network

``````

Создадим неполную VGG - (Conv-Conv-Pool)*4 - Dense - Drop - Dense - Drop - Out. Так же будем использовать нелинейность very_leaky_rectify, чтобы не затухал градиент.

``````

In :

param = dict(ccp_num_filters=[64, 128, 256, 512], ccp_filter_size=3,
fc_num_units=[1024, 1024], num_classes=10)

vgg = create_vgg(input_X, **param)

``````

Будем уменьшать learning_rate в зависимости от эпохи как указано ниже

``````

In :

LR_SCHEDULE = {
0: 0.001,
25: 0.0001,
35: 0.00001,
45: 0.000001,
55: 0.0000001
}

``````
``````

In :

y_predicted = lasagne.layers.get_output(vgg, deterministic=True)
all_weights = lasagne.layers.get_all_params(vgg)
print all_weights

``````
``````

[W, b, W, b, W, b, W, b, W, b, W, b, W, b, W, b, W, b, W, b, W, b]

``````
``````

In :

loss = lasagne.objectives.categorical_crossentropy(y_predicted, target_y).mean()
accuracy = lasagne.objectives.categorical_accuracy(y_predicted, target_y).mean()

``````
``````

In :

l_r = theano.shared(np.array(LR_SCHEDULE, dtype=theano.config.floatX))

train_fun = theano.function([input_X,target_y],[loss, accuracy],
allow_input_downcast=True)
accuracy_fun = theano.function([input_X,target_y],accuracy, allow_input_downcast=True)

``````

### Вот и всё, пошли её учить

``````

In :

import time

num_epochs = 80 #количество проходов по данным

batch_size = 250 #размер мини-батча

for epoch in range(num_epochs):
# In each epoch, we do a full pass over the training data:
train_err = 0
train_acc = 0
train_batches = 0
if epoch in LR_SCHEDULE:
l_r.set_value(LR_SCHEDULE[epoch])
start_time = time.time()
for batch in iterate_minibatches(X_train, y_train,batch_size, shuffle=True, augment=True):
inputs, targets = batch
train_err_batch, train_acc_batch= train_fun(inputs, targets)
train_err += train_err_batch
train_acc += train_acc_batch
train_batches += 1

# And a full pass over the validation data:
val_acc = 0
val_batches = 0
for batch in iterate_minibatches(X_test, y_test, batch_size):
inputs, targets = batch
val_acc += accuracy_fun(inputs, targets)
val_batches += 1

# Then we print the results for this epoch:
print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
print("  train accuracy:\t\t{:.2f} %".format(train_acc / train_batches * 100))
print("  validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))

``````
``````

/home/xdr007/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:17: DeprecationWarning: This function is deprecated. Please call randint(0, 8 + 1) instead

Epoch 1 of 80 took 191.535s
training loss (in-iteration):		1.322589
train accuracy:		51.83 %
validation accuracy:		66.72 %
Epoch 2 of 80 took 191.609s
training loss (in-iteration):		0.749958
train accuracy:		73.93 %
validation accuracy:		78.04 %
Epoch 3 of 80 took 191.543s
training loss (in-iteration):		0.590508
train accuracy:		79.70 %
validation accuracy:		80.60 %
Epoch 4 of 80 took 191.541s
training loss (in-iteration):		0.507436
train accuracy:		82.55 %
validation accuracy:		84.17 %
Epoch 5 of 80 took 191.525s
training loss (in-iteration):		0.454285
train accuracy:		84.45 %
validation accuracy:		84.45 %
Epoch 6 of 80 took 191.524s
training loss (in-iteration):		0.420276
train accuracy:		85.63 %
validation accuracy:		84.64 %
Epoch 7 of 80 took 191.620s
training loss (in-iteration):		0.384233
train accuracy:		86.86 %
validation accuracy:		85.11 %
Epoch 8 of 80 took 191.575s
training loss (in-iteration):		0.366902
train accuracy:		87.42 %
validation accuracy:		84.91 %
Epoch 9 of 80 took 191.491s
training loss (in-iteration):		0.341593
train accuracy:		88.33 %
validation accuracy:		85.30 %
Epoch 10 of 80 took 191.588s
training loss (in-iteration):		0.324303
train accuracy:		88.97 %
validation accuracy:		86.96 %
Epoch 11 of 80 took 191.547s
training loss (in-iteration):		0.313465
train accuracy:		89.36 %
validation accuracy:		87.10 %
Epoch 12 of 80 took 191.533s
training loss (in-iteration):		0.296765
train accuracy:		89.96 %
validation accuracy:		87.04 %
Epoch 13 of 80 took 191.580s
training loss (in-iteration):		0.283628
train accuracy:		90.47 %
validation accuracy:		86.13 %
Epoch 14 of 80 took 191.531s
training loss (in-iteration):		0.272414
train accuracy:		90.82 %
validation accuracy:		87.51 %
Epoch 15 of 80 took 191.633s
training loss (in-iteration):		0.263824
train accuracy:		91.07 %
validation accuracy:		86.99 %
Epoch 16 of 80 took 191.605s
training loss (in-iteration):		0.247188
train accuracy:		91.79 %
validation accuracy:		87.18 %
Epoch 17 of 80 took 191.556s
training loss (in-iteration):		0.245085
train accuracy:		91.86 %
validation accuracy:		86.38 %
Epoch 18 of 80 took 191.565s
training loss (in-iteration):		0.240064
train accuracy:		92.15 %
validation accuracy:		87.63 %
Epoch 19 of 80 took 191.590s
training loss (in-iteration):		0.241391
train accuracy:		92.10 %
validation accuracy:		87.65 %
Epoch 20 of 80 took 191.609s
training loss (in-iteration):		0.222962
train accuracy:		92.59 %
validation accuracy:		87.98 %
Epoch 21 of 80 took 191.570s
training loss (in-iteration):		0.223210
train accuracy:		92.63 %
validation accuracy:		87.51 %
Epoch 22 of 80 took 191.610s
training loss (in-iteration):		0.214188
train accuracy:		93.00 %
validation accuracy:		88.60 %
Epoch 23 of 80 took 191.547s
training loss (in-iteration):		0.204413
train accuracy:		93.45 %
validation accuracy:		88.08 %
Epoch 24 of 80 took 191.673s
training loss (in-iteration):		0.206335
train accuracy:		93.39 %
validation accuracy:		87.37 %
Epoch 25 of 80 took 191.580s
training loss (in-iteration):		0.199097
train accuracy:		93.59 %
validation accuracy:		88.55 %
Epoch 26 of 80 took 191.545s
training loss (in-iteration):		0.091314
train accuracy:		96.93 %
validation accuracy:		90.93 %
Epoch 27 of 80 took 191.564s
training loss (in-iteration):		0.064585
train accuracy:		97.79 %
validation accuracy:		91.21 %
Epoch 28 of 80 took 191.549s
training loss (in-iteration):		0.055307
train accuracy:		98.12 %
validation accuracy:		91.11 %
Epoch 29 of 80 took 191.597s
training loss (in-iteration):		0.047676
train accuracy:		98.34 %
validation accuracy:		91.04 %
Epoch 30 of 80 took 191.616s
training loss (in-iteration):		0.042393
train accuracy:		98.55 %
validation accuracy:		91.01 %
Epoch 31 of 80 took 191.561s
training loss (in-iteration):		0.037814
train accuracy:		98.72 %
validation accuracy:		90.96 %
Epoch 32 of 80 took 191.514s
training loss (in-iteration):		0.034633
train accuracy:		98.83 %
validation accuracy:		91.27 %
Epoch 33 of 80 took 191.126s
training loss (in-iteration):		0.032671
train accuracy:		98.91 %
validation accuracy:		91.19 %
Epoch 34 of 80 took 191.317s
training loss (in-iteration):		0.029474
train accuracy:		98.99 %
validation accuracy:		91.24 %
Epoch 35 of 80 took 190.438s
training loss (in-iteration):		0.028296
train accuracy:		99.04 %
validation accuracy:		91.30 %
Epoch 36 of 80 took 190.371s
training loss (in-iteration):		0.021907
train accuracy:		99.27 %
validation accuracy:		91.44 %
Epoch 37 of 80 took 191.302s
training loss (in-iteration):		0.019583
train accuracy:		99.33 %
validation accuracy:		91.43 %
Epoch 38 of 80 took 191.288s
training loss (in-iteration):		0.019452
train accuracy:		99.33 %
validation accuracy:		91.45 %
Epoch 39 of 80 took 191.192s
training loss (in-iteration):		0.018386
train accuracy:		99.37 %
validation accuracy:		91.41 %
Epoch 40 of 80 took 191.244s
training loss (in-iteration):		0.017462
train accuracy:		99.43 %
validation accuracy:		91.40 %

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-21-27f7c511d01d> in <module>()
15     for batch in iterate_minibatches(X_train, y_train,batch_size, shuffle=True, augment=True):
16         inputs, targets = batch
---> 17         train_err_batch, train_acc_batch= train_fun(inputs, targets)
18         train_err += train_err_batch
19         train_acc += train_acc_batch

/home/xdr007/anaconda2/lib/python2.7/site-packages/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
882         try:
883             outputs =\
--> 884                 self.fn() if output_subset is None else\
885                 self.fn(output_subset=output_subset)
886         except Exception:

KeyboardInterrupt:

``````

Видно, что сетка переобучилась и качества не хватает для порога, но тем не менее качество очень неплохое. Довольно сильно помогло data augmentation и правильное изменения learning rate

``````

In [ ]:

test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500):
inputs, targets = batch
acc = accuracy_fun(inputs, targets)
test_acc += acc
test_batches += 1
print("Final results:")
print("  test accuracy:\t\t{:.2f} %".format(
test_acc / test_batches * 100))

if test_acc / test_batches * 100 > 92.5:
print "Achievement unlocked: колдун 80 уровня"
else:
print "Нужно больше магии!"

``````
``````

In [ ]:

np.savez('modelcnn.npz', *lasagne.layers.get_all_param_values(vgg))

``````