In [ ]:
from __future__ import print_function
from sys import version_info
import matplotlib.pyplot as plt
import numpy as np
import os
import scipy
import theano
import theano.tensor as T
import lasagne
try:
    import cPickle as pickle
except ImportError:
    import pickle
%matplotlib inline
from scipy.misc import imread, imsave, imresize
from lasagne.utils import floatX
Lasagne has a plethora of pre-trained networks in its model zoo.
We'll start by picking VGG16 and deploying it in our notebook.
Warning! The VGG16 network needs around 3GB of memory to run prediction even on a single-image batch. If you don't have that luxury, try Binder or Azure Notebooks.
In [ ]:
!wget https://s3.amazonaws.com/lasagne/recipes/pretrained/imagenet/vgg16.pkl -O weights.pkl
In [ ]:
# copyright: see http://www.robots.ox.ac.uk/~vgg/research/very_deep/
from lasagne.layers import InputLayer
from lasagne.layers import DenseLayer
from lasagne.layers import NonlinearityLayer
from lasagne.layers import DropoutLayer
from lasagne.layers import Pool2DLayer as PoolLayer
from lasagne.layers import Conv2DLayer as ConvLayer
from lasagne.nonlinearities import softmax
def build_model():
    <paste network architecture here>
    return net
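For reference, below is a sketch of what the pasted architecture should look like, following the VGG16 definition from the Lasagne Recipes model zoo (the layer names and their order have to match the pickled weights, otherwise loading them later will fail).

# sketch of the VGG16 architecture as defined in the Lasagne Recipes model zoo;
# if you paste the official definition instead, keep the layer order intact
def build_model():
    net = {}
    net['input'] = InputLayer((None, 3, 224, 224))
    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False)
    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False)
    net['pool1'] = PoolLayer(net['conv1_2'], 2)
    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False)
    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False)
    net['pool2'] = PoolLayer(net['conv2_2'], 2)
    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False)
    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False)
    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False)
    net['pool3'] = PoolLayer(net['conv3_3'], 2)
    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False)
    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False)
    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False)
    net['pool4'] = PoolLayer(net['conv4_3'], 2)
    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False)
    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False)
    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False)
    net['pool5'] = PoolLayer(net['conv5_3'], 2)
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096)
    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5)
    net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096)
    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5)
    net['fc8'] = DenseLayer(net['fc7_dropout'], num_units=1000, nonlinearity=None)
    net['prob'] = NonlinearityLayer(net['fc8'], softmax)
    return net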
In [ ]:
#classes' names are stored here
classes = pickle.load(open('classes.pkl', 'rb'))
#for example, 10th class is ostrich:
print(classes[9])
You have to implement two functions in the cell below.
The preprocess function should take an image with shape (w, h, 3) and transform it into a tensor with shape (1, 3, 224, 224). Without this transformation, our net won't be able to digest the input image. Additionally, your preprocessing function has to rearrange the channels RGB -> BGR and subtract the mean value from every channel.
In [ ]:
MEAN_VALUES = np.array([104, 117, 123])
IMAGE_W = 224
def preprocess(img):
    img = <convert RGB to BGR>
    img = <subtract mean>
    # convert from [w, h, 3] to [1, 3, w, h]
    img = np.transpose(img, (2, 0, 1))[None]
    return floatX(img)

def deprocess(img):
    img = img.reshape(img.shape[1:]).transpose((1, 2, 0))
    for i in range(3):
        img[:, :, i] += MEAN_VALUES[i]
    return img[:, :, ::-1].astype(np.uint8)
img = (np.random.rand(IMAGE_W, IMAGE_W, 3) * 256).astype(np.uint8)
print(np.linalg.norm(deprocess(preprocess(img)) - img))
If your implementation is correct, the number above will be small, because the deprocess function is the inverse of the preprocess function.
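If you get stuck, here is one possible way to fill in the blanks above, assuming the input is an RGB image of shape (w, h, 3) as returned by imread:

# a possible completion of the cell above (not the only correct one)
def preprocess(img):
    img = img[:, :, ::-1]                      # RGB -> BGR
    img = img.astype('float64') - MEAN_VALUES  # subtract per-channel mean (broadcasts over the last axis)
    # convert from [w, h, 3] to [1, 3, w, h]
    img = np.transpose(img, (2, 0, 1))[None]
    return floatX(img)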
In [ ]:
net = build_model()
In [ ]:
with open('weights.pkl', 'rb') as f:
    if version_info.major == 2:
        weights = pickle.load(f)
    elif version_info.major == 3:
        weights = pickle.load(f, encoding='latin1')
<load weights into the network>
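A minimal sketch of that step, assuming the model-zoo pickle stores the weights under a 'param values' key (as the Lasagne recipes pickles do):

# push the pickled weights into the network, in layer order
lasagne.layers.set_all_param_values(net['prob'], weights['param values'])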
In [ ]:
input_image = T.tensor4('input')
output = lasagne.layers.get_output(net[<which layer>], input_image)
prob = theano.function([input_image], output)
In [ ]:
img = imread('sample_images/albatross.jpg')
plt.imshow(img)
plt.show()
p = prob(preprocess(img))
labels = p.ravel().argsort()[-1:-6:-1]
print('top-5 classes are:')
for l in labels:
    print('%.3f\t%s' % (p.ravel()[l], classes[l].split(',')[0]))
Try running the network 2-3 times. If the output changes between runs, we've probably done something wrong.
Figure out what the problem with the network is.
Hint: there are two such 'problematic' layers in VGG16, both near the end.
You can make the network deterministic by passing the corresponding flag to the lasagne.layers.get_output call above.
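As a reference, the deterministic version of the compiled function might look like this ('prob' being the softmax output layer of the model above):

# deterministic=True switches dropout (and other stochastic behaviour) off at inference time
output = lasagne.layers.get_output(net['prob'], input_image, deterministic=True)
prob = theano.function([input_image], output)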
In [ ]:
!wget https://www.dropbox.com/s/d61lupw909hc785/dogs_vs_cats.train.zip?dl=1 -O data.zip
!unzip data.zip
#you may need to adjust paths in the next section, depending on your OS
In [ ]:
#extract features from images
from tqdm import tqdm
from scipy.misc import imresize
X = []
Y = []
#this may be a tedious process. If so, store the results in some pickle and re-use them.
for fname in tqdm(os.listdir('train/')):
    y = fname.startswith("cat")
    img = imread("train/" + fname)
    img = preprocess(imresize(img, (IMAGE_W, IMAGE_W)))
    features = <compute the network's features for the image>
    Y.append(y)
    X.append(features)
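One possible completion: compile a feature extractor once, outside the loop, and call it on every preprocessed image. Taking 'fc7' activations is an assumption here; 'fc8' or 'pool5' features would work too.

# compile once, before the loop
feature_expr = lasagne.layers.get_output(net['fc7'], input_image, deterministic=True)
extract_features = theano.function([input_image], feature_expr)

# then, inside the loop above:
# features = extract_features(img)   # a [1 x 4096] matrix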
In [ ]:
X = np.concatenate(X) #stack all [1xfeature] matrices into one.
assert X.ndim==2
#WARNING! the concatenate works for [1xN] matrices. If you have other format, stack them yourself.
#crop if we ended prematurely
Y = Y[:len(X)]
In [ ]:
# note: in older scikit-learn versions this lived in sklearn.cross_validation
from sklearn.model_selection import train_test_split
<split data either here or by cross-validation>
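A minimal sketch of the split, assuming X and Y come from the cells above:

# hold out 25% of the images for validation; a fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, np.array(Y), test_size=0.25, random_state=42)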
Load our dakka: an arsenal of classifiers to fit on top of the extracted features.
In [ ]:
from sklearn.ensemble import RandomForestClassifier,ExtraTreesClassifier,GradientBoostingClassifier,AdaBoostClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
No methods are illegal: ensembling, data augmentation, NN hacks. Just don't let test data slip into training.
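For instance, a simple baseline on the frozen VGG features could look like this (assuming the train/test split from above):

# logistic regression on top of frozen VGG features as a sanity-check baseline
clf = LogisticRegression()
clf.fit(X_train, y_train)
print("test accuracy:", clf.score(X_test, y_test))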
The main requirement is that you implement the NN fine-tuning recipe:
lasagne.updates.mysupermegaoptimizer(loss, only_those_weights_i_wanna_train)

all_params = lasagne.layers.get_all_params(new_output_layer_or_layers, trainable=True)
old_params = lasagne.layers.get_all_params(neck_layers, trainable=True)
new_params = [w for w in all_params if w not in old_params]

old_updates = lasagne.updates.how_i_optimize_old_weights(loss, old_params)
new_updates = lasagne.updates.how_i_optimize_new_weights(loss, new_params)
updates = {}
updates.update(old_updates)
updates.update(new_updates)
assert len(updates) == len(old_updates) + len(new_updates)
### PROFIT!!!
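To make the recipe concrete, here is a sketch with real Lasagne optimizers: small-learning-rate SGD for the pre-trained 'neck' weights and Adam for the freshly added head. The names new_output_layer, loss and target are assumptions — substitute whatever you built for your fine-tuning setup.

from collections import OrderedDict

# hypothetical setup: new_output_layer is a fresh layer (or stack) you added on top of
# net['fc7'], loss is a scalar Theano expression, target is your label variable
all_params = lasagne.layers.get_all_params(new_output_layer, trainable=True)
old_params = lasagne.layers.get_all_params(net['fc7'], trainable=True)
new_params = [w for w in all_params if w not in old_params]

old_updates = lasagne.updates.sgd(loss, old_params, learning_rate=1e-4)   # gentle steps for pre-trained weights
new_updates = lasagne.updates.adam(loss, new_params, learning_rate=1e-3)  # larger steps for the new head

updates = OrderedDict(old_updates)
updates.update(new_updates)
assert len(updates) == len(old_updates) + len(new_updates)

train_step = theano.function([input_image, target], loss, updates=updates)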
In [ ]:
print("I can do it!")