In [1]:
    
import os
import numpy as np
np.random.seed(777)
import cv2
import scipy
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
    
In [2]:
    
path_to_caffemodel = 'bvlc_reference_caffenet.caffemodel'
path_to_chainermodel = 'bvlc_reference_caffenet.chainermodel'
path_to_mean = 'ilsvrc_2012_mean.npy'
path_to_dataset = 'oxfordflower17/'
    
In [3]:
    
if not os.path.isdir(path_to_dataset):
    ! wget http://www.robots.ox.ac.uk/~vgg/data/bicos/data/oxfordflower17.tar
    ! tar -xf oxfordflower17.tar
    
In [4]:
    
from scipy import io
labels = scipy.io.loadmat(path_to_dataset + 'imagelabels.mat')['labels'][0]
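# the .mat labels are 1-indexed; shift them to 0..16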
labels -= 1
print(labels)
print(len(labels))
    
    
In [5]:
    
path_to_images = path_to_dataset + 'jpg/'
images = os.listdir(path_to_images)
images = [path_to_images+i for i in images if i.endswith('.jpg')]
images.sort()
print(len(images))
    
    
In [6]:
    
train_data = []
train_labels = []
test_data = []
test_labels = []
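# images are sorted, so each class occupies a contiguous block of 80 files;
# per block, a fresh permutation sends 60 images to train and 20 to test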
for i,image in enumerate(images):
    if i % 80 == 0: 
        perm = np.random.permutation(80)
    if perm[i % 80] < 60:
        train_data.append(images[i])
        train_labels.append(labels[i])
    else:
        test_data.append(images[i])
        test_labels.append(labels[i])
    
In [7]:
    
label_names = ['Daffodil', 'Snowdrop', 'Lily Valley', 'Bluebell',
               'Crocus', 'Iris', 'Tigerlily', 'Tulip', 'Fritillary',
               'Sunflower', 'Daisy', 'Colts\' Foot', 'Dandelion',
               'Cowslip', 'Buttercup', 'Windflower', 'Pansy']
    
In [8]:
    
plt.imshow(cv2.imread(train_data[0], cv2.IMREAD_COLOR)[:,:,[2,1,0]])
print(label_names[train_labels[0]])
    
    
    
In [9]:
    
if not os.path.isfile(path_to_caffemodel):
    ! wget http://dl.caffe.berkeleyvision.org/bvlc_reference_caffenet.caffemodel
    
In [10]:
    
import chainer
from chainer import cuda
import chainer.functions as F
import chainer.links as L
from chainer import optimizers
from chainer import serializers
    
In [11]:
    
import chainer.functions.caffe
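# first run: convert the Caffe weights (slow) and cache them as HDF5;
# later runs load the cache into a CaffeNet skeleton defined in a local caffenet.py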
if not os.path.isfile(path_to_chainermodel):
    caffenet = chainer.functions.caffe.CaffeFunction(path_to_caffemodel)
    chainer.serializers.save_hdf5(path_to_chainermodel, caffenet)
else:
    from caffenet import CaffeNet
    caffenet = CaffeNet()
    chainer.serializers.load_hdf5(path_to_chainermodel, caffenet)
    
In [12]:
    
class Finetune(chainer.Chain):
    def __init__(self):
        super(Finetune, self).__init__(
            conv1=L.Convolution2D(3, 96, ksize=11, stride=4),
            conv2=L.Convolution2D(96, 256, ksize=5, pad=2),
            conv3=L.Convolution2D(256, 384, ksize=3, pad=1),
            conv4=L.Convolution2D(384, 384, ksize=3, pad=1),
            conv5=L.Convolution2D(384, 256, ksize=3, pad=1),
        )
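        # conv1-conv5 stay fixed (no optimizer is attached to them); ft1 holds the
        # pre-trained fc6/fc7 and ft2 the new 17-way classifier, kept in separate
        # chains so they can be trained with different learning rates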
        self.ft1 = chainer.Chain(
            fc6=L.Linear(9216, 4096),
            fc7=L.Linear(4096, 4096),
        )
        self.ft2 = chainer.Chain(
            fc8ft=L.Linear(4096, 17),
        )
    def __call__(self, x, t, train=True):
        y = self.forward(x, train=train)
        self.loss = F.softmax_cross_entropy(y, t)
        self.accuracy = F.accuracy(y, t)
        return self.loss
    def forward(self, x, train=False):
        if train: x.volatile = 'on' # do not backward while volatile='on'
        h = F.relu(self.conv1(x))
        h = F.max_pooling_2d(h, ksize=3, stride=2)
        h = F.local_response_normalization(h, k=5, n=5, alpha=1e-4, beta=0.75)
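        # multiplying by 5**0.75 adapts Chainer's LRN to Caffe's parameterization
        # (equivalent to k=1 with alpha divided by n, as in the original CaffeNet)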
        h *= np.power(5, 0.75)
        h = F.relu(self.conv2(h))
        h = F.max_pooling_2d(h, ksize=3, stride=2)
        h = F.local_response_normalization(h, k=5, n=5, alpha=1e-4, beta=0.75)
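        # same LRN rescaling as after the first pooling layer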
        h *= np.power(5, 0.75)
        h = F.relu(self.conv3(h))
        h = F.relu(self.conv4(h))
        h = F.relu(self.conv5(h))
        h = F.max_pooling_2d(h, ksize=3, stride=2)
        if train: h.volatile = 'off' # backward to fc6
        h = F.dropout(F.relu(self.ft1.fc6(h)), train=train)
        h = F.dropout(F.relu(self.ft1.fc7(h)), train=train)
        h = self.ft2.fc8ft(h)
        return h
    
In [13]:
    
def load_params(dst_model, src_model):
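    # copy parameters between child links that share a name (conv1..conv5, fc6, fc7);
    # incompatible links just print the error instead of aborting the copy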
    for src_param in src_model.children():
        for dst_param in dst_model.children():
            if dst_param.name == src_param.name:
                try:
                    dst_param.copyparams(src_param)
                except Exception as e:
                    print(e)
                break
    
In [14]:
    
model = Finetune()
load_params(model, caffenet)
load_params(model.ft1, caffenet)
del caffenet
    
In [15]:
    
if not os.path.isfile(path_to_mean):
    ! wget https://github.com/BVLC/caffe/raw/master/python/caffe/imagenet/ilsvrc_2012_mean.npy
    
In [16]:
    
mean = np.load(path_to_mean).mean(1).mean(1)
print(mean) # BGR order
    
    
In [17]:
    
def improcess(image, train):
    if train:
        image = cv2.resize(image, (256,256))
        if np.random.randint(0, 2): # flip with probability 1/2
            image = image[:,::-1,:] # random horizontal flip
        t = np.random.randint(0, 256-227+1) # top
        l = np.random.randint(0, 256-227+1) # left
        image = image[t:t+227, l:l+227, :] # random 227x227 crop
    else:
        image = cv2.resize(image, (227,227))
    image = np.transpose(image, (2,0,1)).astype(np.float32)
    image -= mean[:, np.newaxis, np.newaxis]
    return image
def imdeprocess(image):
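    # note: modifies its argument in place and returns BGR scaled to [0, 1] for plt.imshow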
    image += mean[:, np.newaxis, np.newaxis]
    image /= 255
    image = image.transpose(1,2,0)
    return image
    
In [18]:
    
optimizer1 = chainer.optimizers.MomentumSGD(lr=1e-5)
optimizer1.setup(model.ft1)
optimizer1.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
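# the new classifier (ft2) trains with a 100x larger learning rate than the
# pre-trained fc6/fc7 (ft1), a common fine-tuning heuristic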
optimizer2 = chainer.optimizers.MomentumSGD(lr=1e-3)
optimizer2.setup(model.ft2)
optimizer2.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))
    
In [19]:
    
gpuid = 0 # gpu device ID (cpu if negative)
xp = cuda.cupy if gpuid >= 0 else np
if gpuid >= 0:
    cuda.get_device(gpuid).use()
    model.to_gpu()
    model.ft1.to_gpu()
    model.ft2.to_gpu()
    
In [20]:
    
import csv
log_train = open('ft_train.log', 'w')
writer_train = csv.writer(log_train, lineterminator='\n')
writer_train.writerow(('iter', 'loss'))
log_test = open('ft_test.log', 'w')
writer_test = csv.writer(log_test, lineterminator='\n')
writer_test.writerow(('iter', 'loss', 'acc'))
    
In [21]:
    
def create_batch(x, t, indices, train):
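    # assemble a minibatch: images preprocessed with improcess, labels as int32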
    n_batch = len(indices)
    im_size = 227
    x_batch = np.zeros((n_batch,3,im_size,im_size), dtype=np.float32)
    t_batch = np.zeros(n_batch, dtype=np.int32)
    for i, index in enumerate(indices):
        t_batch[i] = t[index]
        image = cv2.imread(x[index], cv2.IMREAD_COLOR)
        x_batch[i] = improcess(image, train)
    return x_batch, t_batch
    
In [22]:
    
import sys
from tqdm import tqdm
def test(x, t):
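    # evaluate the whole test set in minibatches; volatile='on' skips building
    # the computational graph, so no intermediate results are kept for backprop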
    batchsize = 20
    sum_accuracy = sum_loss = 0
    with tqdm(total=len(t)) as pbar:
        pbar.set_description('test')
        for i in range(0, len(t), batchsize):
            pbar.update(batchsize)
            x_batch, t_batch = create_batch(x, t, np.arange(i,i + batchsize), train=False)
            x_batch = chainer.Variable(xp.asarray(x_batch), volatile='on')
            t_batch = chainer.Variable(xp.asarray(t_batch), volatile='on')
            loss = model(x_batch, t_batch, train=False)
            sum_loss += float(loss.data)
            sum_accuracy += float(model.accuracy.data)
    sys.stderr.flush()
    return sum_loss * batchsize / len(t), sum_accuracy * batchsize / len(t)
    
In [23]:
    
loss, acc = test(test_data, test_labels)
writer_test.writerow((0, loss, acc))
sys.stdout.write('test: loss={0:.6f}, accuracy={1:.6f}'.format(loss, acc))
    
    
    
In [24]:
    
n_epoch = 30
batchsize = 20
n_data = len(train_labels)
x = train_data
t = train_labels
# Learning loop
for epoch in range(n_epoch):
    sys.stdout.write('(epoch: {})\n'.format(epoch + 1))
    sys.stdout.flush()
    
    # training
    perm = np.random.permutation(n_data)
    with tqdm(total=n_data) as pbar:
        for i in range(0, n_data, batchsize):
            it = epoch * n_data + i + batchsize
            x_batch, t_batch = create_batch(x, t, perm[i:i + batchsize], train=True)
            x_batch = chainer.Variable(xp.asarray(x_batch), volatile='off')
            t_batch = chainer.Variable(xp.asarray(t_batch), volatile='off')
            model.ft1.cleargrads()
            model.ft2.cleargrads()
            loss = model(x_batch, t_batch)
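            # gradients stop at fc6: forward() ran conv1-conv5 in volatile mode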
            loss.backward()
            optimizer1.update()
            optimizer2.update()
            loss = float(model.loss.data)
            writer_train.writerow((it, loss))
            pbar.set_description('train: loss={0:.6f}'.format(loss))
            pbar.update(batchsize)
    sys.stderr.flush()
    
    # evaluate
    loss, acc = test(test_data, test_labels)
    writer_test.writerow((it, loss, acc))
    sys.stdout.write('test: loss={0:.6f}, accuracy={1:.6f}\n'.format(loss, acc))
    
In [25]:
    
log_train.close()
log_test.close()
log_train = np.loadtxt('ft_train.log', delimiter=',', skiprows=1).transpose()
log_test = np.loadtxt('ft_test.log', delimiter=',', skiprows=1).transpose()
    
In [26]:
    
fig, ax1 = plt.subplots()
ax1.plot(log_test[0], log_test[2], 'b-')
ax1.set_xlabel('iteration')
ax1.set_ylabel('accuracy', color='b')
ax1.set_ylim(0, 1)
for tl in ax1.get_yticklabels():
    tl.set_color('b')
plt.legend(['test accuracy'], bbox_to_anchor=(1.48, 1.05), framealpha=0)
ax2 = ax1.twinx()
ax2.plot(log_train[0], log_train[1], color='#ff7700')
ax2.plot(log_test[0], log_test[1], 'r-')
ax2.set_ylabel('loss', color='r')
for tl in ax2.get_yticklabels():
    tl.set_color('r')
plt.legend(['train loss', 'test loss'], bbox_to_anchor=(1.4, 0.96), framealpha=0)
plt.tight_layout()
plt.show()
    
    
In [27]:
    
index = np.random.randint(len(test_data))
image = cv2.imread(test_data[index], cv2.IMREAD_COLOR)
x = improcess(image, train=False)
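# improcess is applied afresh here because imdeprocess modifies its input
# in place, which would otherwise corrupt x before predict(x) below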
plt.imshow(imdeprocess(improcess(image, train=False))[:,:,::-1])
print(label_names[test_labels[index]])
    
    
    
In [28]:
    
def predict(x, top=5):
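    # forward one preprocessed image and print the top-k softmax scores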
    x = x[np.newaxis,:,:,:]
    x_batch = chainer.Variable(xp.asarray(x), volatile='on')
    y_batch = F.softmax(model.forward(x_batch))
    scores = cuda.to_cpu(y_batch.data)[0] # move scores to CPU before sorting
    results = list(zip(scores, label_names))
    results.sort(reverse=True)
    for rank, (score, name) in enumerate(results[:top], start=1):
        print('#{0:2d}| {1:12s} | {2:7.3f}%'.format(rank, name, float(score*100)))
    
In [29]:
    
predict(x)