Regression with BIWI head pose dataset

This is a more advanced example that shows how to create custom datasets and do regression with images. Our task is to find the center of the head in each image. The data comes from the BIWI head pose dataset (thanks to Gabriele Fanelli et al.). The images have been converted to jpeg format, so you should download the converted dataset rather than the original depth data.


In [ ]:
# Auto-reload edited Python modules so library changes take effect without
# restarting the kernel, and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [ ]:
from fastai.vision import *

Getting and converting the data


In [ ]:
# Download (if not already cached) and extract the converted BIWI head-pose
# dataset; returns the local Path of the extracted folder.
path = untar_data(URLs.BIWI_HEAD_POSE)

In [ ]:
# Camera calibration (intrinsics) for the RGB camera: keep only the first
# 3 rows of subject 01's rgb.cal (skip_footer drops the rest). Per the
# matrix shown below: focal lengths on the diagonal, principal point in the
# last column. Trailing `cal` displays the matrix.
cal = np.genfromtxt(path/'01'/'rgb.cal', skip_footer=6); cal


Out[ ]:
array([[517.679,   0.   , 320.   ],
       [  0.   , 517.679, 240.5  ],
       [  0.   ,   0.   ,   1.   ]])

In [ ]:
# One sample frame (subject 09) used to walk through the conversion below.
fname = '09/frame_00667_rgb.jpg'

In [ ]:
def img2txt_name(f, base=None):
    """Map an RGB frame filename to its companion pose-annotation file.

    BIWI stores each frame as `..._rgb.jpg` with the head pose in a sibling
    `..._pose.txt`; this swaps the `rgb.jpg` suffix for `pose.txt`.

    Parameters
    ----------
    f : str or Path
        Frame path relative to the dataset root, ending in 'rgb.jpg'.
    base : Path, optional
        Dataset root to resolve against; defaults to the module-level `path`.

    Returns
    -------
    Path to the corresponding pose text file.
    """
    root = path if base is None else base
    # Strip the trailing 'rgb.jpg' by its length instead of a magic number 7.
    return root/f'{str(f)[:-len("rgb.jpg")]}pose.txt'

In [ ]:
# Load the sample frame as a fastai Image and display it.
img = open_image(path/fname)
img.show()



In [ ]:
# Raw 3D head-center annotation for the sample frame: skip the 3 header
# lines of the pose file; the remaining line holds 3 coordinates.
ctr = np.genfromtxt(img2txt_name(fname), skip_header=3); ctr


Out[ ]:
array([187.332 ,  40.3892, 893.135 ])

In [ ]:
def convert_biwi(coords, cal_matrix=None):
    """Project a BIWI 3D head-center coordinate onto the image plane.

    Applies a pinhole-camera projection with the calibration matrix:
    pixel = coord * focal_length / depth + principal_point, per axis.

    Parameters
    ----------
    coords : sequence of 3 floats
        (x, y, z) position read from the pose file; coords[2] (depth)
        is the divisor.
    cal_matrix : 3x3 array-like, optional
        Camera intrinsics; defaults to the module-level `cal` loaded from
        `rgb.cal` earlier in the notebook (backward compatible).

    Returns
    -------
    Tensor [c2, c1] — note the (y, x) ordering, preserved from the
    original implementation for the downstream ImagePoints target.
    """
    m = cal if cal_matrix is None else cal_matrix
    # x -> image column: scale by focal length, divide by depth, add center.
    c1 = coords[0] * m[0][0] / coords[2] + m[0][2]
    # y -> image row, same projection with the y focal length / center.
    c2 = coords[1] * m[1][1] / coords[2] + m[1][2]
    return tensor([c2, c1])

def get_ctr(f):
    """Return the head center of frame `f` in image coordinates.

    Reads the raw 3D annotation from the frame's pose file (skipping its
    3-line header) and projects it with the camera calibration.
    """
    raw_pose = np.genfromtxt(img2txt_name(f), skip_header=3)
    return convert_biwi(raw_pose)

def get_ip(img, pts):
    """Wrap raw point coordinates as a scaled `ImagePoints` target for `img`."""
    flow = FlowField(img.size, pts)
    return ImagePoints(flow, scale=True)

In [ ]:
# Head center of the sample frame converted to image coordinates.
get_ctr(fname)


Out[ ]:
tensor([263.9104, 428.5814])

In [ ]:
# Overlay the converted head-center point on the sample image.
ctr = get_ctr(fname)
img.show(y=get_ip(img, ctr), figsize=(6, 6))


Creating a dataset


In [ ]:
# Build the DataBunch: each image is labelled with its head-center point.
data = (PointsItemList.from_folder(path)
        .split_by_valid_func(lambda o: o.parent.name=='13')  # hold out subject 13 for validation
        .label_from_func(get_ctr)  # target = projected head center
        .transform(get_transforms(), tfm_y=True, size=(120,160))  # tfm_y: transform points with images
        .databunch().normalize(imagenet_stats)
       )

In [ ]:
# Sanity-check: show a grid of training images with their target points.
data.show_batch(3, figsize=(9,6))


Train model


In [ ]:
# Build a CNN learner on a resnet34 backbone for this point-regression data.
learn = cnn_learner(data, models.resnet34)

In [ ]:
# Learning-rate finder: sweep learning rates, then plot loss vs. LR to pick one.
learn.lr_find()
learn.recorder.plot()


LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

In [ ]:
# Learning rate chosen from the LR-finder plot above.
lr = 2e-2

In [ ]:
# Train for 5 epochs with the one-cycle policy at the chosen max LR.
learn.fit_one_cycle(5, slice(lr))


Total time: 04:09

epoch train_loss valid_loss
1 0.052076 0.003733
2 0.018467 0.002182
3 0.009045 0.011063
4 0.005409 0.002293
5 0.004046 0.002766


In [ ]:
# Checkpoint the trained weights.
learn.save('stage-1')

In [ ]:
# Reload the checkpoint; the trailing ';' suppresses the Learner repr output.
learn.load('stage-1');

In [ ]:
# Show validation images with the model's predicted head-center points.
learn.show_results()


Data augmentation


In [ ]:
# Stronger-than-default augmentation, applied with probability 1 so the
# effect is always visible in the grid below.
tfms = get_transforms(max_rotate=20, max_zoom=1.5, max_lighting=0.5, max_warp=0.4, p_affine=1., p_lighting=1.)

# Rebuild the DataBunch with the stronger transforms (same split and labels).
data = (PointsItemList.from_folder(path)
        .split_by_valid_func(lambda o: o.parent.name=='13')  # subject 13 -> validation
        .label_from_func(get_ctr)
        .transform(tfms, tfm_y=True, size=(120,160))  # points transformed together with images
        .databunch().normalize(imagenet_stats)
       )

In [ ]:
def _plot(i,j,ax):
    # Grid indices i, j are intentionally unused: every panel draws the SAME
    # training example, so the visual differences come purely from the random
    # augmentation applied each time train_ds[0] is accessed.
    x,y = data.train_ds[0]
    x.show(ax, y=y)

# Render a 3x3 grid of augmented versions of one sample.
plot_multi(_plot, 3, 3, figsize=(8,6))



In [ ]: