Practical Deep Learning for Coders, v3

Lesson 3_camvid_tiramisu

Image segmentation with CamVid


In [ ]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [ ]:
# fastai.vision provides the image/mask helpers, SegmentationItemList,
# unet_learner and models used throughout this notebook.
from fastai import *
from fastai.vision import *
from fastai.callbacks.hooks import *

The One Hundred Layer Tiramisu paper used a modified version of CamVid, with smaller images and fewer classes. You can get it from the CamVid directory of this repo:
One Hundred Layer Tiramisu这篇论文使用了改进版的CamVid数据集,该数据集图片更小、类别更少。你可以在以下库中的CamVid目录里找到它:

git clone https://github.com/alexgkendall/SegNet-Tutorial.git

In [ ]:
# Root folder of the dataset; the `val` and `valannot` subfolders are read from it below.
path = Path('./data/camvid-tiramisu')

In [ ]:

Out[ ]:



In [ ]:
# Image files found in the `val` folder.
fnames = get_image_files(path/'val')

Out[ ]:

In [ ]:
# Image files found in the `valannot` folder (the image folder name plus 'annot').
lbl_names = get_image_files(path/'valannot')

Out[ ]:

In [ ]:
# Open the first validation image and display it.
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))

In [ ]:
def get_y_fn(x):
    """Return the label-mask path that corresponds to image path `x`.

    The mask is looked up in a sibling folder whose name is the image's
    folder name with 'annot' appended (e.g. `val` -> `valannot`), under the
    same file name as the image.
    """
    return Path(str(x.parent)+'annot')/x.name

# Names of the 12 segmentation classes. `name2id` (defined further down) maps
# each name to its position in this array.
# NOTE(review): presumably the mask pixel values use these same indices — confirm.
codes = array(['Sky', 'Building', 'Pole', 'Road', 'Sidewalk', 'Tree',
    'Sign', 'Fence', 'Car', 'Pedestrian', 'Cyclist', 'Void'])

In [ ]:
# Open the label mask that belongs to the sample image and display it fully opaque.
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)

In [ ]:
# Spatial size of the mask: its shape with the leading dimension dropped.
src_size = np.array(mask.shape[1:])
# Display the size together with the raw mask tensor (matches the recorded output).
src_size,mask.data

Out[ ]:
(array([360, 480]), tensor([[[1, 1, 1,  ..., 5, 5, 5],
          [1, 1, 1,  ..., 5, 5, 5],
          [1, 1, 1,  ..., 5, 5, 5],
          [4, 4, 4,  ..., 3, 3, 3],
          [4, 4, 4,  ..., 3, 3, 3],
          [4, 4, 4,  ..., 3, 3, 3]]]))



In [ ]:
# Batch size, and a target size of half the source resolution.
# NOTE(review): the first `src.transform(...)` call below is not given `size=`,
# so `size` appears unused until it is reassigned in the "Go big" section —
# confirm whether half-size training was intended.
bs,size = 8,src_size//2

In [ ]:
# Labelled item list: images under `path`, the `val` folder held out as the
# validation set, and each image's mask located with `get_y_fn`.
src = (SegmentationItemList.from_folder(path)
       .split_by_folder(valid='val')
       .label_from_func(get_y_fn, classes=codes))

In [ ]:
# Apply the default augmentations to images and masks alike (tfm_y=True),
# batch the result, and normalise with the ImageNet statistics expected by the
# pretrained encoder.
data = (src.transform(get_transforms(), tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

In [ ]:
# Visual sanity check of a few samples from the data pipeline.
data.show_batch(2, figsize=(10,7))



In [ ]:
# Reverse lookup from class name to its index in `codes`, and the index of the
# 'Void' class, which the accuracy metric excludes.
name2id = {name: idx for idx, name in enumerate(codes)}
void_code = name2id['Void']

def acc_camvid(input, target):
    """Pixel accuracy computed over non-void pixels only.

    `input` holds per-class scores with the class axis at dim 1; `target`
    holds the label indices with a singleton axis at dim 1. Pixels whose
    label equals `void_code` are left out of the average.
    """
    labels = target.squeeze(1)
    keep = labels != void_code
    predicted = input.argmax(dim=1)
    hits = predicted[keep] == labels[keep]
    return hits.float().mean()

In [ ]:

In [ ]:
# `metrics` and `wd` are used by every learner in this notebook but are not
# set in any visible cell, so define them here to survive a fresh kernel.
metrics = acc_camvid
# NOTE(review): the weight-decay value is an assumption — confirm against the original run.
wd = 1e-2

# U-Net with a ResNet-34 encoder.
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd, bottle=True)

In [ ]:

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

In [ ]:

In [ ]:
# First training stage: 10 epochs with the one-cycle schedule.
# NOTE(review): `lr` is not assigned in any visible cell (the empty cell above
# presumably set it from the LR-finder plot) — restore it before running.
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)

Total time: 04:45

epoch train_loss valid_loss acc_camvid
1 1.797555 1.140855 0.666712
2 1.108703 0.773872 0.779746
3 0.780712 0.517655 0.865303
4 0.618028 0.496499 0.866009
5 0.542120 0.484854 0.861792
6 0.490998 0.412183 0.857062
7 0.458476 0.363839 0.895877
8 0.426327 0.377631 0.885415
9 0.381760 0.307907 0.908467
10 0.325531 0.288052 0.907133

In [ ]:'stage-1')

In [ ]:

In [ ]:

In [ ]:
# Learning-rate range for the second stage: from lr/100 up to lr.
# NOTE(review): no visible cell calls `learn.unfreeze()` before this stage —
# confirm the earlier layers are trainable as intended.
lrs = slice(lr/100,lr)

In [ ]:
# Second training stage: 12 epochs using the learning-rate range in `lrs`.
learn.fit_one_cycle(12, lrs, pct_start=0.8)

Total time: 05:52

epoch train_loss valid_loss acc_camvid
1 0.277594 0.273819 0.913931
2 0.271254 0.266760 0.916620
3 0.269084 0.269211 0.915474
4 0.273889 0.295377 0.914132
5 0.268701 0.312179 0.906329
6 0.295838 0.363080 0.902990
7 0.304576 0.323809 0.898795
8 0.290066 0.267403 0.920294
9 0.274901 0.274512 0.914693
10 0.275207 0.273877 0.920632
11 0.248439 0.236959 0.931970
12 0.224031 0.253183 0.926807

In [ ]:'stage-2');

Go big


In [ ]:

Out[ ]:

You may have to restart your kernel and come back to this stage if you run out of memory, and may also need to decrease bs.

In [ ]:
# Switch the target size to the full source resolution for the "Go big" stage.
size = src_size

In [ ]:
# Rebuild the data pipeline at the full source resolution; `bs` may need to be
# lowered here if memory runs out.
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

In [ ]:
# Recreate the learner on the full-size data and load the weights saved as 'stage-2'.
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd, bottle=True).load('stage-2');

In [ ]:

LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

In [ ]:

In [ ]:
# First full-size training stage: 10 epochs with the one-cycle schedule.
# NOTE(review): `lr` is not assigned in any visible cell of this section (the
# empty cell above presumably set it) — restore it before running.
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)

Total time: 04:38

epoch train_loss valid_loss acc_camvid
1 0.197342 0.242197 0.929653
2 0.195649 0.237269 0.930523
3 0.192771 0.253759 0.926123
4 0.197877 0.244278 0.931226
5 0.197267 0.255444 0.927470
6 0.206536 0.371523 0.913392
7 0.210372 0.267855 0.927450
8 0.210874 0.278875 0.915120
9 0.205951 0.256743 0.929986
10 0.184149 0.227250 0.939358

In [ ]:'stage-1-big')

In [ ]:

In [ ]:

In [ ]:
# Learning-rate range for the final stage: from lr/1000 up to lr/10.
# NOTE(review): no visible cell calls `learn.unfreeze()` before this stage — confirm.
lrs = slice(lr/1000,lr/10)

In [ ]:
# Final full-size training stage: 10 epochs using the learning-rate range in `lrs`.
learn.fit_one_cycle(10, lrs)

Total time: 04:57

epoch train_loss valid_loss acc_camvid
1 0.163259 0.226014 0.939663
2 0.159221 0.223871 0.940497
3 0.159171 0.242006 0.938531
4 0.158257 0.223698 0.941702
5 0.156215 0.227719 0.941239
6 0.155152 0.226728 0.941032
7 0.150818 0.230083 0.940657
8 0.149479 0.229187 0.940948
9 0.148236 0.229072 0.941316
10 0.148074 0.234124 0.940629

In [ ]:'stage-2-big')

In [ ]:

In [ ]:
# Display three rows of model results for visual inspection.
learn.show_results(rows=3, figsize=(9,11))



In [ ]:
# start: 480x360

In [ ]:

Layer (type)         Output Shape         Param #    Trainable 
Conv2d               [8, 64, 180, 240]    9408       False     
BatchNorm2d          [8, 64, 180, 240]    128        True      
ReLU                 [8, 64, 180, 240]    0          False     
MaxPool2d            [8, 64, 90, 120]     0          False     
Conv2d               [8, 64, 90, 120]     36864      False     
BatchNorm2d          [8, 64, 90, 120]     128        True      
ReLU                 [8, 64, 90, 120]     0          False     
Conv2d               [8, 64, 90, 120]     36864      False     
BatchNorm2d          [8, 64, 90, 120]     128        True      
Conv2d               [8, 64, 90, 120]     36864      False     
BatchNorm2d          [8, 64, 90, 120]     128        True      
ReLU                 [8, 64, 90, 120]     0          False     
Conv2d               [8, 64, 90, 120]     36864      False     
BatchNorm2d          [8, 64, 90, 120]     128        True      
Conv2d               [8, 64, 90, 120]     36864      False     
BatchNorm2d          [8, 64, 90, 120]     128        True      
ReLU                 [8, 64, 90, 120]     0          False     
Conv2d               [8, 64, 90, 120]     36864      False     
BatchNorm2d          [8, 64, 90, 120]     128        True      
Conv2d               [8, 128, 45, 60]     73728      False     
BatchNorm2d          [8, 128, 45, 60]     256        True      
ReLU                 [8, 128, 45, 60]     0          False     
Conv2d               [8, 128, 45, 60]     147456     False     
BatchNorm2d          [8, 128, 45, 60]     256        True      
Conv2d               [8, 128, 45, 60]     8192       False     
BatchNorm2d          [8, 128, 45, 60]     256        True      
Conv2d               [8, 128, 45, 60]     147456     False     
BatchNorm2d          [8, 128, 45, 60]     256        True      
ReLU                 [8, 128, 45, 60]     0          False     
Conv2d               [8, 128, 45, 60]     147456     False     
BatchNorm2d          [8, 128, 45, 60]     256        True      
Conv2d               [8, 128, 45, 60]     147456     False     
BatchNorm2d          [8, 128, 45, 60]     256        True      
ReLU                 [8, 128, 45, 60]     0          False     
Conv2d               [8, 128, 45, 60]     147456     False     
BatchNorm2d          [8, 128, 45, 60]     256        True      
Conv2d               [8, 128, 45, 60]     147456     False     
BatchNorm2d          [8, 128, 45, 60]     256        True      
ReLU                 [8, 128, 45, 60]     0          False     
Conv2d               [8, 128, 45, 60]     147456     False     
BatchNorm2d          [8, 128, 45, 60]     256        True      
Conv2d               [8, 256, 23, 30]     294912     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
ReLU                 [8, 256, 23, 30]     0          False     
Conv2d               [8, 256, 23, 30]     589824     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
Conv2d               [8, 256, 23, 30]     32768      False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
Conv2d               [8, 256, 23, 30]     589824     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
ReLU                 [8, 256, 23, 30]     0          False     
Conv2d               [8, 256, 23, 30]     589824     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
Conv2d               [8, 256, 23, 30]     589824     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
ReLU                 [8, 256, 23, 30]     0          False     
Conv2d               [8, 256, 23, 30]     589824     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
Conv2d               [8, 256, 23, 30]     589824     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
ReLU                 [8, 256, 23, 30]     0          False     
Conv2d               [8, 256, 23, 30]     589824     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
Conv2d               [8, 256, 23, 30]     589824     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
ReLU                 [8, 256, 23, 30]     0          False     
Conv2d               [8, 256, 23, 30]     589824     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
Conv2d               [8, 256, 23, 30]     589824     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
ReLU                 [8, 256, 23, 30]     0          False     
Conv2d               [8, 256, 23, 30]     589824     False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
Conv2d               [8, 512, 12, 15]     1179648    False     
BatchNorm2d          [8, 512, 12, 15]     1024       True      
ReLU                 [8, 512, 12, 15]     0          False     
Conv2d               [8, 512, 12, 15]     2359296    False     
BatchNorm2d          [8, 512, 12, 15]     1024       True      
Conv2d               [8, 512, 12, 15]     131072     False     
BatchNorm2d          [8, 512, 12, 15]     1024       True      
Conv2d               [8, 512, 12, 15]     2359296    False     
BatchNorm2d          [8, 512, 12, 15]     1024       True      
ReLU                 [8, 512, 12, 15]     0          False     
Conv2d               [8, 512, 12, 15]     2359296    False     
BatchNorm2d          [8, 512, 12, 15]     1024       True      
Conv2d               [8, 512, 12, 15]     2359296    False     
BatchNorm2d          [8, 512, 12, 15]     1024       True      
ReLU                 [8, 512, 12, 15]     0          False     
Conv2d               [8, 512, 12, 15]     2359296    False     
BatchNorm2d          [8, 512, 12, 15]     1024       True      
BatchNorm2d          [8, 512, 12, 15]     1024       True      
ReLU                 [8, 512, 12, 15]     0          False     
Conv2d               [8, 1024, 12, 15]    4719616    True      
ReLU                 [8, 1024, 12, 15]    0          False     
Conv2d               [8, 512, 12, 15]     4719104    True      
ReLU                 [8, 512, 12, 15]     0          False     
Conv2d               [8, 1024, 12, 15]    525312     True      
PixelShuffle         [8, 256, 24, 30]     0          False     
ReplicationPad2d     [8, 256, 25, 31]     0          False     
AvgPool2d            [8, 256, 24, 30]     0          False     
ReLU                 [8, 1024, 12, 15]    0          False     
BatchNorm2d          [8, 256, 23, 30]     512        True      
Conv2d               [8, 512, 23, 30]     2359808    True      
ReLU                 [8, 512, 23, 30]     0          False     
Conv2d               [8, 512, 23, 30]     2359808    True      
ReLU                 [8, 512, 23, 30]     0          False     
ReLU                 [8, 512, 23, 30]     0          False     
Conv2d               [8, 1024, 23, 30]    525312     True      
PixelShuffle         [8, 256, 46, 60]     0          False     
ReplicationPad2d     [8, 256, 47, 61]     0          False     
AvgPool2d            [8, 256, 46, 60]     0          False     
ReLU                 [8, 1024, 23, 30]    0          False     
BatchNorm2d          [8, 128, 45, 60]     256        True      
Conv2d               [8, 384, 45, 60]     1327488    True      
ReLU                 [8, 384, 45, 60]     0          False     
Conv2d               [8, 384, 45, 60]     1327488    True      
ReLU                 [8, 384, 45, 60]     0          False     
ReLU                 [8, 384, 45, 60]     0          False     
Conv2d               [8, 768, 45, 60]     295680     True      
PixelShuffle         [8, 192, 90, 120]    0          False     
ReplicationPad2d     [8, 192, 91, 121]    0          False     
AvgPool2d            [8, 192, 90, 120]    0          False     
ReLU                 [8, 768, 45, 60]     0          False     
BatchNorm2d          [8, 64, 90, 120]     128        True      
Conv2d               [8, 256, 90, 120]    590080     True      
ReLU                 [8, 256, 90, 120]    0          False     
Conv2d               [8, 256, 90, 120]    590080     True      
ReLU                 [8, 256, 90, 120]    0          False     
ReLU                 [8, 256, 90, 120]    0          False     
Conv2d               [8, 512, 90, 120]    131584     True      
PixelShuffle         [8, 128, 180, 240]   0          False     
ReplicationPad2d     [8, 128, 181, 241]   0          False     
AvgPool2d            [8, 128, 180, 240]   0          False     
ReLU                 [8, 512, 90, 120]    0          False     
BatchNorm2d          [8, 64, 180, 240]    128        True      
Conv2d               [8, 96, 180, 240]    165984     True      
ReLU                 [8, 96, 180, 240]    0          False     
Conv2d               [8, 96, 180, 240]    83040      True      
ReLU                 [8, 96, 180, 240]    0          False     
ReLU                 [8, 192, 180, 240]   0          False     
Conv2d               [8, 384, 180, 240]   37248      True      
PixelShuffle         [8, 96, 360, 480]    0          False     
ReplicationPad2d     [8, 96, 361, 481]    0          False     
AvgPool2d            [8, 96, 360, 480]    0          False     
ReLU                 [8, 384, 180, 240]   0          False     
MergeLayer           [8, 99, 360, 480]    0          False     
Conv2d               [8, 49, 360, 480]    43708      True      
ReLU                 [8, 49, 360, 480]    0          False     
Conv2d               [8, 99, 360, 480]    43758      True      
ReLU                 [8, 99, 360, 480]    0          False     
MergeLayer           [8, 99, 360, 480]    0          False     
Conv2d               [8, 12, 360, 480]    1200       True      

Total params:  41133018
Total trainable params:  19865370
Total non-trainable params:  21267648