Image segmentation with CamVid


In [ ]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [ ]:
from fastai.vision import *
from fastai.callbacks.hooks import *
from fastai.utils.mem import *

In [ ]:
# Fetch the CamVid dataset through fastai's `untar_data`; `path` is the dataset
# root, and listing it shows the images/labels folders plus codes.txt and valid.txt.
path = untar_data(URLs.CAMVID)
path.ls()


Out[ ]:
[PosixPath('/home/ubuntu/.fastai/data/camvid/images'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/codes.txt'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/valid.txt')]

In [ ]:
# Input images and their segmentation masks live in sibling folders under the dataset root.
path_img = path / 'images'
path_lbl = path / 'labels'

Subset classes


In [ ]:
# Optional alternative (disabled): build the DataBunch from a local dataset at
# ./data/camvid-small that uses a reduced list of 12 classes. Uncomment the whole
# cell to use it; it defines its own `path`, `get_y_fn`, `codes`, `src`, `bs` and
# `data`. Here the mask of an image is looked up under the same file name in a
# sibling folder whose name is the image folder's name followed by 'annot'.

# path = Path('./data/camvid-small')

# def get_y_fn(x): return Path(str(x.parent)+'annot')/x.name

# codes = array(['Sky', 'Building', 'Pole', 'Road', 'Sidewalk', 'Tree',
#     'Sign', 'Fence', 'Car', 'Pedestrian', 'Cyclist', 'Void'])

# src = (SegmentationItemList.from_folder(path)
#        .split_by_folder(valid='val')
#        .label_from_func(get_y_fn, classes=codes))

# bs=8
# data = (src.transform(get_transforms(), tfm_y=True)
#         .databunch(bs=bs)
#         .normalize(imagenet_stats))

Data


In [ ]:
# Collect the image files found in the images folder and peek at the first three.
fnames = get_image_files(path_img)
fnames[:3]


Out[ ]:
[PosixPath('/home/ubuntu/.fastai/data/camvid/images/0001TP_008160.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/images/0016E5_08041.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/images/0006R0_f02340.png')]

In [ ]:
# Same for the label masks; in the output their names carry a `_P` before the suffix.
lbl_names = get_image_files(path_lbl)
lbl_names[:3]


Out[ ]:
[PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05400_P.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0006R0_f02640_P.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05340_P.png')]

In [ ]:
# Sanity check: open the first image file and display it.
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))



In [ ]:
def get_y_fn(x):
    """Return the path of the label mask that belongs to image file `x`.

    The mask is looked up in `path_lbl` under the image's own stem and suffix,
    with `_P` inserted before the suffix
    (e.g. `0001TP_008160.png` -> `0001TP_008160_P.png`).

    Written as a `def` instead of a lambda bound to a name (PEP 8): the function
    then has a real `__name__` for tracebacks and can be pickled by reference,
    which an anonymous lambda cannot.
    """
    return path_lbl/f'{x.stem}_P{x.suffix}'

In [ ]:
# Open the mask that get_y_fn pairs with the image above and draw it fully opaque.
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)



In [ ]:
# Native size of the data: the mask's shape without its leading dimension
# (720 x 960 in the output below). Also display the raw integer mask values.
src_size = np.array(mask.shape[1:])
src_size,mask.data


Out[ ]:
(array([720, 960]), tensor([[[ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          ...,
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30]]]))

In [ ]:
# Read the class names from codes.txt as a string array and display them.
# A name's position in this array is used as its class index further down.
codes = np.loadtxt(path/'codes.txt', dtype=str)
codes


Out[ ]:
array(['Animal', 'Archway', 'Bicyclist', 'Bridge', 'Building', 'Car', 'CartLuggagePram', 'Child', 'Column_Pole',
       'Fence', 'LaneMkgsDriv', 'LaneMkgsNonDriv', 'Misc_Text', 'MotorcycleScooter', 'OtherMoving', 'ParkingBlock',
       'Pedestrian', 'Road', 'RoadShoulder', 'Sidewalk', 'SignSymbol', 'Sky', 'SUVPickupTruck', 'TrafficCone',
       'TrafficLight', 'Train', 'Tree', 'Truck_Bus', 'Tunnel', 'VegetationMisc', 'Void', 'Wall'], dtype='<U17')

Datasets


In [ ]:
# Start training at half the native resolution.
size = src_size//2

# Choose the batch size from the GPU memory that is currently free (in MB):
# above 8200MB use bs=8, otherwise fall back to bs=4.
free = gpu_mem_get_free_no_cache()
bs = 8 if free > 8200 else 4
print(f"using bs={bs}, have {free}MB of GPU RAM free")

In [ ]:
# Labelled item list: images from path_img, validation split taken from
# valid.txt, masks located with get_y_fn and interpreted with the class names
# in `codes`. valid.txt sits one level above path_img (see the dataset
# listing), hence the '../' in the file name.
src = (SegmentationItemList.from_folder(path_img)
       .split_by_fname_file('../valid.txt')
       .label_from_func(get_y_fn, classes=codes))

In [ ]:
# Turn the item list into a DataBunch: default transforms at `size`, with
# tfm_y=True so the masks are transformed together with the images, batches of
# `bs`, and normalization with the ImageNet statistics.
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

In [ ]:
# Visual check of a batch from the default (training) set.
data.show_batch(2, figsize=(10,7))



In [ ]:
# Same visual check, this time on the validation set.
data.show_batch(2, figsize=(10,7), ds_type=DatasetType.Valid)


Model


In [ ]:
# Map each class name to its integer position in `codes`, then look up the
# index of the 'Void' class, which the accuracy metric leaves out.
name2id = {name: idx for idx, name in enumerate(codes)}
void_code = name2id['Void']

def acc_camvid(input, target):
    """Pixel accuracy that ignores pixels labelled with `void_code`.

    `input` holds per-class scores along dim 1 (the prediction is the argmax
    over that dim); `target` holds the integer class of each pixel and is
    expected to carry a singleton dim 1, which is squeezed away before
    comparing. Returns the mean, as a float tensor, of the per-pixel matches
    over all non-void pixels.
    """
    labels = target.squeeze(1)
    keep = labels != void_code       # True wherever the pixel is not 'Void'
    preds = input.argmax(dim=1)      # most likely class per pixel
    hits = preds[keep] == labels[keep]
    return hits.float().mean()

In [ ]:
# Metric reported during training: pixel accuracy that skips 'Void' pixels.
metrics=acc_camvid
# metrics=accuracy

In [ ]:
# Weight decay handed to unet_learner.
wd=1e-2

In [ ]:
# U-Net learner built on resnet34, using the metric and weight decay chosen above.
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)

In [ ]:
# Run the learning-rate finder on the learner, then plot what it recorded.
learn.lr_find()
learn.recorder.plot()


LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

In [ ]:
# Learning rate for the stage before learn.unfreeze().
# NOTE(review): presumably read off the LR-finder plot above — confirm.
lr=3e-3

In [ ]:
# Stage 1: ten epochs of one-cycle training, learning rates given by slice(lr).
learn.fit_one_cycle(10, slice(lr), pct_start=0.9)


Total time: 05:30

epoch train_loss valid_loss acc_camvid
1 1.172738 0.771233 0.820753
2 0.799105 0.637969 0.825231
3 0.731867 0.628517 0.824456
4 0.630086 0.486045 0.860029
5 0.616145 0.535374 0.860695
6 0.588540 0.439988 0.879501
7 0.551265 0.412592 0.884478
8 0.568172 0.679087 0.832221
9 0.538392 0.405911 0.879605
10 0.451235 0.353972 0.892891


In [ ]:
# Checkpoint the learner under the name 'stage-1'.
learn.save('stage-1')

In [ ]:
# Reload the 'stage-1' checkpoint (the trailing ';' suppresses the cell output).
learn.load('stage-1');

In [ ]:
# Show three rows of results from the stage-1 model.
learn.show_results(rows=3, figsize=(8,9))



In [ ]:
# Unfreeze the model (fastai `unfreeze`) before the second training stage.
learn.unfreeze()

In [ ]:
# Learning-rate range for the unfrozen model: from lr/400 up to lr/4.
lrs = slice(lr/400,lr/4)

In [ ]:
# Stage 2: twelve epochs of one-cycle training over the `lrs` range.
learn.fit_one_cycle(12, lrs, pct_start=0.8)


Total time: 06:39

epoch train_loss valid_loss acc_camvid
1 0.389135 0.334715 0.896700
2 0.377873 0.324080 0.900284
3 0.369020 0.325073 0.904146
4 0.355022 0.308820 0.912556
5 0.351138 0.313001 0.909351
6 0.347777 0.285509 0.920183
7 0.338683 0.306076 0.909899
8 0.318913 0.303712 0.915792
9 0.312038 0.276126 0.920137
10 0.311217 0.276649 0.925244
11 0.285135 0.268458 0.922453
12 0.256778 0.262011 0.926964


In [ ]:
# Checkpoint under 'stage-2'; the full-resolution run below starts from this file.
learn.save('stage-2');

Go big

If you run out of GPU memory, you may have to restart your kernel and come back to this stage; you may also need to decrease the batch size (`bs`).


In [ ]:
# Release the previous learner before building the full-size one. Guarded so
# the cell also runs right after a kernel restart, when `learn` is not defined
# yet and a bare `learn.destroy()` would raise NameError.
if 'learn' in globals(): learn.destroy()

# From here on, train at the full native resolution.
size = src_size

free = gpu_mem_get_free_no_cache()
# the max size of bs depends on the available GPU RAM
if free > 8200: bs=3
else:           bs=1
print(f"using bs={bs}, have {free}MB of GPU RAM free")

In [ ]:
# Rebuild the DataBunch from the same `src`, now with the full-resolution
# `size` and the smaller `bs` picked above.
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

In [ ]:
# New U-Net learner (resnet34) on the full-size data, same metric and weight decay.
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)

In [ ]:
# Start from the 'stage-2' checkpoint saved after the half-resolution training.
learn.load('stage-2');

In [ ]:
# Run the learning-rate finder again at the new image size and plot the result.
learn.lr_find()
learn.recorder.plot()


LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

In [ ]:
# Learning rate for the full-resolution run, lower than the 3e-3 used before.
# NOTE(review): presumably read off the LR-finder plot above — confirm.
lr=1e-3

In [ ]:
# Full-resolution stage 1: ten epochs of one-cycle training with slice(lr).
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)


Total time: 20:31

epoch train_loss valid_loss acc_camvid
1 0.366140 0.339497 0.908682
2 0.333278 0.304011 0.916702
3 0.326025 0.305404 0.919941
4 0.336885 0.321342 0.910933
5 0.326935 0.305589 0.919401
6 0.347779 0.333608 0.908895
7 0.328334 0.352358 0.905482
8 0.327277 0.387525 0.912187
9 0.291777 0.293065 0.918046
10 0.228348 0.257859 0.929750


In [ ]:
# Checkpoint the full-resolution model under 'stage-1-big'.
learn.save('stage-1-big')

In [ ]:
# Reload the 'stage-1-big' checkpoint (the trailing ';' suppresses the cell output).
learn.load('stage-1-big');

In [ ]:
# Unfreeze the model (fastai `unfreeze`) before the final training stage.
learn.unfreeze()

In [ ]:
# Learning-rate range for the final stage: from 1e-6 up to lr/10.
lrs = slice(1e-6,lr/10)

In [ ]:
# Full-resolution stage 2: ten more epochs of one-cycle training over `lrs`.
learn.fit_one_cycle(10, lrs)


Total time: 21:05

epoch train_loss valid_loss acc_camvid
1 0.225999 0.254650 0.930782
2 0.216341 0.256007 0.930751
3 0.209800 0.255649 0.930709
4 0.203509 0.252857 0.931682
5 0.202308 0.258498 0.931308
6 0.200796 0.257311 0.931915
7 0.204560 0.251486 0.933218
8 0.192893 0.254977 0.932786
9 0.189505 0.258091 0.932555
10 0.190156 0.256064 0.933315


In [ ]:
# Checkpoint the final model under 'stage-2-big'.
learn.save('stage-2-big')

In [ ]:
# Reload the 'stage-2-big' checkpoint (the trailing ';' suppresses the cell output).
learn.load('stage-2-big');

In [ ]:
# Show three rows of results from the final full-resolution model.
learn.show_results(rows=3, figsize=(10,10))



In [ ]:

fin


In [ ]: