Image segmentation with CamVid



In [ ]:

    
# Notebook setup: (re)load the autoreload extension in mode 2 and
# render matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline



In [ ]:

    
from fastai.vision import *
from fastai.callbacks.hooks import *
from fastai.utils.mem import *



In [ ]:

    
# Obtain the local CamVid dataset folder via `untar_data` and list its contents
# (the recorded output shows images/, labels/, codes.txt and valid.txt).
path = untar_data(URLs.CAMVID)
path.ls()









    Out[ ]:





[PosixPath('/home/ubuntu/.fastai/data/camvid/images'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/codes.txt'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/valid.txt')]



In [ ]:

    
# Sub-folders of the dataset: segmentation masks and the input images.
path_lbl, path_img = path/'labels', path/'images'

Subset classes



In [ ]:

    
# Disabled alternative setup (kept for reference): a smaller dataset at
# ./data/camvid-small with 12 classes, masks in a sibling "<folder>annot"
# directory and a 'val' validation folder. Uncomment to use it.
# path = Path('./data/camvid-small')

# def get_y_fn(x): return Path(str(x.parent)+'annot')/x.name

# codes = array(['Sky', 'Building', 'Pole', 'Road', 'Sidewalk', 'Tree',
#     'Sign', 'Fence', 'Car', 'Pedestrian', 'Cyclist', 'Void'])

# src = (SegmentationItemList.from_folder(path)
#        .split_by_folder(valid='val')
#        .label_from_func(get_y_fn, classes=codes))

# bs=8
# data = (src.transform(get_transforms(), tfm_y=True)
#         .databunch(bs=bs)
#         .normalize(imagenet_stats))

Data



In [ ]:

    
# Collect the image files under the images folder and peek at the first three.
fnames = get_image_files(path_img)
fnames[:3]









    Out[ ]:





[PosixPath('/home/ubuntu/.fastai/data/camvid/images/0001TP_008160.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/images/0016E5_08041.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/images/0006R0_f02340.png')]



In [ ]:

    
# Same for the label folder; the recorded output shows mask files named like
# the images with a `_P` suffix before the extension.
lbl_names = get_image_files(path_lbl)
lbl_names[:3]









    Out[ ]:





[PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05400_P.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0006R0_f02640_P.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05340_P.png')]



In [ ]:

    
# Open the first image file and display it as a quick sanity check.
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))



In [ ]:

    
def get_y_fn(x):
    """Return the label-mask path that corresponds to image path `x`.

    The mask lives in `path_lbl` and keeps the image's stem and suffix, with
    `_P` appended to the stem (e.g. `0016E5_05400.png` -> `0016E5_05400_P.png`).

    Written as a named function rather than a lambda bound to a name so that
    it has a meaningful `__name__` and can be pickled by reference.
    """
    return path_lbl/f'{x.stem}_P{x.suffix}'



In [ ]:

    
# Open the mask that get_y_fn maps the sample image to and show it fully opaque (alpha=1).
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)



In [ ]:

    
# Size of the source masks: every dimension of mask.shape after the first
# (the recorded output shows [720, 960]). Also display the raw mask values.
src_size = np.array(mask.shape[1:])
src_size,mask.data









    Out[ ]:





(array([720, 960]), tensor([[[ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          ...,
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30]]]))



In [ ]:

    
# Class names read from codes.txt; a name's position in this array is the
# integer id used for it (see the name -> id mapping built for the metric).
codes = np.loadtxt(path/'codes.txt', dtype=str)
codes









    Out[ ]:





array(['Animal', 'Archway', 'Bicyclist', 'Bridge', 'Building', 'Car', 'CartLuggagePram', 'Child', 'Column_Pole',
       'Fence', 'LaneMkgsDriv', 'LaneMkgsNonDriv', 'Misc_Text', 'MotorcycleScooter', 'OtherMoving', 'ParkingBlock',
       'Pedestrian', 'Road', 'RoadShoulder', 'Sidewalk', 'SignSymbol', 'Sky', 'SUVPickupTruck', 'TrafficCone',
       'TrafficLight', 'Train', 'Tree', 'Truck_Bus', 'Tunnel', 'VegetationMisc', 'Void', 'Wall'], dtype='<U17')

Datasets



In [ ]:

    
# First training pass runs at half the source resolution.
size = src_size // 2

# The largest usable batch size depends on the available GPU RAM:
# above the 8200 threshold (reported as MB in the message) use 8, otherwise 4.
free = gpu_mem_get_free_no_cache()
bs = 8 if free > 8200 else 4
print(f"using bs={bs}, have {free}MB of GPU RAM free")



In [ ]:

    
# Labelled item list: images from the images folder, split using the file
# names listed in valid.txt (one level above path_img), and labelled with the
# mask returned by get_y_fn, using `codes` as the class names.
src = (SegmentationItemList.from_folder(path_img)
       .split_by_fname_file('../valid.txt')
       .label_from_func(get_y_fn, classes=codes))



In [ ]:

    
# Apply the transforms from get_transforms() at `size` to the inputs and,
# because tfm_y=True, to the masks as well; then batch with `bs` and normalize
# with imagenet_stats.
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))



In [ ]:

    
# Visual sanity check: show a sample batch from the default dataset split
# (the next cell requests the validation set explicitly).
data.show_batch(2, figsize=(10,7))



In [ ]:

    
# Same check on the validation set.
data.show_batch(2, figsize=(10,7), ds_type=DatasetType.Valid)

Model



In [ ]:

    
# Map each class name to its integer id (its position in `codes`) and look up
# the id of the 'Void' class, which the accuracy metric ignores.
name2id = {name: idx for idx, name in enumerate(codes)}
void_code = name2id['Void']

def acc_camvid(input, target):
    """Accuracy of the arg-max prediction, ignoring positions labelled `void_code`.

    `input` holds per-class scores along dim 1; `target` holds the class ids
    with a singleton dim 1, which is squeezed away before comparing. Positions
    whose target equals the module-level `void_code` are excluded from the mean.
    """
    labels = target.squeeze(1)
    predicted = input.argmax(dim=1)
    keep = labels != void_code
    hits = predicted[keep] == labels[keep]
    return hits.float().mean()



In [ ]:

    
# Metric handed to the learner: the void-ignoring accuracy defined above.
# The commented line is the alternative of using plain `accuracy`.
metrics=acc_camvid
# metrics=accuracy



In [ ]:

    
# Value passed as `wd` (weight decay) to unet_learner below.
wd=1e-2



In [ ]:

    
# Build a U-Net learner on `data` using models.resnet34, with the metric and wd chosen above.
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)



In [ ]:

    
# Run the learning-rate finder, then plot what the recorder captured
# to choose `lr` for the first training stage.
learn.lr_find()
learn.recorder.plot()









    



LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.



In [ ]:

    
# Learning rate chosen for the first (half-resolution) training stage.
lr=3e-3



In [ ]:

    
# Stage 1: 10 epochs of one-cycle training with slice(lr) and pct_start=0.9.
learn.fit_one_cycle(10, slice(lr), pct_start=0.9)









    




Total time: 05:30 

  
    epoch
    train_loss
    valid_loss
    acc_camvid
  
  
    1
    1.172738
    0.771233
    0.820753
  
  
    2
    0.799105
    0.637969
    0.825231
  
  
    3
    0.731867
    0.628517
    0.824456
  
  
    4
    0.630086
    0.486045
    0.860029
  
  
    5
    0.616145
    0.535374
    0.860695
  
  
    6
    0.588540
    0.439988
    0.879501
  
  
    7
    0.551265
    0.412592
    0.884478
  
  
    8
    0.568172
    0.679087
    0.832221
  
  
    9
    0.538392
    0.405911
    0.879605
  
  
    10
    0.451235
    0.353972
    0.892891



In [ ]:

    
# Checkpoint the learner after the first training run (before `unfreeze`) as 'stage-1'.
learn.save('stage-1')



In [ ]:

    
# Reload the 'stage-1' checkpoint; the trailing `;` suppresses the cell output.
learn.load('stage-1');



In [ ]:

    
# Inspect the stage-1 model's results on a few samples (rows=3).
learn.show_results(rows=3, figsize=(8,9))



In [ ]:

    
# Unfreeze the learner before the second training stage.
learn.unfreeze()



In [ ]:

    
# Learning-rate range for the unfrozen model: from lr/400 up to lr/4.
lrs = slice(lr/400,lr/4)



In [ ]:

    
# Stage 2: 12 epochs of one-cycle training over the `lrs` range with pct_start=0.8.
learn.fit_one_cycle(12, lrs, pct_start=0.8)









    




Total time: 06:39 

  
    epoch
    train_loss
    valid_loss
    acc_camvid
  
  
    1
    0.389135
    0.334715
    0.896700
  
  
    2
    0.377873
    0.324080
    0.900284
  
  
    3
    0.369020
    0.325073
    0.904146
  
  
    4
    0.355022
    0.308820
    0.912556
  
  
    5
    0.351138
    0.313001
    0.909351
  
  
    6
    0.347777
    0.285509
    0.920183
  
  
    7
    0.338683
    0.306076
    0.909899
  
  
    8
    0.318913
    0.303712
    0.915792
  
  
    9
    0.312038
    0.276126
    0.920137
  
  
    10
    0.311217
    0.276649
    0.925244
  
  
    11
    0.285135
    0.268458
    0.922453
  
  
    12
    0.256778
    0.262011
    0.926964



In [ ]:

    
# Checkpoint the half-resolution model as 'stage-2'; it is reloaded below for full-size training.
learn.save('stage-2');

Go big

If you run out of GPU memory, you may have to restart your kernel and come back to this stage; you may also need to decrease the batch size (`bs`).



In [ ]:

    
# Drop the half-resolution learner before building the full-size one.
learn.destroy()

# Now train at the full source resolution.
size = src_size

# The largest usable batch size depends on the available GPU RAM:
# above the 8200 threshold (reported as MB in the message) use 3, otherwise 1.
free = gpu_mem_get_free_no_cache()
bs = 3 if free > 8200 else 1
print(f"using bs={bs}, have {free}MB of GPU RAM free")



In [ ]:

    
# Rebuild the data from `src` with the same pipeline as before, now using the
# full-resolution `size` and the smaller `bs` set in the previous cell.
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))



In [ ]:

    
# Recreate the learner on the full-size data with the same architecture, metric and wd.
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)



In [ ]:

    
# Start from the 'stage-2' checkpoint saved after half-resolution training.
learn.load('stage-2');



In [ ]:

    
# Run the learning-rate finder again for the full-size data, then plot
# what the recorder captured.
learn.lr_find()
learn.recorder.plot()









    



LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.



In [ ]:

    
# Learning rate chosen for full-size training (lower than the 3e-3 used at half size).
lr=1e-3



In [ ]:

    
# Full-size stage 1: 10 epochs of one-cycle training with slice(lr) and pct_start=0.8.
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)









    




Total time: 20:31 

  
    epoch
    train_loss
    valid_loss
    acc_camvid
  
  
    1
    0.366140
    0.339497
    0.908682
  
  
    2
    0.333278
    0.304011
    0.916702
  
  
    3
    0.326025
    0.305404
    0.919941
  
  
    4
    0.336885
    0.321342
    0.910933
  
  
    5
    0.326935
    0.305589
    0.919401
  
  
    6
    0.347779
    0.333608
    0.908895
  
  
    7
    0.328334
    0.352358
    0.905482
  
  
    8
    0.327277
    0.387525
    0.912187
  
  
    9
    0.291777
    0.293065
    0.918046
  
  
    10
    0.228348
    0.257859
    0.929750



In [ ]:

    
# Checkpoint the full-size model (before `unfreeze`) as 'stage-1-big'.
learn.save('stage-1-big')



In [ ]:

    
# Reload the 'stage-1-big' checkpoint; the trailing `;` suppresses the cell output.
learn.load('stage-1-big');



In [ ]:

    
# Unfreeze the learner before the final full-size training stage.
learn.unfreeze()



In [ ]:

    
# Learning-rate range for the unfrozen full-size model: from 1e-6 up to lr/10.
lrs = slice(1e-6,lr/10)



In [ ]:

    
# Full-size stage 2: 10 epochs of one-cycle training over the `lrs` range.
learn.fit_one_cycle(10, lrs)









    




Total time: 21:05 

  
    epoch
    train_loss
    valid_loss
    acc_camvid
  
  
    1
    0.225999
    0.254650
    0.930782
  
  
    2
    0.216341
    0.256007
    0.930751
  
  
    3
    0.209800
    0.255649
    0.930709
  
  
    4
    0.203509
    0.252857
    0.931682
  
  
    5
    0.202308
    0.258498
    0.931308
  
  
    6
    0.200796
    0.257311
    0.931915
  
  
    7
    0.204560
    0.251486
    0.933218
  
  
    8
    0.192893
    0.254977
    0.932786
  
  
    9
    0.189505
    0.258091
    0.932555
  
  
    10
    0.190156
    0.256064
    0.933315



In [ ]:

    
# Checkpoint the final full-size model as 'stage-2-big'.
learn.save('stage-2-big')



In [ ]:

    
# Reload the 'stage-2-big' checkpoint; the trailing `;` suppresses the cell output.
learn.load('stage-2-big');



In [ ]:

    
# Inspect the final model's results on a few samples (rows=3).
learn.show_results(rows=3, figsize=(10,10))



In [ ]:

fin



In [ ]:

epoch	train_loss	valid_loss	acc_camvid
1	1.172738	0.771233	0.820753
2	0.799105	0.637969	0.825231
3	0.731867	0.628517	0.824456
4	0.630086	0.486045	0.860029
5	0.616145	0.535374	0.860695
6	0.588540	0.439988	0.879501
7	0.551265	0.412592	0.884478
8	0.568172	0.679087	0.832221
9	0.538392	0.405911	0.879605
10	0.451235	0.353972	0.892891

epoch	train_loss	valid_loss	acc_camvid
1	0.389135	0.334715	0.896700
2	0.377873	0.324080	0.900284
3	0.369020	0.325073	0.904146
4	0.355022	0.308820	0.912556
5	0.351138	0.313001	0.909351
6	0.347777	0.285509	0.920183
7	0.338683	0.306076	0.909899
8	0.318913	0.303712	0.915792
9	0.312038	0.276126	0.920137
10	0.311217	0.276649	0.925244
11	0.285135	0.268458	0.922453
12	0.256778	0.262011	0.926964

epoch	train_loss	valid_loss	acc_camvid
1	0.366140	0.339497	0.908682
2	0.333278	0.304011	0.916702
3	0.326025	0.305404	0.919941
4	0.336885	0.321342	0.910933
5	0.326935	0.305589	0.919401
6	0.347779	0.333608	0.908895
7	0.328334	0.352358	0.905482
8	0.327277	0.387525	0.912187
9	0.291777	0.293065	0.918046
10	0.228348	0.257859	0.929750

epoch	train_loss	valid_loss	acc_camvid
1	0.225999	0.254650	0.930782
2	0.216341	0.256007	0.930751
3	0.209800	0.255649	0.930709
4	0.203509	0.252857	0.931682
5	0.202308	0.258498	0.931308
6	0.200796	0.257311	0.931915
7	0.204560	0.251486	0.933218
8	0.192893	0.254977	0.932786
9	0.189505	0.258091	0.932555
10	0.190156	0.256064	0.933315