Practical Deep Learning for Coders, v3

Lesson3_camvid

Image segmentation with CamVid

用CamVid数据集进行图像分割



In [ ]:

    
%reload_ext autoreload
%autoreload 2
%matplotlib inline



In [ ]:

    
from fastai.vision import *
from fastai.callbacks.hooks import *
from fastai.utils.mem import *



In [ ]:

    
path = untar_data(URLs.CAMVID)
path.ls()









    Out[ ]:





[PosixPath('/home/ubuntu/.fastai/data/camvid/images'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/codes.txt'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/valid.txt')]



In [ ]:

    
path_lbl = path/'labels'
path_img = path/'images'

Subset classes 子集类



In [ ]:

    
# path = Path('./data/camvid-small')

# def get_y_fn(x): return Path(str(x.parent)+'annot')/x.name

# codes = array(['Sky', 'Building', 'Pole', 'Road', 'Sidewalk', 'Tree',
#     'Sign', 'Fence', 'Car', 'Pedestrian', 'Cyclist', 'Void'])

# src = (SegmentationItemList.from_folder(path)
#        .split_by_folder(valid='val')
#        .label_from_func(get_y_fn, classes=codes))

# bs=8
# data = (src.transform(get_transforms(), tfm_y=True)
#         .databunch(bs=bs)
#         .normalize(imagenet_stats))

Data 数据



In [ ]:

    
fnames = get_image_files(path_img)
fnames[:3]









    Out[ ]:





[PosixPath('/home/ubuntu/.fastai/data/camvid/images/0001TP_008160.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/images/0016E5_08041.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/images/0006R0_f02340.png')]



In [ ]:

    
lbl_names = get_image_files(path_lbl)
lbl_names[:3]









    Out[ ]:





[PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05400_P.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0006R0_f02640_P.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05340_P.png')]



In [ ]:

    
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))



In [ ]:

    
get_y_fn = lambda x: path_lbl/f'{x.stem}_P{x.suffix}'



In [ ]:

    
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)



In [ ]:

    
src_size = np.array(mask.shape[1:])
src_size,mask.data









    Out[ ]:





(array([720, 960]), tensor([[[ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          ...,
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30]]]))



In [ ]:

    
codes = np.loadtxt(path/'codes.txt', dtype=str); codes









    Out[ ]:





array(['Animal', 'Archway', 'Bicyclist', 'Bridge', 'Building', 'Car', 'CartLuggagePram', 'Child', 'Column_Pole',
       'Fence', 'LaneMkgsDriv', 'LaneMkgsNonDriv', 'Misc_Text', 'MotorcycleScooter', 'OtherMoving', 'ParkingBlock',
       'Pedestrian', 'Road', 'RoadShoulder', 'Sidewalk', 'SignSymbol', 'Sky', 'SUVPickupTruck', 'TrafficCone',
       'TrafficLight', 'Train', 'Tree', 'Truck_Bus', 'Tunnel', 'VegetationMisc', 'Void', 'Wall'], dtype='<U17')

Datasets 数据集



In [ ]:

    
size = src_size//2

free = gpu_mem_get_free_no_cache()
# the max size of bs depends on the available GPU RAM
if free > 8200: bs=8
else:           bs=4
print(f"using bs={bs}, have {free}MB of GPU RAM free")



In [ ]:

    
src = (SegmentationItemList.from_folder(path_img)
       .split_by_fname_file('../valid.txt')
       .label_from_func(get_y_fn, classes=codes))



In [ ]:

    
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))



In [ ]:

    
data.show_batch(2, figsize=(10,7))



In [ ]:

    
data.show_batch(2, figsize=(10,7), ds_type=DatasetType.Valid)

Model 模型



In [ ]:

    
name2id = {v:k for k,v in enumerate(codes)}
void_code = name2id['Void']

def acc_camvid(input, target):
    target = target.squeeze(1)
    mask = target != void_code
    return (input.argmax(dim=1)[mask]==target[mask]).float().mean()



In [ ]:

    
metrics=acc_camvid
# metrics=accuracy



In [ ]:

    
wd=1e-2



In [ ]:

    
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)



In [ ]:

    
lr_find(learn)
learn.recorder.plot()









    



LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.



In [ ]:

    
lr=3e-3



In [ ]:

    
learn.fit_one_cycle(10, slice(lr), pct_start=0.9)









    




Total time: 05:30 

  
    epoch
    train_loss
    valid_loss
    acc_camvid
  
  
    1
    1.172738
    0.771233
    0.820753
  
  
    2
    0.799105
    0.637969
    0.825231
  
  
    3
    0.731867
    0.628517
    0.824456
  
  
    4
    0.630086
    0.486045
    0.860029
  
  
    5
    0.616145
    0.535374
    0.860695
  
  
    6
    0.588540
    0.439988
    0.879501
  
  
    7
    0.551265
    0.412592
    0.884478
  
  
    8
    0.568172
    0.679087
    0.832221
  
  
    9
    0.538392
    0.405911
    0.879605
  
  
    10
    0.451235
    0.353972
    0.892891



In [ ]:

    
learn.save('stage-1')



In [ ]:

    
learn.load('stage-1');



In [ ]:

    
learn.show_results(rows=3, figsize=(8,9))



In [ ]:

    
learn.unfreeze()



In [ ]:

    
lrs = slice(lr/400,lr/4)



In [ ]:

    
learn.fit_one_cycle(12, lrs, pct_start=0.8)









    




Total time: 06:39 

  
    epoch
    train_loss
    valid_loss
    acc_camvid
  
  
    1
    0.389135
    0.334715
    0.896700
  
  
    2
    0.377873
    0.324080
    0.900284
  
  
    3
    0.369020
    0.325073
    0.904146
  
  
    4
    0.355022
    0.308820
    0.912556
  
  
    5
    0.351138
    0.313001
    0.909351
  
  
    6
    0.347777
    0.285509
    0.920183
  
  
    7
    0.338683
    0.306076
    0.909899
  
  
    8
    0.318913
    0.303712
    0.915792
  
  
    9
    0.312038
    0.276126
    0.920137
  
  
    10
    0.311217
    0.276649
    0.925244
  
  
    11
    0.285135
    0.268458
    0.922453
  
  
    12
    0.256778
    0.262011
    0.926964



In [ ]:

    
learn.save('stage-2');

Go big 用更大的数据集进行训练

You may have to restart your kernel and come back to this stage if you run out of memory, and may also need to decrease bs.
如果内存不够的话，你可能需要重启你的计算内核，然后再返回这一步，同时可能要减少 bs 的设定。



In [ ]:

    
learn.destroy()

size = src_size

free = gpu_mem_get_free_no_cache()
# the max size of bs depends on the available GPU RAM
if free > 8200: bs=3
else:           bs=1
print(f"using bs={bs}, have {free}MB of GPU RAM free")



In [ ]:

    
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))



In [ ]:

    
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)



In [ ]:

    
learn.load('stage-2');



In [ ]:

    
lr_find(learn)
learn.recorder.plot()









    



LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.



In [ ]:

    
lr=1e-3



In [ ]:

    
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)









    




Total time: 20:31 

  
    epoch
    train_loss
    valid_loss
    acc_camvid
  
  
    1
    0.366140
    0.339497
    0.908682
  
  
    2
    0.333278
    0.304011
    0.916702
  
  
    3
    0.326025
    0.305404
    0.919941
  
  
    4
    0.336885
    0.321342
    0.910933
  
  
    5
    0.326935
    0.305589
    0.919401
  
  
    6
    0.347779
    0.333608
    0.908895
  
  
    7
    0.328334
    0.352358
    0.905482
  
  
    8
    0.327277
    0.387525
    0.912187
  
  
    9
    0.291777
    0.293065
    0.918046
  
  
    10
    0.228348
    0.257859
    0.929750



In [ ]:

    
learn.save('stage-1-big')



In [ ]:

    
learn.load('stage-1-big');



In [ ]:

    
learn.unfreeze()



In [ ]:

    
lrs = slice(1e-6,lr/10)



In [ ]:

    
learn.fit_one_cycle(10, lrs)









    




Total time: 21:05 

  
    epoch
    train_loss
    valid_loss
    acc_camvid
  
  
    1
    0.225999
    0.254650
    0.930782
  
  
    2
    0.216341
    0.256007
    0.930751
  
  
    3
    0.209800
    0.255649
    0.930709
  
  
    4
    0.203509
    0.252857
    0.931682
  
  
    5
    0.202308
    0.258498
    0.931308
  
  
    6
    0.200796
    0.257311
    0.931915
  
  
    7
    0.204560
    0.251486
    0.933218
  
  
    8
    0.192893
    0.254977
    0.932786
  
  
    9
    0.189505
    0.258091
    0.932555
  
  
    10
    0.190156
    0.256064
    0.933315



In [ ]:

    
learn.save('stage-2-big')



In [ ]:

    
learn.load('stage-2-big');



In [ ]:

    
learn.show_results(rows=3, figsize=(10,10))

epoch	train_loss	valid_loss	acc_camvid
1	1.172738	0.771233	0.820753
2	0.799105	0.637969	0.825231
3	0.731867	0.628517	0.824456
4	0.630086	0.486045	0.860029
5	0.616145	0.535374	0.860695
6	0.588540	0.439988	0.879501
7	0.551265	0.412592	0.884478
8	0.568172	0.679087	0.832221
9	0.538392	0.405911	0.879605
10	0.451235	0.353972	0.892891

epoch	train_loss	valid_loss	acc_camvid
1	0.389135	0.334715	0.896700
2	0.377873	0.324080	0.900284
3	0.369020	0.325073	0.904146
4	0.355022	0.308820	0.912556
5	0.351138	0.313001	0.909351
6	0.347777	0.285509	0.920183
7	0.338683	0.306076	0.909899
8	0.318913	0.303712	0.915792
9	0.312038	0.276126	0.920137
10	0.311217	0.276649	0.925244
11	0.285135	0.268458	0.922453
12	0.256778	0.262011	0.926964

epoch	train_loss	valid_loss	acc_camvid
1	0.366140	0.339497	0.908682
2	0.333278	0.304011	0.916702
3	0.326025	0.305404	0.919941
4	0.336885	0.321342	0.910933
5	0.326935	0.305589	0.919401
6	0.347779	0.333608	0.908895
7	0.328334	0.352358	0.905482
8	0.327277	0.387525	0.912187
9	0.291777	0.293065	0.918046
10	0.228348	0.257859	0.929750

epoch	train_loss	valid_loss	acc_camvid
1	0.225999	0.254650	0.930782
2	0.216341	0.256007	0.930751
3	0.209800	0.255649	0.930709
4	0.203509	0.252857	0.931682
5	0.202308	0.258498	0.931308
6	0.200796	0.257311	0.931915
7	0.204560	0.251486	0.933218
8	0.192893	0.254977	0.932786
9	0.189505	0.258091	0.932555
10	0.190156	0.256064	0.933315