Practical Deep Learning for Coders, v3

Lesson3_camvid

Image segmentation with CamVid

用CamVid数据集进行图像分割


In [ ]:
# Notebook setup: auto-reload imported modules before each cell executes and
# render matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [ ]:
from fastai.vision import *
from fastai.callbacks.hooks import *
from fastai.utils.mem import *

In [ ]:
# Obtain the local CamVid dataset folder via `untar_data` and list its
# top-level entries (images/, labels/, codes.txt, valid.txt in the output below).
path = untar_data(URLs.CAMVID)
path.ls()


Out[ ]:
[PosixPath('/home/ubuntu/.fastai/data/camvid/images'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/codes.txt'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/valid.txt')]

In [ ]:
# Sub-folders of the dataset: segmentation masks and input images respectively.
path_lbl = path/'labels'
path_img = path/'images'

Subset classes 子集类


In [ ]:
# Optional alternative setup, intentionally left disabled: it builds `data`
# from a local `./data/camvid-small` folder with a 12-class code list instead
# of the full dataset used in the rest of this notebook. Uncommenting it
# defines `path`, `get_y_fn`, `codes`, `src`, `bs` and `data`.

# path = Path('./data/camvid-small')

# def get_y_fn(x): return Path(str(x.parent)+'annot')/x.name

# codes = array(['Sky', 'Building', 'Pole', 'Road', 'Sidewalk', 'Tree',
#     'Sign', 'Fence', 'Car', 'Pedestrian', 'Cyclist', 'Void'])

# src = (SegmentationItemList.from_folder(path)
#        .split_by_folder(valid='val')
#        .label_from_func(get_y_fn, classes=codes))

# bs=8
# data = (src.transform(get_transforms(), tfm_y=True)
#         .databunch(bs=bs)
#         .normalize(imagenet_stats))

Data 数据


In [ ]:
# Collect the image files found under `path_img` and peek at the first three.
fnames = get_image_files(path_img)
fnames[:3]


Out[ ]:
[PosixPath('/home/ubuntu/.fastai/data/camvid/images/0001TP_008160.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/images/0016E5_08041.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/images/0006R0_f02340.png')]

In [ ]:
# Collect the mask files found under `path_lbl`; the output below shows that
# they carry a `_P` suffix before the extension.
lbl_names = get_image_files(path_lbl)
lbl_names[:3]


Out[ ]:
[PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05400_P.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0006R0_f02640_P.png'),
 PosixPath('/home/ubuntu/.fastai/data/camvid/labels/0016E5_05340_P.png')]

In [ ]:
# Use the first image file as a running example: open it and display it.
img_f = fnames[0]
img = open_image(img_f)
img.show(figsize=(5,5))



In [ ]:
def get_y_fn(x):
    """Return the path of the segmentation mask that belongs to image `x`.

    The mask lives in `path_lbl` and is named after the image: same stem with
    `_P` appended, same extension (e.g. `0001TP_008160.png` ->
    `path_lbl/0001TP_008160_P.png`).

    Written as a named `def` instead of a lambda bound to a name, so the
    function shows up under its own name in tracebacks and can be pickled by
    name (a lambda cannot).

    Args:
        x: `pathlib.Path` of an image file.

    Returns:
        `pathlib.Path` of the corresponding mask file.
    """
    return path_lbl/f'{x.stem}_P{x.suffix}'

In [ ]:
# Open the mask that `get_y_fn` maps the sample image to and show it with
# alpha=1.
mask = open_mask(get_y_fn(img_f))
mask.show(figsize=(5,5), alpha=1)



In [ ]:
# Size of the mask without its leading dimension; the output below shows
# [720, 960]. The second displayed value is the mask's underlying tensor of
# per-pixel integer codes.
src_size = np.array(mask.shape[1:])
src_size,mask.data


Out[ ]:
(array([720, 960]), tensor([[[ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          [ 4,  4,  4,  ...,  4,  4,  4],
          ...,
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30],
          [17, 17, 17,  ..., 30, 30, 30]]]))

In [ ]:
# Read the class names from codes.txt as an array of strings, then display it.
# This array is later passed as `classes` when labelling the dataset.
codes = np.loadtxt(path/'codes.txt', dtype=str)
codes


Out[ ]:
array(['Animal', 'Archway', 'Bicyclist', 'Bridge', 'Building', 'Car', 'CartLuggagePram', 'Child', 'Column_Pole',
       'Fence', 'LaneMkgsDriv', 'LaneMkgsNonDriv', 'Misc_Text', 'MotorcycleScooter', 'OtherMoving', 'ParkingBlock',
       'Pedestrian', 'Road', 'RoadShoulder', 'Sidewalk', 'SignSymbol', 'Sky', 'SUVPickupTruck', 'TrafficCone',
       'TrafficLight', 'Train', 'Tree', 'Truck_Bus', 'Tunnel', 'VegetationMisc', 'Void', 'Wall'], dtype='<U17')

Datasets 数据集


In [ ]:
# First training stage runs at half the original image size.
size = src_size//2

# The largest usable batch size depends on the available GPU RAM, so choose
# `bs` from the amount of free GPU memory (reported in MB).
free = gpu_mem_get_free_no_cache()
bs = 8 if free > 8200 else 4
print(f"using bs={bs}, have {free}MB of GPU RAM free")

In [ ]:
# Segmentation item list: images are read from `path_img`, the validation set
# is given by the file names listed in valid.txt (one level above the images
# folder), and each image is labelled with the mask path from `get_y_fn`,
# using `codes` as the class list.
src = (SegmentationItemList.from_folder(path_img)
       .split_by_fname_file('../valid.txt')
       .label_from_func(get_y_fn, classes=codes))

In [ ]:
# Build the DataBunch: apply the default transforms to the masks as well as
# the images (tfm_y=True), resize to `size`, batch with `bs`, and normalize
# with the ImageNet statistics.
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

In [ ]:
# Preview two rows of samples from the default (training) set.
data.show_batch(2, figsize=(10,7))



In [ ]:
# Same preview, drawn from the validation set instead.
data.show_batch(2, figsize=(10,7), ds_type=DatasetType.Valid)


Model 模型


In [ ]:
# Map every class name to its position in `codes`, then look up the index of
# the 'Void' class so the accuracy metric can exclude those pixels.
name2id = {class_name: idx for idx, class_name in enumerate(codes)}
void_code = name2id['Void']

def acc_camvid(input, target):
    """Pixel accuracy that ignores pixels labelled as `Void`.

    Args:
        input: model output; the predicted class of each pixel is taken as the
            argmax over dim 1.
        target: ground-truth class indices with a singleton dim 1, which is
            squeezed away before comparison.

    Returns:
        Scalar float tensor: the fraction of pixels whose target is not
        `void_code` for which the predicted class equals the target.
    """
    labels = target.squeeze(1)
    # Only pixels whose label is not Void take part in the score.
    keep = labels != void_code
    predicted = input.argmax(dim=1)
    return (predicted[keep] == labels[keep]).float().mean()

In [ ]:
# Metric passed to the learner: the Void-ignoring pixel accuracy defined above
# in this notebook; plain `accuracy` is kept as a commented-out alternative.
metrics=acc_camvid
# metrics=accuracy

In [ ]:
# Weight decay handed to `unet_learner`.
wd=1e-2

In [ ]:
# Create a U-Net learner on `data` built around `models.resnet34`, with the
# chosen metric and weight decay.
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)

In [ ]:
# Run the learning-rate finder and plot the recorded loss curve; the plot is
# used to choose the learning rate for the next training stage.
learn.lr_find()
learn.recorder.plot()


LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

In [ ]:
# Learning rate for stage 1 (presumably read off the LR-finder plot).
lr=3e-3

In [ ]:
# Stage 1: 10 epochs of one-cycle training with maximum learning rate `lr`;
# pct_start=0.9 sets the fraction of the schedule spent increasing the LR.
learn.fit_one_cycle(10, slice(lr), pct_start=0.9)


Total time: 05:30

epoch train_loss valid_loss acc_camvid
1 1.172738 0.771233 0.820753
2 0.799105 0.637969 0.825231
3 0.731867 0.628517 0.824456
4 0.630086 0.486045 0.860029
5 0.616145 0.535374 0.860695
6 0.588540 0.439988 0.879501
7 0.551265 0.412592 0.884478
8 0.568172 0.679087 0.832221
9 0.538392 0.405911 0.879605
10 0.451235 0.353972 0.892891


In [ ]:
# Checkpoint the model after stage 1 under the name 'stage-1'.
learn.save('stage-1')

In [ ]:
# Restore the 'stage-1' checkpoint (the trailing `;` suppresses the cell output).
learn.load('stage-1');

In [ ]:
# Display three rows of results from the stage-1 model.
learn.show_results(rows=3, figsize=(8,9))



In [ ]:
# Unfreeze the learner so the next fit call trains the whole model.
learn.unfreeze()

In [ ]:
# Learning-rate range for the unfrozen model: from lr/400 up to lr/4.
lrs = slice(lr/400,lr/4)

In [ ]:
# Stage 2: 12 epochs of one-cycle training over the `lrs` range.
learn.fit_one_cycle(12, lrs, pct_start=0.8)


Total time: 06:39

epoch train_loss valid_loss acc_camvid
1 0.389135 0.334715 0.896700
2 0.377873 0.324080 0.900284
3 0.369020 0.325073 0.904146
4 0.355022 0.308820 0.912556
5 0.351138 0.313001 0.909351
6 0.347777 0.285509 0.920183
7 0.338683 0.306076 0.909899
8 0.318913 0.303712 0.915792
9 0.312038 0.276126 0.920137
10 0.311217 0.276649 0.925244
11 0.285135 0.268458 0.922453
12 0.256778 0.262011 0.926964


In [ ]:
# Checkpoint the model after stage 2; it is reloaded later for full-size training.
learn.save('stage-2');

Go big 用更大的图像尺寸进行训练

If you run out of GPU memory, you may have to restart your kernel and come back to this stage; you may also need to decrease `bs` (the batch size).
如果显存不够的话,你可能需要重启 kernel,然后再返回这一步,同时可能还需要减小 `bs`(批次大小)的设定。


In [ ]:
# Dispose of the half-size learner before setting up full-size training.
learn.destroy()

# From here on, train at the original image size.
size = src_size

# The largest usable batch size depends on the available GPU RAM; full-size
# images call for a smaller `bs` than before. Free memory is reported in MB.
free = gpu_mem_get_free_no_cache()
bs = 3 if free > 8200 else 1
print(f"using bs={bs}, have {free}MB of GPU RAM free")

In [ ]:
# Rebuild the DataBunch from the same `src`, now using the full-size `size`
# and the smaller `bs` chosen for it.
data = (src.transform(get_transforms(), size=size, tfm_y=True)
        .databunch(bs=bs)
        .normalize(imagenet_stats))

In [ ]:
# Fresh U-Net learner on the full-size data, with the same architecture,
# metric and weight decay as before.
learn = unet_learner(data, models.resnet34, metrics=metrics, wd=wd)

In [ ]:
# Start from the 'stage-2' checkpoint saved after half-size training.
learn.load('stage-2');

In [ ]:
# Run the learning-rate finder again for the full-size setup and plot the
# recorded loss curve.
learn.lr_find()
learn.recorder.plot()


LR Finder is complete, type {learner_name}.recorder.plot() to see the graph.

In [ ]:
# Learning rate for full-size stage 1 (presumably read off the LR-finder plot).
lr=1e-3

In [ ]:
# Full-size stage 1: 10 epochs of one-cycle training with maximum LR `lr`.
learn.fit_one_cycle(10, slice(lr), pct_start=0.8)


Total time: 20:31

epoch train_loss valid_loss acc_camvid
1 0.366140 0.339497 0.908682
2 0.333278 0.304011 0.916702
3 0.326025 0.305404 0.919941
4 0.336885 0.321342 0.910933
5 0.326935 0.305589 0.919401
6 0.347779 0.333608 0.908895
7 0.328334 0.352358 0.905482
8 0.327277 0.387525 0.912187
9 0.291777 0.293065 0.918046
10 0.228348 0.257859 0.929750


In [ ]:
# Checkpoint the full-size model after stage 1.
learn.save('stage-1-big')

In [ ]:
# Restore the 'stage-1-big' checkpoint before fine-tuning.
learn.load('stage-1-big');

In [ ]:
# Unfreeze the learner so the next fit call trains the whole model.
learn.unfreeze()

In [ ]:
# Learning-rate range for the unfrozen full-size model: from 1e-6 up to lr/10.
lrs = slice(1e-6,lr/10)

In [ ]:
# Full-size stage 2: 10 epochs of one-cycle training over the `lrs` range.
learn.fit_one_cycle(10, lrs)


Total time: 21:05

epoch train_loss valid_loss acc_camvid
1 0.225999 0.254650 0.930782
2 0.216341 0.256007 0.930751
3 0.209800 0.255649 0.930709
4 0.203509 0.252857 0.931682
5 0.202308 0.258498 0.931308
6 0.200796 0.257311 0.931915
7 0.204560 0.251486 0.933218
8 0.192893 0.254977 0.932786
9 0.189505 0.258091 0.932555
10 0.190156 0.256064 0.933315


In [ ]:
# Checkpoint the final full-size model.
learn.save('stage-2-big')

In [ ]:
# Restore the 'stage-2-big' checkpoint.
learn.load('stage-2-big');

In [ ]:
# Display three rows of results from the final full-size model.
learn.show_results(rows=3, figsize=(10,10))