Important: This notebook only works with fastai-0.7.x. Do not try to run any fastai-1.x code from this path in the repository, because it will load fastai-0.7.x.


In [ ]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [ ]:
from fastai.conv_learner import *
from fastai.dataset import *

import json, pdb
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects
torch.cuda.set_device(0)

In [ ]:
torch.backends.cudnn.benchmark=True

Setup


In [ ]:
# Root folder of the dataset and the parsed training annotation file.
PATH = Path('data/pascal')
trn_j = json.load((PATH / 'pascal_train2007.json').open())
# Names of the top-level sections and per-record fields read from the JSON below.
IMAGES,ANNOTATIONS,CATEGORIES = ['images', 'annotations', 'categories']
FILE_NAME,ID,IMG_ID,CAT_ID,BBOX = 'file_name','id','image_id','category_id','bbox'

# Lookup tables built from the JSON:
#   cats    : category id -> category name
#   trn_fns : image id    -> image file name
#   trn_ids : image ids in the order they appear in the file
cats = dict((o[ID], o['name']) for o in trn_j[CATEGORIES])
trn_fns = dict((o[ID], o[FILE_NAME]) for o in trn_j[IMAGES])
trn_ids = [o[ID] for o in trn_j[IMAGES]]

# Image folder, relative to PATH (JPEGS) and as a full path (IMG_PATH).
JPEGS = 'VOCdevkit/VOC2007/JPEGImages'
IMG_PATH = PATH/JPEGS

In [ ]:
def get_trn_anno():
    """Group the non-ignored annotations in `trn_j` by image id.

    Returns a defaultdict mapping image id -> list of `(bbox, category_id)`
    tuples. Each stored bbox is a 4-element array re-ordered from the JSON
    box `[b0, b1, b2, b3]` to `[b1, b0, b3+b1-1, b2+b0-1]`.
    NOTE(review): presumably this turns `[x, y, width, height]` into
    inclusive `[top, left, bottom, right]` corners - confirm against the data.
    """
    anno_by_img = collections.defaultdict(list)
    for ann in trn_j[ANNOTATIONS]:
        if ann['ignore']: continue
        raw = ann[BBOX]
        corners = np.array([raw[1], raw[0], raw[3]+raw[1]-1, raw[2]+raw[0]-1])
        anno_by_img[ann[IMG_ID]].append((corners, ann[CAT_ID]))
    return anno_by_img

trn_anno = get_trn_anno()

In [ ]:
def show_img(im, figsize=None, ax=None):
    """Show image `im` on `ax` with an unlabelled grid and return the axes.

    A new figure of size `figsize` is created when no axes is passed in.
    Grid lines are placed at 8 evenly spaced positions between 0 and 224 on
    both axes, and the tick labels are hidden.
    """
    if not ax:
        _, ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    ticks = np.linspace(0, 224, 8)
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.grid()
    # Keep the grid but drop the numbers along both axes.
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    return ax

def draw_outline(o, lw):
    """Outline matplotlib artist `o` with a black stroke of line width `lw`."""
    stroke = patheffects.Stroke(linewidth=lw, foreground='black')
    # Stroke first, then the normal rendering of the artist on top of it.
    o.set_path_effects([stroke, patheffects.Normal()])

def draw_rect(ax, b, color='white'):
    """Draw an unfilled, outlined rectangle on `ax`.

    `b[:2]` is passed as the rectangle's xy anchor and `b[-2:]` as its
    width and height.
    """
    rect = patches.Rectangle(b[:2], *b[-2:], fill=False, edgecolor=color, lw=2)
    draw_outline(ax.add_patch(rect), 4)

def draw_text(ax, xy, txt, sz=14, color='white'):
    """Write bold, outlined text `txt` on `ax`, top-aligned at position `xy`.

    `sz` is the font size and `color` the text colour.
    """
    text_style = dict(verticalalignment='top', color=color, fontsize=sz, weight='bold')
    label = ax.text(*xy, txt, **text_style)
    draw_outline(label, 1)

In [ ]:
def bb_hw(a):
    """Convert a corner box `[a0, a1, a2, a3]` to `[a1, a0, a3-a1+1, a2-a0+1]`.

    The result is what `draw_rect` expects: an xy anchor followed by
    width and height (corners are treated as inclusive, hence the +1).
    """
    first_lo, second_lo, first_hi, second_hi = a[0], a[1], a[2], a[3]
    return np.array([second_lo, first_lo, second_hi-second_lo+1, first_hi-first_lo+1])

def draw_im(im, ann):
    """Show image `im` with every annotation in `ann` drawn on top.

    `ann` is an iterable of `(bbox, category_id)` pairs; each box is
    converted with `bb_hw` and labelled with its name from `cats`.
    """
    ax = show_img(im, figsize=(16,8))
    for box, cat_id in ann:
        rect = bb_hw(box)
        draw_rect(ax, rect)
        draw_text(ax, rect[:2], cats[cat_id], sz=16)

def draw_idx(i):
    """Open the training image with id `i` and draw its annotations."""
    annos = trn_anno[i]
    img = open_image(IMG_PATH/trn_fns[i])
    draw_im(img, annos)

Multi class


In [ ]:
MC_CSV = PATH/'tmp/mc.csv'

In [ ]:
trn_anno[12]


Out[ ]:
[(array([ 96, 155, 269, 350]), 7)]

In [ ]:
# For every training image, the set of distinct category names present in it
# (duplicates collapse because a set is used).
mc = [set([cats[p[1]] for p in trn_anno[o]]) for o in trn_ids]
# Same information as one space-separated string per image, ready for the CSV.
mcs = [' '.join(str(p) for p in o) for o in mc]

In [ ]:
df = pd.DataFrame({'fn': [trn_fns[o] for o in trn_ids], 'clas': mcs}, columns=['fn','clas'])
df.to_csv(MC_CSV, index=False)

In [ ]:
f_model=resnet34
sz=224
bs=64

In [ ]:
tfms = tfms_from_model(f_model, sz, crop_type=CropType.NO)
md = ImageClassifierData.from_csv(PATH, JPEGS, MC_CSV, tfms=tfms, bs=bs)

In [ ]:
learn = ConvLearner.pretrained(f_model, md)
learn.opt_fn = optim.Adam

In [ ]:
lrf=learn.lr_find(1e-5,100)


epoch      trn_loss   val_loss   <lambda>                                                                              
    0      1.472438   10.808105  0.608842  


In [ ]:
learn.sched.plot(0)



In [ ]:
lr = 2e-2

In [ ]:
learn.fit(lr, 1, cycle_len=3, use_clr=(32,5))


epoch      trn_loss   val_loss   <lambda>                  
    0      0.104836   0.085015   0.972356  
    1      0.088193   0.079739   0.972461                   
    2      0.072346   0.077259   0.974114                   

Out[ ]:
[0.077258907, 0.9741135761141777]

In [ ]:
lrs = np.array([lr/100, lr/10, lr])

In [ ]:
learn.freeze_to(-2)

In [ ]:
learn.lr_find(lrs/1000)
learn.sched.plot(0)


 81%|█████████████████████████████████████████████████████████▋             | 26/32 [00:22<00:05,  1.15it/s, loss=0.33]

In [ ]:
learn.fit(lrs/10, 1, cycle_len=5, use_clr=(32,5))


epoch      trn_loss   val_loss   <lambda>                   
    0      0.063236   0.088847   0.970681  
    1      0.049675   0.079885   0.973723                   
    2      0.03693    0.076906   0.975601                   
    3      0.026645   0.075304   0.976187                   
    4      0.018805   0.074934   0.975165                   

Out[ ]:
[0.074934497, 0.97516526281833649]

In [ ]:
learn.save('mclas')

In [ ]:
learn.load('mclas')

In [ ]:
y = learn.predict()
x,_ = next(iter(md.val_dl))
x = to_np(x)

In [ ]:
# Show the first 12 images of the batch with the predicted class names.
fig, axes = plt.subplots(3, 4, figsize=(12, 8))
for i,ax in enumerate(axes.flat):
    ima=md.val_ds.denorm(x)[i]
    # Indices of the classes whose prediction for image i exceeds 0.4.
    ya = np.nonzero(y[i]>0.4)[0]
    # One class name per line.
    b = '\n'.join(md.classes[o] for o in ya)
    ax = show_img(ima, ax=ax)
    draw_text(ax, (0,0), b)
plt.tight_layout()


Bbox per cell

Set up data


In [ ]:
CLAS_CSV = PATH/'tmp/clas.csv'
MBB_CSV = PATH/'tmp/mbb.csv'

f_model=resnet34
sz=224
bs=64

In [ ]:
# Category name of every annotated object, per image. Unlike the set-based
# `mc` built for the multi-class section, duplicates are kept here.
mc = [[cats[p[1]] for p in trn_anno[o]] for o in trn_ids]
# id2cat: position -> category name; cat2id: category name -> that 0-based position.
id2cat = list(cats.values())
cat2id = {v:k for k,v in enumerate(id2cat)}
# One integer array of class indices per image; the trailing `mcs` displays it.
mcs = np.array([np.array([cat2id[p] for p in o]) for o in mc]); mcs


Out[ ]:
array([array([6]), array([14, 12]), array([ 1,  1, 14, 14, 14]), ..., array([17,  8, 14, 14, 14]), array([6]),
       array([11])], dtype=object)

In [ ]:
val_idxs = get_cv_idxs(len(trn_fns))
((val_mcs,trn_mcs),) = split_by_idx(val_idxs, mcs)

In [ ]:
# All boxes of an image concatenated into one flat array (4 numbers per object).
mbb = [np.concatenate([p[0] for p in trn_anno[o]]) for o in trn_ids]
# The same numbers as one space-separated string per image.
mbbs = [' '.join(str(p) for p in o) for o in mbb]

# Write file name + flattened boxes to MBB_CSV, one row per training image.
df = pd.DataFrame({'fn': [trn_fns[o] for o in trn_ids], 'bbox': mbbs}, columns=['fn','bbox'])
df.to_csv(MBB_CSV, index=False)

In [ ]:
df.head()


Out[ ]:
fn bbox
0 000012.jpg 96 155 269 350
1 000017.jpg 61 184 198 278 77 89 335 402
2 000023.jpg 229 8 499 244 219 229 499 333 0 1 368 116 1 2 ...
3 000026.jpg 124 89 211 336
4 000032.jpg 77 103 182 374 87 132 122 196 179 194 228 212 ...

In [ ]:
# Augmentations for the training set. Every transform is given
# tfm_y=TfmType.COORD - presumably so the box coordinates in y are transformed
# together with the image; TODO confirm against the fastai 0.7 transforms.
aug_tfms = [RandomRotate(3, p=0.5, tfm_y=TfmType.COORD),
            RandomLighting(0.05, 0.05, tfm_y=TfmType.COORD),
            RandomFlip(tfm_y=TfmType.COORD)]
# No cropping (CropType.NO); images are processed at size `sz`.
tfms = tfms_from_model(f_model, sz, crop_type=CropType.NO, tfm_y=TfmType.COORD, aug_tfms=aug_tfms)
# continuous=True: the labels read from MBB_CSV are numeric box coordinates, not class names.
md = ImageClassifierData.from_csv(PATH, JPEGS, MBB_CSV, tfms=tfms, bs=bs, continuous=True, num_workers=4)

In [ ]:
import matplotlib.cm as cmx
import matplotlib.colors as mcolors
from cycler import cycler

def get_cmap(N):
    """Return a function mapping a value in `[0, N-1]` to an RGBA colour.

    Values are normalised linearly over `0..N-1` and looked up in
    matplotlib's 'Set3' colormap.
    """
    scale = mcolors.Normalize(vmin=0, vmax=N-1)
    mappable = cmx.ScalarMappable(norm=scale, cmap='Set3')
    return mappable.to_rgba

# Palette of `num_colr` RGBA colours; show_ground_truth cycles through it by box index.
num_colr = 12
cmap = get_cmap(num_colr)
colr_list = [cmap(float(x)) for x in range(num_colr)]

In [ ]:
def show_ground_truth(ax, im, bbox, clas=None, prs=None, thresh=0.3):
    """Draw boxes (with optional class labels and probabilities) over an image.

    Parameters
    ----------
    ax     : matplotlib axes to draw on.
    im     : image to display.
    bbox   : array reshaped to `(-1, 4)`; each row is a corner box in the
             layout `bb_hw` accepts.
    clas   : optional per-box class indices; `len(id2cat)` is shown as 'bg'.
    prs    : optional per-box probabilities; when given, only boxes with
             `pr > thresh` are drawn.
    thresh : probability threshold used together with `prs`.

    Empty boxes (zero extent along the axis `bb_hw` reports as width, e.g.
    all-zero padding rows) are skipped.
    """
    corners = bbox.reshape(-1,4)
    bb = [bb_hw(o) for o in corners]
    if prs is None:  prs  = [None]*len(bb)
    if clas is None: clas = [None]*len(bb)
    ax = show_img(im, ax=ax)
    for i,(raw,b,c,pr) in enumerate(zip(corners, bb, clas, prs)):
        # bb_hw adds 1 to the width, so `b[2] > 0` would also accept all-zero
        # padding boxes; test the raw corner extent instead.
        if (raw[3]-raw[1] > 0) and (pr is None or pr > thresh):
            colr = colr_list[i%num_colr]
            draw_rect(ax, b, color=colr)
            txt = f'{i}: '
            if c is not None: txt += ('bg' if c==len(id2cat) else id2cat[c])
            if pr is not None: txt += f' {pr:.2f}'
            draw_text(ax, b[:2], txt, color=colr)

In [ ]:
class ConcatLblDataset(Dataset):
    """Dataset wrapper that attaches a second label to every item of `ds`.

    Item `i` is `(x, (y, y2[i]))` where `(x, y)` is `ds[i]`. The wrapped
    dataset's `sz` attribute is re-exposed as `self.sz`.
    """
    def __init__(self, ds, y2):
        self.ds = ds
        self.y2 = y2
        self.sz = ds.sz

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, i):
        x, y = self.ds[i]
        extra = self.y2[i]
        return (x, (y, extra))

In [ ]:
# Pair the bbox datasets with the per-object class arrays so each item's
# target becomes (bboxes, classes).
trn_ds2 = ConcatLblDataset(md.trn_ds, trn_mcs)
val_ds2 = ConcatLblDataset(md.val_ds, val_mcs)
# Swap the wrapped datasets into the existing data loaders in place.
md.trn_dl.dataset = trn_ds2
md.val_dl.dataset = val_ds2

In [ ]:
x,y=to_np(next(iter(md.val_dl)))
x=md.val_ds.ds.denorm(x)

In [ ]:
x,y=to_np(next(iter(md.trn_dl)))
x=md.trn_ds.ds.denorm(x)

In [ ]:
fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for i,ax in enumerate(axes.flat):
    show_ground_truth(ax, x[i], y[0][i], y[1][i])
plt.tight_layout()


Set up model

We're going to start with a simple model that predicts which object is located in each cell of a 4x4 grid. Later on we can try to improve this.


In [ ]:
# Single-scale anchors: an anc_grid x anc_grid grid with k anchors per cell.
anc_grid = 4
k = 1

# Cell centres sit half a cell in from the border, in [0, 1] image coordinates.
anc_offset = 1/(anc_grid*2)
# repeat vs. tile: the first coordinate changes slowly, the second quickly,
# so together they enumerate every cell centre of the grid.
anc_x = np.repeat(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)
anc_y = np.tile(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)

# Rows of `anchors` are [centre0, centre1, size0, size1]; every anchor is one cell in size.
anc_ctrs = np.tile(np.stack([anc_x,anc_y], axis=1), (k,1))
# NOTE(review): anc_sizes has anc_grid*anc_grid rows while anc_ctrs has k times
# as many - the shapes only agree for k == 1, as used here.
anc_sizes = np.array([[1/anc_grid,1/anc_grid] for i in range(anc_grid*anc_grid)])
anchors = V(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()

In [ ]:
grid_sizes = V(np.array([1/anc_grid]), requires_grad=False).unsqueeze(1)

In [ ]:
plt.scatter(anc_x, anc_y)
plt.xlim(0, 1)
plt.ylim(0, 1);



In [ ]:
anchors


Out[ ]:
Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.3750  0.1250  0.2500  0.2500
 0.3750  0.3750  0.2500  0.2500
 0.3750  0.6250  0.2500  0.2500
 0.3750  0.8750  0.2500  0.2500
 0.6250  0.1250  0.2500  0.2500
 0.6250  0.3750  0.2500  0.2500
 0.6250  0.6250  0.2500  0.2500
 0.6250  0.8750  0.2500  0.2500
 0.8750  0.1250  0.2500  0.2500
 0.8750  0.3750  0.2500  0.2500
 0.8750  0.6250  0.2500  0.2500
 0.8750  0.8750  0.2500  0.2500
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]

In [ ]:
def hw2corners(ctr, hw):
    """Convert centre/size boxes to corner boxes.

    `ctr` and `hw` are 2-D tensors with one row per box; the result
    concatenates `ctr - hw/2` and `ctr + hw/2` along dim 1.
    """
    half = hw/2
    return torch.cat([ctr-half, ctr+half], dim=1)

In [ ]:
anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])
anchor_cnr


Out[ ]:
Variable containing:
 0.0000  0.0000  0.2500  0.2500
 0.0000  0.2500  0.2500  0.5000
 0.0000  0.5000  0.2500  0.7500
 0.0000  0.7500  0.2500  1.0000
 0.2500  0.0000  0.5000  0.2500
 0.2500  0.2500  0.5000  0.5000
 0.2500  0.5000  0.5000  0.7500
 0.2500  0.7500  0.5000  1.0000
 0.5000  0.0000  0.7500  0.2500
 0.5000  0.2500  0.7500  0.5000
 0.5000  0.5000  0.7500  0.7500
 0.5000  0.7500  0.7500  1.0000
 0.7500  0.0000  1.0000  0.2500
 0.7500  0.2500  1.0000  0.5000
 0.7500  0.5000  1.0000  0.7500
 0.7500  0.7500  1.0000  1.0000
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]

In [ ]:
n_clas = len(id2cat)+1
n_act = k*(4+n_clas)

In [ ]:
class StdConv(nn.Module):
    """3x3 convolution (padding 1) followed by ReLU, batch norm and dropout.

    Parameters: `nin`/`nout` input/output channels, `stride` of the
    convolution (default 2), `drop` dropout probability (default 0.1).
    """
    def __init__(self, nin, nout, stride=2, drop=0.1):
        super().__init__()
        self.conv = nn.Conv2d(nin, nout, 3, stride=stride, padding=1)
        self.bn = nn.BatchNorm2d(nout)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        # Order matters: conv -> relu -> batch norm -> dropout.
        activated = F.relu(self.conv(x))
        normed = self.bn(activated)
        return self.drop(normed)
        
def flatten_conv(x,k):
    """Flatten a 4-D conv output `(bs, nf, gx, gy)` to `(bs, -1, nf//k)`.

    Channels are moved to the last axis first, so each output row holds the
    activations of one grid position (split into `k` groups of `nf//k`).
    """
    bs, nf, _, _ = x.size()
    channels_last = x.permute(0,2,3,1).contiguous()
    return channels_last.view(bs, -1, nf//k)

In [ ]:
class OutConv(nn.Module):
    """Output layer with two parallel 3x3 convolutions over `nin` channels.

    `oconv1` produces `(len(id2cat)+1)*k` class activations per position and
    `oconv2` produces `4*k` box activations. The bias of `oconv1` is
    initialised to the constant `bias`.
    """
    def __init__(self, k, nin, bias):
        super().__init__()
        self.k = k
        n_clas_out = (len(id2cat)+1)*k
        self.oconv1 = nn.Conv2d(nin, n_clas_out, 3, padding=1)
        self.oconv2 = nn.Conv2d(nin, 4*k, 3, padding=1)
        # Zero the classifier bias, then shift it to the requested constant.
        self.oconv1.bias.data.zero_().add_(bias)

    def forward(self, x):
        """Return `[class_activations, box_activations]`, each flattened by `flatten_conv`."""
        clas_out = flatten_conv(self.oconv1(x), self.k)
        bbox_out = flatten_conv(self.oconv2(x), self.k)
        return [clas_out, bbox_out]

In [ ]:
class SSD_Head(nn.Module):
    """Custom head: dropout -> StdConv(512,256, stride 1) -> StdConv(256,256) -> OutConv.

    `k` is the number of anchors per grid cell and `bias` the initial
    classifier bias, both forwarded to `OutConv`.
    """
    def __init__(self, k, bias):
        super().__init__()
        self.drop = nn.Dropout(0.25)
        self.sconv0 = StdConv(512,256, stride=1)
        # The notebook keeps an optional extra stage disabled:
        #   self.sconv1 = StdConv(256,256)
        self.sconv2 = StdConv(256,256)
        self.out = OutConv(k, 256, bias)

    def forward(self, x):
        """Return the `[class, box]` activations produced by `self.out`."""
        feats = self.drop(F.relu(x))
        for stage in (self.sconv0, self.sconv2):
            feats = stage(feats)
        return self.out(feats)

head_reg4 = SSD_Head(k, -3.)
models = ConvnetBuilder(f_model, 0, 0, 0, custom_head=head_reg4)
learn = ConvLearner(md, models)
learn.opt_fn = optim.Adam
k


Out[ ]:
1

Train


In [ ]:
def one_hot_embedding(labels, num_classes):
    """One-hot encode integer `labels` as rows of a `num_classes` identity matrix.

    The labels are moved to the CPU before indexing, so the result is a
    CPU tensor.
    """
    identity = torch.eye(num_classes)
    cpu_labels = labels.data.cpu()
    return identity[cpu_labels]

class BCE_Loss(nn.Module):
    """Summed binary cross-entropy over the `num_classes` object classes.

    Targets are one-hot encoded with `num_classes+1` columns and the last
    column is dropped from both target and prediction, so an item whose
    label is `num_classes` gets an all-zero target row.
    """
    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

    def forward(self, pred, targ):
        """Return the summed BCE-with-logits loss divided by `num_classes`."""
        onehot = one_hot_embedding(targ, self.num_classes+1)
        t = V(onehot[:,:-1].contiguous())
        x = pred[:,:-1]
        weight = self.get_weight(x,t)
        total = F.binary_cross_entropy_with_logits(x, t, weight, size_average=False)
        return total/self.num_classes

    def get_weight(self,x,t):
        """Per-element loss weight; the base class uses none."""
        return None

loss_f = BCE_Loss(len(id2cat))

In [ ]:
def intersect(box_a, box_b):
    """Pairwise intersection areas of two sets of corner boxes.

    `box_a` is `(A, 4)` and `box_b` is `(B, 4)`, with the low corner in
    columns `:2` and the high corner in columns `2:`. Returns an `(A, B)`
    tensor; non-overlapping pairs give 0.
    """
    a = box_a[:, None]
    b = box_b[None, :]
    low = torch.max(a[:, :, :2], b[:, :, :2])
    high = torch.min(a[:, :, 2:], b[:, :, 2:])
    sides = torch.clamp(high - low, min=0)
    return sides[:, :, 0] * sides[:, :, 1]

def box_sz(b):
    """Area of each corner box in the `(N, 4)` tensor `b`."""
    extent0 = b[:, 2] - b[:, 0]
    extent1 = b[:, 3] - b[:, 1]
    return extent0 * extent1

def jaccard(box_a, box_b):
    """Pairwise intersection-over-union of two sets of corner boxes.

    Returns an `(A, B)` tensor for `box_a` of shape `(A, 4)` and `box_b`
    of shape `(B, 4)`.
    """
    inter = intersect(box_a, box_b)
    area_a = box_sz(box_a).unsqueeze(1)
    area_b = box_sz(box_b).unsqueeze(0)
    return inter / (area_a + area_b - inter)

In [ ]:
def get_y(bbox,clas):
    """Normalise one image's target boxes and drop the empty ones.

    `bbox` is viewed as `(-1, 4)` and divided by the global image size `sz`.
    Rows whose extent `[:,2] - [:,0]` is not positive are removed from both
    the boxes and `clas`.
    """
    scaled = bbox.view(-1,4)/sz
    extent = scaled[:,2]-scaled[:,0]
    keep = (extent>0).nonzero()[:,0]
    return scaled[keep],clas[keep]

def actn_to_bb(actn, anchors):
    """Turn raw box activations into corner boxes relative to `anchors`.

    Activations are squashed with tanh. The first two move each anchor
    centre by at most half of `grid_sizes`; the last two scale the anchor
    size by a factor between 0.5 and 1.5.
    """
    squashed = torch.tanh(actn)
    ctr_shift = squashed[:,:2]/2
    size_scale = squashed[:,2:]/2+1
    centers = (ctr_shift * grid_sizes) + anchors[:,:2]
    sizes = size_scale * anchors[:,2:]
    return hw2corners(centers, sizes)

def map_to_ground_truth(overlaps, print_it=False):
    """Assign a ground-truth object to every anchor.

    `overlaps` is a `(n_objects, n_anchors)` overlap matrix. Each anchor
    starts with the object it overlaps most. Every object's single best
    anchor is then forced to that object and its overlap set to 1.99, so
    it passes any threshold. Returns `(gt_overlap, gt_idx)`, both of
    length `n_anchors`. With `print_it`, the best overlap per object is printed.
    """
    best_per_obj, best_anchor = overlaps.max(1)
    if print_it: print(best_per_obj)
    gt_overlap, gt_idx = overlaps.max(0)
    gt_overlap[best_anchor] = 1.99
    # Kept as a loop: if two objects share a best anchor, the later one wins.
    for obj_i, anchor_i in enumerate(best_anchor):
        gt_idx[anchor_i] = obj_i
    return gt_overlap,gt_idx

def ssd_1_loss(b_c,b_bb,bbox,clas,print_it=False):
    """Localisation and classification loss for a single image.

    `b_c`/`b_bb` are the predicted class and box activations, `bbox`/`clas`
    the padded targets. Anchors whose matched overlap exceeds 0.4 are
    positives; all others are relabelled as background (`len(id2cat)`).
    Returns `(loc_loss, clas_loss)`: the mean absolute corner error over
    positive anchors, and `loss_f` over all anchors.
    """
    bbox,clas = get_y(bbox,clas)
    pred_bb = actn_to_bb(b_bb, anchors)
    iou = jaccard(bbox.data, anchor_cnr.data)
    gt_overlap,gt_idx = map_to_ground_truth(iou,print_it)
    pos = gt_overlap > 0.4
    pos_idx = torch.nonzero(pos)[:,0]
    gt_clas = clas[gt_idx]
    # Every non-positive anchor becomes the background class.
    gt_clas[1-pos] = len(id2cat)
    gt_bbox = bbox[gt_idx]
    corner_err = (pred_bb[pos_idx] - gt_bbox[pos_idx]).abs()
    return corner_err.mean(), loss_f(b_c, gt_clas)

def ssd_loss(pred,targ,print_it=False):
    """Sum `ssd_1_loss` over every image of a batch.

    `pred` is `(class_activations, box_activations)` and `targ` is
    `(bboxes, classes)`, each indexed by image. Returns the localisation
    total plus the classification total; with `print_it` both totals are printed.
    """
    loc_total,clas_total = 0.,0.
    for b_c,b_bb,bbox,clas in zip(*pred,*targ):
        loc_1,clas_1 = ssd_1_loss(b_c,b_bb,bbox,clas,print_it)
        loc_total += loc_1
        clas_total += clas_1
    if print_it: print(f'loc: {loc_total.data[0]}, clas: {clas_total.data[0]}')
    return loc_total+clas_total

In [ ]:
x,y = next(iter(md.val_dl))
# x,y = V(x).cpu(),V(y)
x,y = V(x),V(y)

In [ ]:
for i,o in enumerate(y): y[i] = o.cuda()
learn.model.cuda()

In [ ]:
batch = learn.model(x)

In [ ]:
# uncomment to debug on cpu
#anchors = anchors.cpu(); grid_sizes = grid_sizes.cpu(); anchor_cnr = anchor_cnr.cpu()

In [ ]:
ssd_loss(batch, y, True)


 0.1947
 0.1168
 0.2652
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2885
 0.0888
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  9.9027
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1608
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3237
 0.2153
 0.2558
 0.2013
 0.2526
 0.0485
 0.0879
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3258
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2704
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3985
 0.4538
 0.1897
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.1527
 0.1863
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3426
 0.3249
 0.5062
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.0642
 0.2506
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2027
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2418
 0.2337
 0.2590
[torch.cuda.FloatTensor of size 3 (GPU 0)]


1.00000e-02 *
  8.4642
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3652
 0.1377
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  9.5146
  5.7398
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2041
 0.1148
 0.1341
 0.1650
 0.0384
 0.2213
 0.1477
 0.2520
 0.2531
 0.2129
 0.2144
 0.1795
 0.3002
 0.3057
[torch.cuda.FloatTensor of size 14 (GPU 0)]


 0.2097
 0.2182
 0.2786
 0.2973
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.2568
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2184
 0.2459
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1166
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.0898
 0.0548
 0.4860
 0.0865
 0.1805
 0.2080
 0.2583
 0.0650
 0.0383
[torch.cuda.FloatTensor of size 9 (GPU 0)]


 0.2222
 0.1000
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  6.6300
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1940
 0.1498
 0.4352
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5732
 0.1231
 0.2356
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2515
 0.2851
 0.2107
 0.2351
 0.2572
 0.1801
 0.2538
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.2544
 0.0842
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1890
 0.2767
 0.2161
 0.2104
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.1465
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3846
 0.4679
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1677
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3781
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1589
 0.1125
 0.1994
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2309
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2164
 0.4026
 0.3522
 0.2881
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4166
 0.3824
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1823
 0.0647
 0.0404
 0.1737
 0.1553
 0.3090
 0.3726
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.2105
 0.2143
 0.1074
 0.1572
 0.1939
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.1817
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.0536
 0.2392
 0.4061
 0.0804
 0.3463
 0.3876
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.1975
 0.1799
 0.2146
 0.0935
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.2553
 0.1721
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2017
 0.0885
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4367
 0.2400
 0.1817
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2471
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3207
 0.2089
 0.6309
 0.1183
 0.2568
[torch.cuda.FloatTensor of size 5 (GPU 0)]


1.00000e-02 *
  8.3850
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1024
 0.2968
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  8.3770
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2832
 0.1478
 0.0903
 0.3304
 0.1316
 0.1940
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.4223
 0.1600
 0.2250
 0.3211
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.3666
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1067
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1610
 0.1593
 0.3415
 0.6606
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.3255
 0.3394
 0.3390
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2139
 0.3500
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1369
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1455
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2794
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2309
[torch.cuda.FloatTensor of size 1 (GPU 0)]


1.00000e-02 *
  6.3919
  9.1493
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4062
 0.2180
 0.1307
 0.5762
 0.1524
 0.4794
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.1128
[torch.cuda.FloatTensor of size 1 (GPU 0)]

loc: 10.175130844116211, clas: 72.91587829589844
Out[ ]:
Variable containing:
 83.0910
[torch.cuda.FloatTensor of size 1 (GPU 0)]

In [ ]:
learn.crit = ssd_loss
lr = 3e-3
lrs = np.array([lr/100,lr/10,lr])

In [ ]:
learn.lr_find(lrs/1000,1.)
learn.sched.plot(1)


epoch      trn_loss   val_loss                            
    0      86.852668  32587.789062


In [ ]:
learn.fit(lr, 1, cycle_len=5, use_clr=(20,10))


epoch      trn_loss   val_loss                            
    0      43.166077  32.56049  
    1      33.731625  28.329123                           
    2      29.498006  27.387726                           
    3      26.590789  26.043869                           
    4      24.470896  25.746592                           

Out[ ]:
[25.746592]

In [ ]:
learn.save('0')

In [ ]:
learn.load('0')

Testing


In [ ]:
x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
learn.model.eval()
batch = learn.model(x)
b_clas,b_bb = batch

In [ ]:
b_clas.size(),b_bb.size()


Out[ ]:
(torch.Size([64, 16, 21]), torch.Size([64, 16, 4]))

In [ ]:
idx=7
b_clasi = b_clas[idx]
b_bboxi = b_bb[idx]
ima=md.val_ds.ds.denorm(to_np(x))[idx]
bbox,clas = get_y(y[0][idx], y[1][idx])
bbox,clas


Out[ ]:
(Variable containing:
  0.6786  0.4866  0.9911  0.6250
  0.7098  0.0848  0.9911  0.5491
  0.5134  0.8304  0.6696  0.9063
 [torch.cuda.FloatTensor of size 3x4 (GPU 0)], Variable containing:
   8
  10
  17
 [torch.cuda.LongTensor of size 3 (GPU 0)])

In [ ]:
def torch_gt(ax, ima, bbox, clas, prs=None, thresh=0.4):
    """Plot torch boxes given in `[0, 1]` image coordinates via `show_ground_truth`.

    Parameters
    ----------
    ax     : matplotlib axes to draw on.
    ima    : image to display.
    bbox   : tensor of corner boxes in `[0, 1]` coordinates.
    clas   : tensor of class indices, one per box.
    prs    : optional tensor of per-box probabilities.
    thresh : probability threshold forwarded to `show_ground_truth`.
    """
    # Scale back to pixels with the global image size `sz` - the same constant
    # get_y divides by - rather than a literal 224, so the two stay in sync.
    px_bbox = to_np((bbox*sz).long())
    np_prs = to_np(prs) if prs is not None else None
    return show_ground_truth(ax, ima, px_bbox, to_np(clas), np_prs, thresh)

In [ ]:
fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, bbox, clas)



In [ ]:
fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, anchor_cnr, b_clasi.max(1)[1])



In [ ]:
grid_sizes


Out[ ]:
Variable containing:
 0.2500
[torch.cuda.FloatTensor of size 1x1 (GPU 0)]

In [ ]:
anchors


Out[ ]:
Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.3750  0.1250  0.2500  0.2500
 0.3750  0.3750  0.2500  0.2500
 0.3750  0.6250  0.2500  0.2500
 0.3750  0.8750  0.2500  0.2500
 0.6250  0.1250  0.2500  0.2500
 0.6250  0.3750  0.2500  0.2500
 0.6250  0.6250  0.2500  0.2500
 0.6250  0.8750  0.2500  0.2500
 0.8750  0.1250  0.2500  0.2500
 0.8750  0.3750  0.2500  0.2500
 0.8750  0.6250  0.2500  0.2500
 0.8750  0.8750  0.2500  0.2500
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]

In [ ]:
a_ic = actn_to_bb(b_bboxi, anchors)

In [ ]:
fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, a_ic, b_clasi.max(1)[1], b_clasi.max(1)[0].sigmoid(), thresh=0.0)



In [ ]:
overlaps = jaccard(bbox.data, anchor_cnr.data)
overlaps


Out[ ]:

Columns 0 to 9 
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0091
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0356  0.0549
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 10 to 15 
 0.0922  0.0000  0.0000  0.0315  0.3985  0.0000
 0.0103  0.0000  0.2598  0.4538  0.0653  0.0000
 0.0000  0.1897  0.0000  0.0000  0.0000  0.0000
[torch.cuda.FloatTensor of size 3x16 (GPU 0)]

In [ ]:
overlaps.max(1)


Out[ ]:
(
  0.3985
  0.4538
  0.1897
 [torch.cuda.FloatTensor of size 3 (GPU 0)], 
  14
  13
  11
 [torch.cuda.LongTensor of size 3 (GPU 0)])

In [ ]:
overlaps.max(0)


Out[ ]:
(
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0356
  0.0549
  0.0922
  0.1897
  0.2598
  0.4538
  0.3985
  0.0000
 [torch.cuda.FloatTensor of size 16 (GPU 0)], 
  0
  0
  0
  0
  0
  0
  0
  0
  1
  1
  0
  2
  1
  1
  0
  0
 [torch.cuda.LongTensor of size 16 (GPU 0)])

In [ ]:
gt_overlap,gt_idx = map_to_ground_truth(overlaps)
gt_overlap,gt_idx


Out[ ]:
(
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0356
  0.0549
  0.0922
  1.9900
  0.2598
  1.9900
  1.9900
  0.0000
 [torch.cuda.FloatTensor of size 16 (GPU 0)], 
  0
  0
  0
  0
  0
  0
  0
  0
  1
  1
  0
  2
  1
  1
  0
  0
 [torch.cuda.LongTensor of size 16 (GPU 0)])

In [ ]:
gt_clas = clas[gt_idx]; gt_clas


Out[ ]:
Variable containing:
  8
  8
  8
  8
  8
  8
  8
  8
 10
 10
  8
 17
 10
 10
  8
  8
[torch.cuda.LongTensor of size 16 (GPU 0)]

In [ ]:
# Walk-through of the matching step done inside ssd_1_loss.
# NOTE(review): this cell thresholds at 0.5 while ssd_1_loss uses 0.4.
thresh = 0.5
# Anchors whose matched overlap exceeds the threshold are positives ...
pos = gt_overlap > thresh
pos_idx = torch.nonzero(pos)[:,0]
# ... and all remaining anchors are negatives (background).
neg_idx = torch.nonzero(1-pos)[:,0]
pos_idx


Out[ ]:
 11
 13
 14
[torch.cuda.LongTensor of size 3 (GPU 0)]

In [ ]:
gt_clas[1-pos] = len(id2cat)
[id2cat[o] if o<len(id2cat) else 'bg' for o in gt_clas.data]


Out[ ]:
['bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'sofa',
 'bg',
 'diningtable',
 'chair',
 'bg']

In [ ]:
gt_bbox = bbox[gt_idx]
loc_loss = ((a_ic[pos_idx] - gt_bbox[pos_idx]).abs()).mean()
clas_loss  = F.cross_entropy(b_clasi, gt_clas)
loc_loss,clas_loss


Out[ ]:
(Variable containing:
 1.00000e-02 *
   6.5691
 [torch.cuda.FloatTensor of size 1 (GPU 0)], Variable containing:
  1.1215
 [torch.cuda.FloatTensor of size 1 (GPU 0)])

In [ ]:
# Show the predictions for the first 12 validation images of the batch.
fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for idx,ax in enumerate(axes.flat):
    ima=md.val_ds.ds.denorm(to_np(x))[idx]
    # get_y already divides by `sz`; it must be applied only once per image
    # (a second call would normalise the boxes twice).
    bbox,clas = get_y(y[0][idx], y[1][idx])
    a_ic = actn_to_bb(b_bb[idx], anchors)
    # Label each anchor with its highest-scoring class and that class's sigmoid score.
    torch_gt(ax, ima, a_ic, b_clas[idx].max(1)[1], b_clas[idx].max(1)[0].sigmoid(), 0.01)
plt.tight_layout()


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

More anchors!

Create anchors


In [ ]:
# Multi-scale anchors: one grid per entry of anc_grids (4x4, 2x2 and 1x1).
# The commented-out alternatives below are reduced settings for experimenting.
anc_grids = [4,2,1]
# anc_grids = [2]
# Zoom factors applied to the cell size ...
anc_zooms = [0.7, 1., 1.3]
# anc_zooms = [1.]
# ... and per-axis ratios (square, and half-size along either axis).
anc_ratios = [(1.,1.), (1.,0.5), (0.5,1.)]
# anc_ratios = [(1.,1.)]
# Every zoom/ratio combination gives one anchor shape; k shapes per grid cell.
anchor_scales = [(anz*i,anz*j) for anz in anc_zooms for (i,j) in anc_ratios]
k = len(anchor_scales)
# Offset of the first cell centre from the border, per grid.
anc_offsets = [1/(o*2) for o in anc_grids]
k


Out[ ]:
9

In [ ]:
# Cell-centre coordinates of every grid, concatenated grid after grid
# (repeat vs. tile enumerates all cells of each grid).
anc_x = np.concatenate([np.repeat(np.linspace(ao, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
anc_y = np.concatenate([np.tile(np.linspace(ao, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
# Each centre is repeated k times in a row, so the k anchors of a cell are consecutive.
anc_ctrs = np.repeat(np.stack([anc_x,anc_y], axis=1), k, axis=0)

In [ ]:
# Anchor sizes: for every cell of every grid, each (o,p) scale divided by the
# grid resolution - listed in the same order as anc_ctrs.
anc_sizes  =   np.concatenate([np.array([[o/ag,p/ag] for i in range(ag*ag) for o,p in anchor_scales])
               for ag in anc_grids])
# Cell size of the grid each anchor belongs to, as a column vector (one row per anchor).
grid_sizes = V(np.concatenate([np.array([ 1/ag       for i in range(ag*ag) for o,p in anchor_scales])
               for ag in anc_grids]), requires_grad=False).unsqueeze(1)
# Rows of `anchors` are [centre0, centre1, size0, size1]; anchor_cnr holds the same boxes as corners.
anchors = V(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()
anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])

In [ ]:
anchors


Out[ ]:
Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.1250  0.2500  0.1250
 0.1250  0.1250  0.1250  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.3750  0.2500  0.1250
 0.1250  0.3750  0.1250  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.6250  0.2500  0.1250
 0.1250  0.6250  0.1250  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.1250  0.8750  0.2500  0.1250
 0.1250  0.8750  0.1250  0.2500
 0.3750  0.1250  0.2500  0.2500
 0.3750  0.1250  0.2500  0.1250
 0.3750  0.1250  0.1250  0.2500
 0.3750  0.3750  0.2500  0.2500
 0.3750  0.3750  0.2500  0.1250
 0.3750  0.3750  0.1250  0.2500
 0.3750  0.6250  0.2500  0.2500
 0.3750  0.6250  0.2500  0.1250
 0.3750  0.6250  0.1250  0.2500
 0.3750  0.8750  0.2500  0.2500
 0.3750  0.8750  0.2500  0.1250
 0.3750  0.8750  0.1250  0.2500
 0.6250  0.1250  0.2500  0.2500
 0.6250  0.1250  0.2500  0.1250
 0.6250  0.1250  0.1250  0.2500
 0.6250  0.3750  0.2500  0.2500
 0.6250  0.3750  0.2500  0.1250
 0.6250  0.3750  0.1250  0.2500
 0.6250  0.6250  0.2500  0.2500
 0.6250  0.6250  0.2500  0.1250
 0.6250  0.6250  0.1250  0.2500
 0.6250  0.8750  0.2500  0.2500
 0.6250  0.8750  0.2500  0.1250
 0.6250  0.8750  0.1250  0.2500
 0.8750  0.1250  0.2500  0.2500
 0.8750  0.1250  0.2500  0.1250
 0.8750  0.1250  0.1250  0.2500
 0.8750  0.3750  0.2500  0.2500
 0.8750  0.3750  0.2500  0.1250
 0.8750  0.3750  0.1250  0.2500
 0.8750  0.6250  0.2500  0.2500
 0.8750  0.6250  0.2500  0.1250
 0.8750  0.6250  0.1250  0.2500
 0.8750  0.8750  0.2500  0.2500
 0.8750  0.8750  0.2500  0.1250
 0.8750  0.8750  0.1250  0.2500
 0.2500  0.2500  0.5000  0.5000
 0.2500  0.2500  0.5000  0.2500
 0.2500  0.2500  0.2500  0.5000
 0.2500  0.7500  0.5000  0.5000
 0.2500  0.7500  0.5000  0.2500
 0.2500  0.7500  0.2500  0.5000
 0.7500  0.2500  0.5000  0.5000
 0.7500  0.2500  0.5000  0.2500
 0.7500  0.2500  0.2500  0.5000
 0.7500  0.7500  0.5000  0.5000
 0.7500  0.7500  0.5000  0.2500
 0.7500  0.7500  0.2500  0.5000
 0.5000  0.5000  1.0000  1.0000
 0.5000  0.5000  1.0000  0.5000
 0.5000  0.5000  0.5000  1.0000
[torch.cuda.FloatTensor of size 63x4 (GPU 0)]

In [ ]:
x,y=to_np(next(iter(md.val_dl)))
x=md.val_ds.ds.denorm(x)

In [ ]:
a=np.reshape((to_np(anchor_cnr) + to_np(torch.randn(*anchor_cnr.size()))*0.01)*224, -1)

In [ ]:
fig, ax = plt.subplots(figsize=(7,7))
show_ground_truth(ax, x[0], a)


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

In [ ]:
fig, ax = plt.subplots(figsize=(7,7))
show_ground_truth(ax, x[0], a)


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

Model


In [ ]:
drop=0.4

class SSD_MultiHead(nn.Module):
    """Custom head that predicts from three successive stride-2 feature maps.

    After dropout and a stride-1 `sconv0`, each of `sconv1`, `sconv2` and
    `sconv3` halves the resolution and is followed by its own `OutConv`.
    The three class outputs and the three box outputs are concatenated
    along dim 1. Dropout probabilities come from the global `drop`.
    """
    def __init__(self, k, bias):
        super().__init__()
        self.drop = nn.Dropout(drop)
        self.sconv0 = StdConv(512,256, stride=1, drop=drop)
        self.sconv1 = StdConv(256,256, drop=drop)
        self.sconv2 = StdConv(256,256, drop=drop)
        self.sconv3 = StdConv(256,256, drop=drop)
        # NOTE(review): out0 is never used in forward(); it is kept so the
        # module's parameters (and any saved weights) stay the same.
        self.out0 = OutConv(k, 256, bias)
        self.out1 = OutConv(k, 256, bias)
        self.out2 = OutConv(k, 256, bias)
        self.out3 = OutConv(k, 256, bias)

    def forward(self, x):
        """Return `[class_activations, box_activations]` over all three scales."""
        x = self.drop(F.relu(x))
        x = self.sconv0(x)
        clas_outs, loc_outs = [], []
        stages = ((self.sconv1, self.out1),
                  (self.sconv2, self.out2),
                  (self.sconv3, self.out3))
        for sconv, out in stages:
            x = sconv(x)
            clas_act, loc_act = out(x)
            clas_outs.append(clas_act)
            loc_outs.append(loc_act)
        return [torch.cat(clas_outs, dim=1),
                torch.cat(loc_outs, dim=1)]

head_reg4 = SSD_MultiHead(k, -4.)
models = ConvnetBuilder(f_model, 0, 0, 0, custom_head=head_reg4)
learn = ConvLearner(md, models)
learn.opt_fn = optim.Adam

In [ ]:
learn.crit = ssd_loss
lr = 1e-2
lrs = np.array([lr/100,lr/10,lr])

In [ ]:
x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
batch = learn.model(V(x))

In [ ]:
batch[0].size(),batch[1].size()


Out[ ]:
(torch.Size([64, 189, 21]), torch.Size([64, 189, 4]))

In [ ]:
ssd_loss(batch, y, True)


 0.5598
 0.7922
 0.3095
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5718
 0.7035
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.8714
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7268
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5163
 0.4763
 0.4033
 0.4986
 0.2990
 0.0887
 0.1046
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3789
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5153
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3985
 0.6215
 0.5547
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.4246
 0.5208
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3436
 0.3257
 0.5062
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.9734
 0.2506
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3900
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7722
 0.5395
 0.6392
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.7618
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4679
 0.8070
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6597
 0.1274
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2041
 0.2584
 0.1665
 0.2373
 0.0872
 0.3571
 0.1477
 0.2520
 0.4103
 0.3394
 0.2884
 0.4922
 0.3787
 0.3083
[torch.cuda.FloatTensor of size 14 (GPU 0)]


 0.3465
 0.4702
 0.3075
 0.3699
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.6350
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4666
 0.5763
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6857
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1651
 0.0595
 0.6267
 0.2088
 0.3256
 0.3768
 0.4768
 0.0901
 0.0670
[torch.cuda.FloatTensor of size 9 (GPU 0)]


 0.2862
 0.6248
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.9427
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6937
 0.7272
 0.4980
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5732
 0.7237
 0.9103
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2828
 0.3771
 0.4462
 0.4403
 0.4001
 0.4693
 0.2927
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3013
 0.1132
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4877
 0.3506
 0.2161
 0.5820
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.7152
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5290
 0.6212
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6086
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5147
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6993
 0.1816
 0.3097
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5795
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5134
 0.5408
 0.3522
 0.4801
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4166
 0.4327
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4907
 0.1219
 0.0792
 0.5814
 0.5660
 0.6971
 0.3726
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.5459
 0.3704
 0.1074
 0.1848
 0.4760
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.4316
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1364
 0.6287
 0.4061
 0.1304
 0.3701
 0.4181
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.5735
 0.2463
 0.2852
 0.6738
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5688
 0.6468
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2888
 0.7060
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4828
 0.2400
 0.2915
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.3020
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4500
 0.2089
 0.7658
 0.5281
 0.4424
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.7547
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7615
 0.4178
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7539
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2938
 0.1583
 0.1342
 0.5076
 0.1785
 0.2339
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.6536
 0.4844
 0.4022
 0.3861
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5740
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5858
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7901
 0.5316
 0.5802
 0.6606
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4405
 0.4869
 0.5088
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5414
 0.5224
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7278
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7401
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4864
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4314
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.9778
 0.7884
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4372
 0.5654
 0.2238
 0.5762
 0.6364
 0.4794
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.7330
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.5301
[torch.cuda.FloatTensor of size 1 (GPU 0)]
 Variable containing:
 61.3364
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Out[ ]:
Variable containing:
 65.8664
[torch.cuda.FloatTensor of size 1 (GPU 0)]

In [ ]:
learn.lr_find(lrs/1000,1.)
learn.sched.plot(n_skip_end=2)


 94%|█████████▍| 30/32 [00:18<00:01,  1.65it/s, loss=89.7]

In [ ]:
learn.fit(lrs, 1, cycle_len=4, use_clr=(20,8))


epoch      trn_loss   val_loss                            
    0      23.020269  22.007149 
    1      19.23732   15.323267                           
    2      16.612079  13.967303                           
    3      14.706582  12.920008                           

Out[ ]:
[12.920008]

In [ ]:
learn.save('tmp')

In [ ]:
learn.freeze_to(-2)
learn.fit(lrs/2, 1, cycle_len=4, use_clr=(20,8))


epoch      trn_loss   val_loss                            
    0      14.021227  17.886932 
    1      13.386686  12.754044                           
    2      12.297876  11.913645                           
    3      11.29237   11.368293                           

Out[ ]:
[11.368293]

In [ ]:
learn.save('prefocal')

In [ ]:
# Run the model on one validation batch and plot the first 12 predictions.
x,y = next(iter(md.val_dl))
y = V(y)
batch = learn.model(V(x))
b_clas,b_bb = batch
# Keep a numpy copy of the inputs for denormalising/plotting.
x = to_np(x)

fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for idx,ax in enumerate(axes.flat):
    ima=md.val_ds.ds.denorm(x)[idx]
    # Ground truth for this image; not used by the plot below.
    bbox,clas = get_y(y[0][idx], y[1][idx])
    a_ic = actn_to_bb(b_bb[idx], anchors)
    # Draw every anchor whose best class score (after sigmoid) exceeds 0.21.
    torch_gt(ax, ima, a_ic, b_clas[idx].max(1)[1], b_clas[idx].max(1)[0].sigmoid(), 0.21)
plt.tight_layout()


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

Focal loss


In [ ]:
def plot_results(thresh):
    """Plot model predictions for the first 12 images of a validation batch.

    Reads the notebook globals `md`, `learn` and `anchors`. For every image
    the cut-off handed to `torch_gt` is `thresh` times that image's highest
    per-anchor class probability, so `thresh` is relative, not absolute.

    Args:
        thresh: multiplier applied to the per-image maximum probability.
    """
    val_x, val_y = next(iter(md.val_dl))
    val_y = V(val_y)
    pred_clas, pred_bb = learn.model(V(val_x))

    val_x = to_np(val_x)
    fig, axes = plt.subplots(3, 4, figsize=(16, 12))
    for n, axis in enumerate(axes.flat):
        img = md.val_ds.ds.denorm(val_x)[n]
        # Ground-truth lookup kept from the original; its result is not drawn.
        gt_bbox, gt_clas = get_y(val_y[0][n], val_y[1][n])
        boxes = actn_to_bb(pred_bb[n], anchors)
        # Strongest class activation per anchor and the class index it belongs to.
        top_actn, top_ids = pred_clas[n].max(1)
        top_prob = top_actn.sigmoid()
        cutoff = top_prob.max().data[0]*thresh
        torch_gt(axis, img, boxes, top_ids, top_prob, cutoff)
    plt.tight_layout()

In [ ]:
class FocalLoss(BCE_Loss):
    """BCE_Loss variant that supplies focal-loss style per-element weights.

    Only `get_weight` is overridden; how the returned weights are applied is
    decided by the parent `BCE_Loss` (not shown in this cell).

    Class attributes:
        alpha: weight given to positive targets; negatives get `1 - alpha`.
        gamma: exponent of the modulating factor `(1 - pt)`.

    Both are class attributes so they can be tuned on an instance
    (e.g. `loss_f.gamma = 2`) without redefining the class.
    """
    alpha,gamma = 0.25,1

    def get_weight(self,x,t):
        """Return the focal weight for activations `x` and targets `t`.

        Assumes `t` holds 0/1 targets with the same shape as `x` — confirm
        against BCE_Loss. Under that assumption `pt` is the predicted
        probability of the true label, so well-classified elements
        (pt near 1) get a weight near 0.
        """
        p = x.sigmoid()
        # p where t == 1, (1 - p) where t == 0.
        pt = p*t + (1-p)*(1-t)
        # alpha for positives, (1 - alpha) for negatives.
        w = self.alpha*t + (1-self.alpha)*(1-t)
        return w * (1-pt).pow(self.gamma)

loss_f = FocalLoss(len(id2cat))

In [ ]:
# Sanity-check the loss on one validation batch now that `loss_f` has been
# rebound to FocalLoss (assumes ssd_loss reads the global `loss_f` — confirm).
x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
batch = learn.model(x)
# The third argument presumably turns on the diagnostic printing seen in the
# output below (tensor dumps and the "loc: ..., clas: ..." line) — confirm.
ssd_loss(batch, y, True)


 0.5598
 0.7922
 0.3095
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5718
 0.7035
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.8714
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7268
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5163
 0.4763
 0.4033
 0.4986
 0.2990
 0.0887
 0.1046
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3789
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5153
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3985
 0.6215
 0.5547
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.4246
 0.5208
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3436
 0.3257
 0.5062
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.9734
 0.2506
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3900
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7722
 0.5395
 0.6392
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.7618
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4679
 0.8070
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6597
 0.1274
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2041
 0.2584
 0.1665
 0.2373
 0.0872
 0.3571
 0.1477
 0.2520
 0.4103
 0.3394
 0.2884
 0.4922
 0.3787
 0.3083
[torch.cuda.FloatTensor of size 14 (GPU 0)]


 0.3465
 0.4702
 0.3075
 0.3699
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.6350
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4666
 0.5763
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6857
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1651
 0.0595
 0.6267
 0.2088
 0.3256
 0.3768
 0.4768
 0.0901
 0.0670
[torch.cuda.FloatTensor of size 9 (GPU 0)]


 0.2862
 0.6248
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.9427
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6937
 0.7272
 0.4980
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5732
 0.7237
 0.9103
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2828
 0.3771
 0.4462
 0.4403
 0.4001
 0.4693
 0.2927
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3013
 0.1132
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4877
 0.3506
 0.2161
 0.5820
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.7152
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5290
 0.6212
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6086
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5147
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6993
 0.1816
 0.3097
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5795
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5134
 0.5408
 0.3522
 0.4801
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4166
 0.4327
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4907
 0.1219
 0.0792
 0.5814
 0.5660
 0.6971
 0.3726
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.5459
 0.3704
 0.1074
 0.1848
 0.4760
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.4316
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1364
 0.6287
 0.4061
 0.1304
 0.3701
 0.4181
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.5735
 0.2463
 0.2852
 0.6738
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5688
 0.6468
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2888
 0.7060
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4828
 0.2400
 0.2915
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.3020
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4500
 0.2089
 0.7658
 0.5281
 0.4424
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.7547
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7615
 0.4178
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7539
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2938
 0.1583
 0.1342
 0.5076
 0.1785
 0.2339
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.6536
 0.4844
 0.4022
 0.3861
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5740
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5858
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7901
 0.5316
 0.5802
 0.6606
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4405
 0.4869
 0.5088
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5414
 0.5224
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7278
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7401
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4864
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4314
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.9778
 0.7884
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4372
 0.5654
 0.2238
 0.5762
 0.6364
 0.4794
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.7330
[torch.cuda.FloatTensor of size 1 (GPU 0)]

loc: 3.6088805198669434, clas: 7.331346035003662
Out[ ]:
Variable containing:
 10.9402
[torch.cuda.FloatTensor of size 1 (GPU 0)]

In [ ]:
# Repeat the learning-rate range test (lrs/1000 up to 1.0) with the focal loss.
learn.lr_find(lrs/1000,1.)
# n_skip_end=1: presumably drops the last recorded point from the plot — confirm.
learn.sched.plot(n_skip_end=1)


 19%|█▉        | 6/32 [00:05<00:24,  1.07it/s, loss=67.7]
Exception in thread Thread-228:
Traceback (most recent call last):
  File "/home/jhoward/anaconda3/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/jhoward/anaconda3/lib/python3.6/site-packages/tqdm/_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "/home/jhoward/anaconda3/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration

 72%|███████▏  | 23/32 [00:15<00:06,  1.44it/s, loss=nan] 

In [ ]:
# One cycle of 10 epochs (the log below has 10 rows) at the learning rates `lrs`.
# use_clr=(20,10) selects fastai's cyclical-LR schedule; the meaning of the
# two numbers is not visible here — confirm against the fastai 0.7 docs.
learn.fit(lrs, 1, cycle_len=10, use_clr=(20,10))


epoch      trn_loss   val_loss                            
    0      27.244542  32.936592 
    1      24.687115  22.024887                           
    2      22.251388  19.824711                           
    3      20.238359  18.530561                           
    4      18.613792  17.730865                           
    5      17.376519  17.323956                           
    6      16.33999   16.968851                           
    7      15.425277  16.894522                           
    8      14.683091  16.533207                           
    9      14.044275  16.332354                           

Out[ ]:
[16.332354]

In [ ]:
# Checkpoint the focal-loss model under the name 'fl0'.
learn.save('fl0')

In [ ]:
# Reload the 'fl0' checkpoint; presumably a resume point so later cells can
# be re-run without repeating the training above.
learn.load('fl0')

In [ ]:
# Change which layer groups are frozen; presumably leaves the last two groups
# trainable (fastai freeze_to semantics — confirm).
learn.freeze_to(-2)
# Another 10-epoch cycle, at a quarter of the learning rates.
learn.fit(lrs/4, 1, cycle_len=10, use_clr=(20,10))


epoch      trn_loss   val_loss                            
    0      13.991107  17.163681 
    1      14.275143  16.685173                           
    2      13.81701   16.067303                           
    3      13.172081  15.567028                           
    4      12.474847  15.480181                           
    5      11.774984  15.262911                           
    6      11.21864   15.10038                            
    7      10.711037  15.184849                           
    8      10.215424  14.942656                           
    9      9.871555   14.910997                           

Out[ ]:
[14.910997]

In [ ]:
# Checkpoint the fine-tuned focal-loss model under the name 'drop4'.
learn.save('drop4')

In [ ]:
# Reload the 'drop4' checkpoint; presumably a resume point for the
# visualisation cells that follow.
learn.load('drop4')

In [ ]:
# plot_results scales its argument by each image's highest class probability,
# so 0.75 means a cut-off at 75% of the per-image maximum.
plot_results(0.75)


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

NMS


In [ ]:
def nms(boxes, scores, overlap=0.5, top_k=100):
    """Greedy non-maximum suppression over a set of scored boxes.

    Repeatedly keeps the highest-scoring remaining box and discards every
    remaining box whose IoU with it is greater than `overlap`.

    Args:
        boxes: 2-D tensor with one row per box; columns 0..3 are read as
            (x1, y1, x2, y2), i.e. two opposite corners.
        scores: 1-D tensor with one score per row of `boxes`.
        overlap: boxes with IoU <= `overlap` against a kept box survive.
        top_k: only the `top_k` highest-scoring boxes are considered.

    Returns:
        A `(keep, count)` pair. `keep` is a long tensor of length
        `scores.size(0)`; its first `count` entries are the indices of the
        kept boxes in descending score order, and the rest are zero.
        For empty input this is `(keep, 0)`.
    """
    keep = scores.new(scores.size(0)).zero_().long()
    # Return the same (keep, count) shape as the normal path so callers that
    # unpack two values do not fail on empty input.
    if boxes.numel() == 0: return keep, 0
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = (x2 - x1) * (y2 - y1)
    _, idx = scores.sort(0)  # ascending, so the best candidate is always last
    idx = idx[-top_k:]  # indices of the top-k largest vals

    count = 0
    while idx.numel() > 0:
        i = int(idx[-1])  # index of current largest val
        keep[count] = i
        count += 1
        if idx.size(0) == 1: break
        idx = idx[:-1]  # remove kept element from view
        # Corners of the intersection between box i and each remaining box.
        xx1 = torch.clamp(x1.index_select(0, idx), min=float(x1[i]))
        yy1 = torch.clamp(y1.index_select(0, idx), min=float(y1[i]))
        xx2 = torch.clamp(x2.index_select(0, idx), max=float(x2[i]))
        yy2 = torch.clamp(y2.index_select(0, idx), max=float(y2[i]))
        # Disjoint boxes give negative extents; clamp them to zero.
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w*h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = area.index_select(0, idx)
        union = (rem_areas - inter) + area[i]
        IoU = inter/union
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count

In [ ]:
# Run the model on the first validation batch and leave the results in
# notebook globals: `x`, `b_clas` and `b_bb` are read by show_nmf below.
x,y = next(iter(md.val_dl))
y = V(y)
batch = learn.model(V(x))
# The model output unpacks into class activations and box activations.
b_clas,b_bb = batch
x = to_np(x)

In [ ]:
def show_nmf(idx, conf_thresh=0.25, nms_overlap=0.4, top_k=50, show_thresh=0.1):
    """Plot the predictions for batch image `idx` after per-class NMS.

    Reads the notebook globals `md`, `x`, `b_clas`, `b_bb` and `anchors`,
    which are set by the cell that runs the model on a validation batch.

    Args:
        idx: position of the image within the current batch.
        conf_thresh: an anchor is considered for a class only when its
            sigmoid score for that class is greater than this value.
        nms_overlap: IoU limit passed to `nms`.
        top_k: maximum number of candidates per class passed to `nms`.
        show_thresh: passed as the last argument to `torch_gt`
            (presumably its display threshold — confirm).

    Returns:
        None. Prints "<idx>: empty array" and returns without plotting when
        no class has a score above `conf_thresh`.
    """
    ima=md.val_ds.ds.denorm(x)[idx]
    a_ic = actn_to_bb(b_bb[idx], anchors)

    # Transposed so that conf_scores[cl] holds the scores of class `cl` for
    # every anchor.
    conf_scores = b_clas[idx].sigmoid().t().data

    out1,out2,cc = [],[],[]
    # The last class row is skipped (presumably the background class — confirm).
    for cl in range(0, len(conf_scores)-1):
        c_mask = conf_scores[cl] > conf_thresh
        if c_mask.sum() == 0: continue
        scores = conf_scores[cl][c_mask]
        # Broadcast the per-anchor mask over the 4 box coordinates.
        l_mask = c_mask.unsqueeze(1).expand_as(a_ic)
        boxes = a_ic[l_mask].view(-1, 4)
        ids, count = nms(boxes.data, scores, nms_overlap, top_k)
        ids = ids[:count]
        out1.append(scores[ids])
        out2.append(boxes.data[ids])
        cc.append([cl]*count)
    if not cc:
        # Report the image actually processed (the parameter), not whatever a
        # global loop variable happens to hold.
        print(f"{idx}: empty array")
        return
    cc = T(np.concatenate(cc))
    out1 = torch.cat(out1)
    out2 = torch.cat(out2)

    fig, ax = plt.subplots(figsize=(8,8))
    torch_gt(ax, ima, out2, cc, out1, show_thresh)

In [ ]:
# Show the NMS-filtered predictions for the first 12 images of the batch
# loaded above (assumes the batch holds at least 12 images).
for i in range(12): show_nmf(i)


Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

End


In [ ]: