Important: This notebook will only work with fastai-0.7.x. Do not try to run any fastai-1.x code from this path in the repository because it will load fastai-0.7.x



In [ ]:

    
%matplotlib inline
%reload_ext autoreload
%autoreload 2



In [ ]:

    
from fastai.conv_learner import *
from fastai.dataset import *

import json, pdb
from PIL import ImageDraw, ImageFont
from matplotlib import patches, patheffects
torch.cuda.set_device(0)



In [ ]:

    
torch.backends.cudnn.benchmark=True

Setup



In [ ]:

    
# Root of the Pascal VOC data; every other path in this notebook hangs off it.
PATH = Path('data/pascal')

# Read the training annotations inside a `with` block so the file handle is
# closed deterministically instead of being left open for the garbage collector.
with (PATH / 'pascal_train2007.json').open() as trn_json_file:
    trn_j = json.load(trn_json_file)

# Names of the JSON keys used throughout, bound once to avoid typos.
IMAGES,ANNOTATIONS,CATEGORIES = ['images', 'annotations', 'categories']
FILE_NAME,ID,IMG_ID,CAT_ID,BBOX = 'file_name','id','image_id','category_id','bbox'

# category id -> category name
cats = {o[ID]: o['name'] for o in trn_j[CATEGORIES]}
# image id -> image file name
trn_fns = {o[ID]: o[FILE_NAME] for o in trn_j[IMAGES]}
# image ids in the order they appear in the JSON; this order defines row order everywhere below
trn_ids = [o[ID] for o in trn_j[IMAGES]]

# Image folder, relative to PATH (JPEGS) and as a full path (IMG_PATH).
JPEGS = 'VOCdevkit/VOC2007/JPEGImages'
IMG_PATH = PATH/JPEGS



In [ ]:

    
def get_trn_anno():
    """Group the non-ignored training annotations by image id.

    Returns a defaultdict mapping image id -> list of (bbox, category_id)
    tuples. Each stored bbox is a 4-element numpy array built from the JSON
    box [b0, b1, b2, b3] as [b1, b0, b1+b3-1, b0+b2-1]
    (presumably x,y,w,h -> top,left,bottom,right; confirm against the dataset format).
    """
    by_image = collections.defaultdict(list)
    for ann in trn_j[ANNOTATIONS]:
        if ann['ignore']:
            continue
        src = ann[BBOX]
        corners = np.array([src[1], src[0], src[3]+src[1]-1, src[2]+src[0]-1])
        by_image[ann[IMG_ID]].append((corners, ann[CAT_ID]))
    return by_image

trn_anno = get_trn_anno()



In [ ]:

    
def show_img(im, figsize=None, ax=None):
    """Show `im` on a matplotlib axis with an unlabeled grid and return the axis.

    im:      image accepted by `ax.imshow`.
    figsize: size of the new figure; only used when `ax` is not supplied.
    ax:      existing axis to draw on; a new figure/axis is created when None.

    Tick marks (and so the grid lines) are placed at 8 evenly spaced positions
    between 0 and 224 on both axes; the tick labels are hidden.
    """
    # Compare against None explicitly: truthiness of an arbitrary axis-like
    # object is not a reliable "was it passed?" signal.
    if ax is None:
        _, ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    ax.set_xticks(np.linspace(0, 224, 8))
    ax.set_yticks(np.linspace(0, 224, 8))
    ax.grid()
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    return ax

def draw_outline(o, lw):
    """Give matplotlib artist `o` a black outline of line width `lw`."""
    stroke = patheffects.Stroke(linewidth=lw, foreground='black')
    o.set_path_effects([stroke, patheffects.Normal()])

def draw_rect(ax, b, color='white'):
    """Draw an unfilled, black-outlined rectangle on `ax`.

    `b[:2]` is passed as the rectangle's anchor point and `b[-2:]` as its
    two size arguments (the layout produced by `bb_hw`).
    """
    rect = patches.Rectangle(b[:2], *b[-2:], fill=False, edgecolor=color, lw=2)
    draw_outline(ax.add_patch(rect), 4)

def draw_text(ax, xy, txt, sz=14, color='white'):
    """Write bold, top-aligned, black-outlined text `txt` at position `xy` on `ax`."""
    style = dict(verticalalignment='top', color=color, fontsize=sz, weight='bold')
    label = ax.text(*xy, txt, **style)
    draw_outline(label, 1)



In [ ]:

    
def bb_hw(a):
    """Convert a corner box [a0, a1, a2, a3] to [a1, a0, a3-a1+1, a2-a0+1].

    This is the inverse of the conversion applied in `get_trn_anno`: the
    result has the layout `draw_rect` expects (anchor point first, then the
    two extents).
    """
    first_lo, second_lo, first_hi, second_hi = a[0], a[1], a[2], a[3]
    return np.array([second_lo, first_lo,
                     second_hi - second_lo + 1,
                     first_hi - first_lo + 1])

def draw_im(im, ann):
    """Show `im` and overlay every (bbox, category_id) pair in `ann`.

    Each box is converted with `bb_hw`, drawn as a rectangle, and labelled
    with its category name from `cats`.
    """
    axis = show_img(im, figsize=(16,8))
    for corners, cat_id in ann:
        box = bb_hw(corners)
        draw_rect(axis, box)
        draw_text(axis, box[:2], cats[cat_id], sz=16)

def draw_idx(i):
    """Open the training image with id `i` and draw it with its annotations."""
    annotations = trn_anno[i]
    image = open_image(IMG_PATH/trn_fns[i])
    draw_im(image, annotations)

Multi class



In [ ]:

    
MC_CSV = PATH/'tmp/mc.csv'



In [ ]:

    
trn_anno[12]









    Out[ ]:





[(array([ 96, 155, 269, 350]), 7)]



In [ ]:

    
# For each image (in trn_ids order): the set of distinct category names it contains.
mc = [{cats[ann[1]] for ann in trn_anno[img_id]} for img_id in trn_ids]
# The same, as one space-separated string per image (the label column of the CSV).
mcs = [' '.join(map(str, names)) for names in mc]



In [ ]:

    
# One row per training image: file name and its space-separated class names.
df = pd.DataFrame({'fn': [trn_fns[o] for o in trn_ids], 'clas': mcs}, columns=['fn','clas'])
# Nothing earlier in this notebook creates the `tmp` folder, so make sure it
# exists before writing; otherwise to_csv fails on a fresh data directory.
MC_CSV.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(MC_CSV, index=False)



In [ ]:

    
# Settings for the multi-label classifier below: backbone passed to
# tfms_from_model / ConvLearner.pretrained, input image size, and batch size.
f_model=resnet34
sz=224
bs=64



In [ ]:

    
# Build the image transforms for `f_model` at size `sz`, then the data object
# from the multi-label CSV written above.
# NOTE(review): CropType.NO presumably resizes without cropping so no object
# is cut out of the frame; confirm against the fastai 0.7 transforms source.
tfms = tfms_from_model(f_model, sz, crop_type=CropType.NO)
md = ImageClassifierData.from_csv(PATH, JPEGS, MC_CSV, tfms=tfms, bs=bs)



In [ ]:

    
# Learner built from the pretrained `f_model` backbone on the multi-label data;
# Adam is set as the optimizer constructor used by subsequent fits.
learn = ConvLearner.pretrained(f_model, md)
learn.opt_fn = optim.Adam



In [ ]:

    
lrf=learn.lr_find(1e-5,100)









    





 
 










    



epoch      trn_loss   val_loss   <lambda>                                                                              
    0      1.472438   10.808105  0.608842



In [ ]:

    
learn.sched.plot(0)



In [ ]:

    
lr = 2e-2



In [ ]:

    
learn.fit(lr, 1, cycle_len=3, use_clr=(32,5))









    





 
 










    



epoch      trn_loss   val_loss   <lambda>                  
    0      0.104836   0.085015   0.972356  
    1      0.088193   0.079739   0.972461                   
    2      0.072346   0.077259   0.974114                   







    Out[ ]:





[0.077258907, 0.9741135761141777]



In [ ]:

    
lrs = np.array([lr/100, lr/10, lr])



In [ ]:

    
learn.freeze_to(-2)



In [ ]:

    
learn.lr_find(lrs/1000)
learn.sched.plot(0)









    





 
 










    



 81%|█████████████████████████████████████████████████████████▋             | 26/32 [00:22<00:05,  1.15it/s, loss=0.33]



In [ ]:

    
learn.fit(lrs/10, 1, cycle_len=5, use_clr=(32,5))









    





 
 










    



epoch      trn_loss   val_loss   <lambda>                   
    0      0.063236   0.088847   0.970681  
    1      0.049675   0.079885   0.973723                   
    2      0.03693    0.076906   0.975601                   
    3      0.026645   0.075304   0.976187                   
    4      0.018805   0.074934   0.975165                   







    Out[ ]:





[0.074934497, 0.97516526281833649]



In [ ]:

    
learn.save('mclas')



In [ ]:

    
learn.load('mclas')



In [ ]:

    
# Model predictions (presumably over the validation set; confirm learn.predict() defaults).
y = learn.predict()
# First validation batch of inputs only (targets discarded), as a numpy array for plotting.
# NOTE(review): the plot below pairs x[i] with y[i]; this assumes val_dl is not shuffled.
x,_ = next(iter(md.val_dl))
x = to_np(x)



In [ ]:

    
# Show the first 12 validation images with every class whose predicted
# score exceeds 0.4 written in the top-left corner.
fig, axes = plt.subplots(3, 4, figsize=(12, 8))
# Denormalizing the batch does not depend on the loop index, so do it once
# instead of once per subplot.
x_denorm = md.val_ds.denorm(x)
for i,ax in enumerate(axes.flat):
    ima = x_denorm[i]
    ya = np.nonzero(y[i]>0.4)[0]           # indices of classes above the threshold
    b = '\n'.join(md.classes[o] for o in ya)
    ax = show_img(ima, ax=ax)
    draw_text(ax, (0,0), b)
plt.tight_layout()

Bbox per cell

Set up data



In [ ]:

    
# CSV paths for this section. Only MBB_CSV is written in the cells visible
# here; CLAS_CSV is defined but not referenced in this part of the notebook.
CLAS_CSV = PATH/'tmp/clas.csv'
MBB_CSV = PATH/'tmp/mbb.csv'

# Backbone, input size and batch size (same values as in the multi-class section).
f_model=resnet34
sz=224
bs=64



In [ ]:

    
# Per image (in trn_ids order): category name of every annotated object, duplicates kept.
mc = [[cats[p[1]] for p in trn_anno[o]] for o in trn_ids]
# Dense class index <-> category name lookups (index = position in `cats`).
id2cat = list(cats.values())
cat2id = {v:k for k,v in enumerate(id2cat)}
# One integer array per image. The arrays have different lengths, so request an
# object array explicitly: newer NumPy versions refuse to build ragged arrays
# implicitly, and older ones produce the same object array either way.
mcs = np.array([np.array([cat2id[p] for p in o]) for o in mc], dtype=object); mcs









    Out[ ]:





array([array([6]), array([14, 12]), array([ 1,  1, 14, 14, 14]), ..., array([17,  8, 14, 14, 14]), array([6]),
       array([11])], dtype=object)



In [ ]:

    
# Split the per-image class arrays into validation and training parts.
# NOTE(review): these labels are later paired positionally with md.val_ds /
# md.trn_ds through ConcatLblDataset, so this split must match the one
# ImageClassifierData.from_csv makes internally; confirm both rely on the
# same get_cv_idxs defaults.
val_idxs = get_cv_idxs(len(trn_fns))
((val_mcs,trn_mcs),) = split_by_idx(val_idxs, mcs)



In [ ]:

    
# Per image: all of its boxes concatenated into one flat array (4 numbers per object).
# np.concatenate needs at least one box, so this assumes every image in
# trn_ids has at least one non-ignored annotation.
mbb = [np.concatenate([p[0] for p in trn_anno[o]]) for o in trn_ids]
# The same, as one space-separated string per image.
mbbs = [' '.join(str(p) for p in o) for o in mbb]

df = pd.DataFrame({'fn': [trn_fns[o] for o in trn_ids], 'bbox': mbbs}, columns=['fn','bbox'])
# Make sure the `tmp` folder exists so this section can run on its own.
MBB_CSV.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(MBB_CSV, index=False)



In [ ]:

    
df.head()









    Out[ ]:







  
    
      
      fn
      bbox
    
  
  
    
      0
      000012.jpg
      96 155 269 350
    
    
      1
      000017.jpg
      61 184 198 278 77 89 335 402
    
    
      2
      000023.jpg
      229 8 499 244 219 229 499 333 0 1 368 116 1 2 ...
    
    
      3
      000026.jpg
      124 89 211 336
    
    
      4
      000032.jpg
      77 103 182 374 87 132 122 196 179 194 228 212 ...



In [ ]:

    
# Augmentations; each one is created with tfm_y=TfmType.COORD
# (presumably so the box coordinates are transformed together with the image; confirm in fastai 0.7).
aug_tfms = [RandomRotate(3, p=0.5, tfm_y=TfmType.COORD),
            RandomLighting(0.05, 0.05, tfm_y=TfmType.COORD),
            RandomFlip(tfm_y=TfmType.COORD)]
tfms = tfms_from_model(f_model, sz, crop_type=CropType.NO, tfm_y=TfmType.COORD, aug_tfms=aug_tfms)
# The bbox CSV is loaded with continuous=True, i.e. the label column is read as numbers, not class names.
md = ImageClassifierData.from_csv(PATH, JPEGS, MBB_CSV, tfms=tfms, bs=bs, continuous=True, num_workers=4)



In [ ]:

    
import matplotlib.cm as cmx
import matplotlib.colors as mcolors
from cycler import cycler

def get_cmap(N):
    """Return a function mapping a value in [0, N-1] to an RGBA colour from the 'Set3' colormap."""
    mappable = cmx.ScalarMappable(norm=mcolors.Normalize(vmin=0, vmax=N-1), cmap='Set3')
    return mappable.to_rgba

# Fixed palette of 12 colours; the plotting code indexes it with i % num_colr
# so boxes beyond the 12th reuse colours.
num_colr = 12
cmap = get_cmap(num_colr)
colr_list = [cmap(float(x)) for x in range(num_colr)]



In [ ]:

    
def show_ground_truth(ax, im, bbox, clas=None, prs=None, thresh=0.3):
    """Draw image `im` on `ax` and overlay boxes with optional class / score labels.

    bbox:   array reshaped to (-1, 4) corner boxes, converted with `bb_hw`.
    clas:   optional class index per box; index len(id2cat) is labelled 'bg'.
    prs:    optional score per box; when given, boxes with score <= thresh are skipped.
    thresh: score threshold, only applied when a score is present.

    Boxes whose third `bb_hw` component is not positive are skipped.
    """
    boxes = [bb_hw(o) for o in bbox.reshape(-1,4)]
    n_boxes = len(boxes)
    if prs is None:
        prs = [None]*n_boxes
    if clas is None:
        clas = [None]*n_boxes
    ax = show_img(im, ax=ax)
    for i, (b, c, pr) in enumerate(zip(boxes, clas, prs)):
        if not (b[2] > 0):
            continue
        if pr is not None and not (pr > thresh):
            continue
        colour = colr_list[i%num_colr]
        draw_rect(ax, b, color=colour)
        txt = f'{i}: '
        if c is not None:
            txt += 'bg' if c==len(id2cat) else id2cat[c]
        if pr is not None:
            txt += f' {pr:.2f}'
        draw_text(ax, b[:2], txt, color=colour)



In [ ]:

    
class ConcatLblDataset(Dataset):
    """Wrap dataset `ds` so that each item carries a second label taken from `y2`.

    Item i is (x, (y, y2[i])) where (x, y) = ds[i]. The wrapper has the same
    length as `ds` and copies its `sz` attribute.
    """

    def __init__(self, ds, y2):
        self.ds = ds
        self.y2 = y2
        self.sz = ds.sz

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, i):
        inputs, first_label = self.ds[i]
        return (inputs, (first_label, self.y2[i]))



In [ ]:

    
# Attach the per-image class arrays to the bbox datasets so every item becomes
# (image, (bbox, classes)), then swap the wrapped datasets into the existing
# data loaders in place. md.trn_ds / md.val_ds themselves are not replaced here.
trn_ds2 = ConcatLblDataset(md.trn_ds, trn_mcs)
val_ds2 = ConcatLblDataset(md.val_ds, val_mcs)
md.trn_dl.dataset = trn_ds2
md.val_dl.dataset = val_ds2



In [ ]:

    
x,y=to_np(next(iter(md.val_dl)))
x=md.val_ds.ds.denorm(x)



In [ ]:

    
x,y=to_np(next(iter(md.trn_dl)))
x=md.trn_ds.ds.denorm(x)



In [ ]:

    
# Plot 12 images from the most recently fetched batch with their ground truth.
# y[0] holds the boxes and y[1] the class indices. If the cells above ran in
# order, x,y is the training batch, because it overwrote the validation one.
fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for i,ax in enumerate(axes.flat):
    show_ground_truth(ax, x[i], y[0][i], y[1][i])
plt.tight_layout()

Set up model

We'll start with a simple first model that predicts which object is located in each cell of a 4x4 grid. Later on we can try to improve this.



In [ ]:

    
# A single anc_grid x anc_grid grid of square anchors, one anchor (k = 1) per cell.
anc_grid = 4
k = 1

# Cell centres: offset by half a cell from the image border, in 0..1 coordinates.
anc_offset = 1/(anc_grid*2)
anc_x = np.repeat(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)
anc_y = np.tile(np.linspace(anc_offset, 1-anc_offset, anc_grid), anc_grid)

# Each anchor row is [centre_0, centre_1, size_0, size_1]; every anchor is exactly one cell wide and tall.
anc_ctrs = np.tile(np.stack([anc_x,anc_y], axis=1), (k,1))
anc_sizes = np.array([[1/anc_grid,1/anc_grid] for i in range(anc_grid*anc_grid)])
anchors = V(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()



In [ ]:

    
# Cell size as a 1x1 tensor; actn_to_bb multiplies the predicted centre offsets by it.
grid_sizes = V(np.array([1/anc_grid]), requires_grad=False).unsqueeze(1)



In [ ]:

    
plt.scatter(anc_x, anc_y)
plt.xlim(0, 1)
plt.ylim(0, 1);



In [ ]:

    
anchors









    Out[ ]:





Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.3750  0.1250  0.2500  0.2500
 0.3750  0.3750  0.2500  0.2500
 0.3750  0.6250  0.2500  0.2500
 0.3750  0.8750  0.2500  0.2500
 0.6250  0.1250  0.2500  0.2500
 0.6250  0.3750  0.2500  0.2500
 0.6250  0.6250  0.2500  0.2500
 0.6250  0.8750  0.2500  0.2500
 0.8750  0.1250  0.2500  0.2500
 0.8750  0.3750  0.2500  0.2500
 0.8750  0.6250  0.2500  0.2500
 0.8750  0.8750  0.2500  0.2500
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]



In [ ]:

    
def hw2corners(ctr, hw):
    """Convert centre/size boxes to corner boxes.

    Returns [ctr - hw/2, ctr + hw/2] concatenated along dim 1.
    """
    half_extent = hw/2
    near_corner = ctr-half_extent
    far_corner = ctr+half_extent
    return torch.cat([near_corner, far_corner], dim=1)



In [ ]:

    
anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])
anchor_cnr









    Out[ ]:





Variable containing:
 0.0000  0.0000  0.2500  0.2500
 0.0000  0.2500  0.2500  0.5000
 0.0000  0.5000  0.2500  0.7500
 0.0000  0.7500  0.2500  1.0000
 0.2500  0.0000  0.5000  0.2500
 0.2500  0.2500  0.5000  0.5000
 0.2500  0.5000  0.5000  0.7500
 0.2500  0.7500  0.5000  1.0000
 0.5000  0.0000  0.7500  0.2500
 0.5000  0.2500  0.7500  0.5000
 0.5000  0.5000  0.7500  0.7500
 0.5000  0.7500  0.7500  1.0000
 0.7500  0.0000  1.0000  0.2500
 0.7500  0.2500  1.0000  0.5000
 0.7500  0.5000  1.0000  0.7500
 0.7500  0.7500  1.0000  1.0000
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]



In [ ]:

    
# Number of class outputs per anchor: every real category plus one extra slot (used for background).
n_clas = len(id2cat)+1
# Activations per grid cell: 4 box values plus the class scores, for each of the k anchors.
n_act = k*(4+n_clas)



In [ ]:

    
class StdConv(nn.Module):
    """3x3 convolution followed by ReLU, batch norm and dropout, in that order.

    nin / nout: input and output channel counts.
    stride:     convolution stride (2 by default).
    drop:       dropout probability applied to the output.
    """

    def __init__(self, nin, nout, stride=2, drop=0.1):
        super().__init__()
        self.conv = nn.Conv2d(nin, nout, kernel_size=3, stride=stride, padding=1)
        self.bn = nn.BatchNorm2d(nout)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        activated = F.relu(self.conv(x))
        normalized = self.bn(activated)
        return self.drop(normalized)
        
def flatten_conv(x,k):
    """Reshape a (bs, nf, gx, gy) activation map to (bs, gx*gy*k, nf//k).

    Channels are moved last first, so each output row holds the nf//k
    activations of one of the k anchors at one grid position.
    """
    bs, nf, _, _ = x.size()
    channels_last = x.permute(0,2,3,1).contiguous()
    return channels_last.view(bs, -1, nf//k)



In [ ]:

    
class OutConv(nn.Module):
    """Output layer producing class scores and box activations for k anchors per cell.

    k:    anchors per grid cell.
    nin:  input channel count.
    bias: value every bias of the class convolution is initialised to.

    forward returns [class_activations, box_activations], each flattened with
    `flatten_conv` to one row per anchor.
    """

    def __init__(self, k, nin, bias):
        super().__init__()
        self.k = k
        n_class_out = (len(id2cat)+1)*k
        self.oconv1 = nn.Conv2d(nin, n_class_out, 3, padding=1)
        self.oconv2 = nn.Conv2d(nin, 4*k, 3, padding=1)
        # Reset the class-conv bias to zero, then shift it to `bias`.
        self.oconv1.bias.data.zero_().add_(bias)

    def forward(self, x):
        class_out = flatten_conv(self.oconv1(x), self.k)
        box_out = flatten_conv(self.oconv2(x), self.k)
        return [class_out, box_out]



In [ ]:

    
class SSD_Head(nn.Module):
    """Custom head for the single-grid model.

    Applies ReLU + dropout to the backbone features, a stride-1 StdConv
    (512 -> 256 channels), one stride-2 StdConv, and finally an OutConv that
    returns [class_activations, box_activations].

    k:    anchors per grid cell, forwarded to OutConv.
    bias: initial bias of the class output convolution, forwarded to OutConv.
    """
    def __init__(self, k, bias):
        super().__init__()
        self.drop = nn.Dropout(0.25)
        self.sconv0 = StdConv(512,256, stride=1)
        # An additional stride-2 block (sconv1) is intentionally left disabled.
#         self.sconv1 = StdConv(256,256)
        self.sconv2 = StdConv(256,256)
        self.out = OutConv(k, 256, bias)
        
    def forward(self, x):
        x = self.drop(F.relu(x))
        x = self.sconv0(x)
#         x = self.sconv1(x)
        x = self.sconv2(x)
        return self.out(x)

# Build the learner around the custom head; the class-conv bias starts at -3.
head_reg4 = SSD_Head(k, -3.)
models = ConvnetBuilder(f_model, 0, 0, 0, custom_head=head_reg4)
learn = ConvLearner(md, models)
learn.opt_fn = optim.Adam
# Display the current number of anchors per cell as a sanity check.
k









    Out[ ]:





1

Train



In [ ]:

    
def one_hot_embedding(labels, num_classes):
    """Return one-hot rows for `labels` by indexing a num_classes x num_classes identity matrix.

    The identity matrix is created with `torch.eye` and indexed with
    `labels.data.cpu()`, so the result is a CPU tensor.
    """
    identity = torch.eye(num_classes)
    cpu_labels = labels.data.cpu()
    return identity[cpu_labels]

class BCE_Loss(nn.Module):
    """Binary cross-entropy over the real classes, ignoring the last (extra) class column.

    num_classes: number of real classes; targets may also take the value
    num_classes (ssd_1_loss uses that index for background).
    """
    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

    def forward(self, pred, targ):
        # One-hot over num_classes+1 columns, then drop the last column so a
        # target equal to num_classes becomes an all-zero row.
        t = one_hot_embedding(targ, self.num_classes+1)
        t = V(t[:,:-1].contiguous())#.cpu()
        # Drop the last prediction column as well so shapes match.
        x = pred[:,:-1]
        w = self.get_weight(x,t)
        # Summed (not averaged) BCE with logits, scaled by the number of classes.
        return F.binary_cross_entropy_with_logits(x, t, w, size_average=False)/self.num_classes
    
    # Per-element weights for the BCE; None means unweighted.
    # (Presumably a hook for subclasses to override; confirm.)
    def get_weight(self,x,t): return None

# Classification loss used by ssd_1_loss.
loss_f = BCE_Loss(len(id2cat))



In [ ]:

    
def intersect(box_a, box_b):
    """Pairwise intersection areas of two sets of corner boxes.

    box_a is (A, 4) and box_b is (B, 4); the result is (A, B). Boxes that do
    not overlap contribute 0 because negative extents are clamped to zero.
    """
    far_corner = torch.min(box_a[:, None, 2:], box_b[None, :, 2:])
    near_corner = torch.max(box_a[:, None, :2], box_b[None, :, :2])
    extent = torch.clamp(far_corner - near_corner, min=0)
    return extent[:, :, 0] * extent[:, :, 1]

def box_sz(b):
    """Area of each corner box in `b` (shape (N, 4)): (b2 - b0) * (b3 - b1)."""
    extent_0 = b[:, 2]-b[:, 0]
    extent_1 = b[:, 3]-b[:, 1]
    return extent_0 * extent_1

def jaccard(box_a, box_b):
    """Pairwise intersection-over-union of two sets of corner boxes.

    box_a is (A, 4) and box_b is (B, 4); the result is (A, B).
    """
    overlap = intersect(box_a, box_b)
    area_a = box_sz(box_a).unsqueeze(1)
    area_b = box_sz(box_b).unsqueeze(0)
    union = area_a + area_b - overlap
    return overlap / union



In [ ]:

    
def get_y(bbox,clas):
    """Prepare one image's targets: scale boxes by 1/sz and drop empty rows.

    `bbox` is viewed as (-1, 4) and divided by the global image size `sz`.
    Rows whose extent along the first axis (b2 - b0) is not positive are
    removed from both the boxes and `clas`
    (presumably these are the zero rows used to pad a batch; confirm).
    """
    boxes = bbox.view(-1,4)/sz
    extent = boxes[:,2]-boxes[:,0]
    keep = (extent>0).nonzero()[:,0]
    return boxes[keep],clas[keep]

def actn_to_bb(actn, anchors):
    """Turn raw box activations into corner boxes relative to `anchors`.

    Activations are squashed with tanh. The first two move each anchor centre
    by at most half of `grid_sizes`; the last two scale each anchor size by a
    factor between 0.5 and 1.5. The result is converted with `hw2corners`.
    """
    squashed = torch.tanh(actn)
    centers = (squashed[:,:2]/2 * grid_sizes) + anchors[:,:2]
    sizes = (squashed[:,2:]/2+1) * anchors[:,2:]
    return hw2corners(centers, sizes)

def map_to_ground_truth(overlaps, print_it=False):
    """Assign a ground-truth object to every anchor.

    overlaps: (n_objects, n_anchors) IoU matrix.
    print_it: when True, print each object's best overlap.

    Returns (gt_overlap, gt_idx), both of length n_anchors: the best overlap
    of each anchor and the index of the object it is matched to. The anchor
    that overlaps each object best is forced onto that object and its overlap
    is set to 1.99, so it passes any later threshold.
    """
    best_iou_per_obj, best_anchor_per_obj = overlaps.max(1)
    if print_it:
        print(best_iou_per_obj)
    anchor_iou, anchor_obj = overlaps.max(0)
    anchor_iou[best_anchor_per_obj] = 1.99
    # Sequential on purpose: if two objects share a best anchor, the later object wins.
    for obj_i, anchor_i in enumerate(best_anchor_per_obj):
        anchor_obj[anchor_i] = obj_i
    return anchor_iou, anchor_obj

def ssd_1_loss(b_c,b_bb,bbox,clas,print_it=False):
    """Localisation and classification loss for a single image.

    b_c / b_bb: class and box activations for every anchor.
    bbox / clas: ground-truth boxes and classes for the image (see get_y).
    print_it:   forwarded to map_to_ground_truth.

    Returns (loc_loss, clas_loss). Reads the globals `anchors`, `anchor_cnr`,
    `id2cat` and `loss_f`.
    """
    bbox,clas = get_y(bbox,clas)
    a_ic = actn_to_bb(b_bb, anchors)
    # IoU of every ground-truth box against every (unmodified) anchor.
    overlaps = jaccard(bbox.data, anchor_cnr.data)
    gt_overlap,gt_idx = map_to_ground_truth(overlaps,print_it)
    gt_clas = clas[gt_idx]
    # Anchors count as matched when their overlap exceeds 0.4; forced matches carry 1.99.
    pos = gt_overlap > 0.4
    pos_idx = torch.nonzero(pos)[:,0]
    # Every unmatched anchor gets the extra class index len(id2cat) (background).
    gt_clas[1-pos] = len(id2cat)
    gt_bbox = bbox[gt_idx]
    # Mean absolute error between predicted and target boxes, over matched anchors only.
    loc_loss = ((a_ic[pos_idx] - gt_bbox[pos_idx]).abs()).mean()
    clas_loss  = loss_f(b_c, gt_clas)
    return loc_loss, clas_loss

def ssd_loss(pred,targ,print_it=False):
    """Batch loss: sum over images of localisation loss plus classification loss.

    pred: [class_activations, box_activations] for the batch.
    targ: [boxes, classes] for the batch.
    print_it: when True, print per-image overlaps and the two summed losses.
    """
    lcs,lls = 0.,0.
    per_image = zip(*pred,*targ)
    for img_c, img_bb, img_bbox, img_clas in per_image:
        img_losses = ssd_1_loss(img_c, img_bb, img_bbox, img_clas, print_it)
        lls += img_losses[0]
        lcs += img_losses[1]
    if print_it:
        print(f'loc: {lls.data[0]}, clas: {lcs.data[0]}')
    return lls+lcs



In [ ]:

    
# Grab one validation batch and wrap inputs and targets with V() for a manual forward pass.
x,y = next(iter(md.val_dl))
# CPU variant, kept for debugging:
# x,y = V(x).cpu(),V(y)
x,y = V(x),V(y)



In [ ]:

    
# Move every target in y (replaced in place, element by element) and the model to the GPU.
for i,o in enumerate(y): y[i] = o.cuda()
learn.model.cuda()



In [ ]:

    
batch = learn.model(x)



In [ ]:

    
# uncomment to debug on cpu
#anchors = anchors.cpu(); grid_sizes = grid_sizes.cpu(); anchor_cnr = anchor_cnr.cpu()



In [ ]:

    
ssd_loss(batch, y, True)









    



 0.1947
 0.1168
 0.2652
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2885
 0.0888
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  9.9027
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1608
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3237
 0.2153
 0.2558
 0.2013
 0.2526
 0.0485
 0.0879
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3258
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2704
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3985
 0.4538
 0.1897
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.1527
 0.1863
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3426
 0.3249
 0.5062
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.0642
 0.2506
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2027
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2418
 0.2337
 0.2590
[torch.cuda.FloatTensor of size 3 (GPU 0)]


1.00000e-02 *
  8.4642
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3652
 0.1377
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  9.5146
  5.7398
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2041
 0.1148
 0.1341
 0.1650
 0.0384
 0.2213
 0.1477
 0.2520
 0.2531
 0.2129
 0.2144
 0.1795
 0.3002
 0.3057
[torch.cuda.FloatTensor of size 14 (GPU 0)]


 0.2097
 0.2182
 0.2786
 0.2973
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.2568
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2184
 0.2459
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1166
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.0898
 0.0548
 0.4860
 0.0865
 0.1805
 0.2080
 0.2583
 0.0650
 0.0383
[torch.cuda.FloatTensor of size 9 (GPU 0)]


 0.2222
 0.1000
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  6.6300
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1940
 0.1498
 0.4352
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5732
 0.1231
 0.2356
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2515
 0.2851
 0.2107
 0.2351
 0.2572
 0.1801
 0.2538
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.2544
 0.0842
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1890
 0.2767
 0.2161
 0.2104
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.1465
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3846
 0.4679
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1677
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3781
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1589
 0.1125
 0.1994
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2309
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2164
 0.4026
 0.3522
 0.2881
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4166
 0.3824
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1823
 0.0647
 0.0404
 0.1737
 0.1553
 0.3090
 0.3726
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.2105
 0.2143
 0.1074
 0.1572
 0.1939
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.1817
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.0536
 0.2392
 0.4061
 0.0804
 0.3463
 0.3876
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.1975
 0.1799
 0.2146
 0.0935
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.2553
 0.1721
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2017
 0.0885
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4367
 0.2400
 0.1817
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2471
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3207
 0.2089
 0.6309
 0.1183
 0.2568
[torch.cuda.FloatTensor of size 5 (GPU 0)]


1.00000e-02 *
  8.3850
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1024
 0.2968
[torch.cuda.FloatTensor of size 2 (GPU 0)]


1.00000e-02 *
  8.3770
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2832
 0.1478
 0.0903
 0.3304
 0.1316
 0.1940
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.4223
 0.1600
 0.2250
 0.3211
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.3666
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1067
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1610
 0.1593
 0.3415
 0.6606
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.3255
 0.3394
 0.3390
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2139
 0.3500
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.1369
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1455
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2794
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2309
[torch.cuda.FloatTensor of size 1 (GPU 0)]


1.00000e-02 *
  6.3919
  9.1493
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4062
 0.2180
 0.1307
 0.5762
 0.1524
 0.4794
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.1128
[torch.cuda.FloatTensor of size 1 (GPU 0)]

loc: 10.175130844116211, clas: 72.91587829589844






    Out[ ]:





Variable containing:
 83.0910
[torch.cuda.FloatTensor of size 1 (GPU 0)]



In [ ]:

    
# Train with the SSD loss defined above.
learn.crit = ssd_loss
lr = 3e-3
# Three learning rates, each 10x the previous and ending at `lr`
# (presumably one per layer group, lowest first; confirm).
lrs = np.array([lr/100,lr/10,lr])



In [ ]:

    
learn.lr_find(lrs/1000,1.)
learn.sched.plot(1)









    





 
 










    



epoch      trn_loss   val_loss                            
    0      86.852668  32587.789062



In [ ]:

    
learn.fit(lr, 1, cycle_len=5, use_clr=(20,10))









    





 
 










    



epoch      trn_loss   val_loss                            
    0      43.166077  32.56049  
    1      33.731625  28.329123                           
    2      29.498006  27.387726                           
    3      26.590789  26.043869                           
    4      24.470896  25.746592                           







    Out[ ]:





[25.746592]



In [ ]:

    
learn.save('0')



In [ ]:

    
learn.load('0')

Testing



In [ ]:

    
# Run the trained model in eval mode on one validation batch and split its
# output into class activations and box activations.
x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
learn.model.eval()
batch = learn.model(x)
b_clas,b_bb = batch



In [ ]:

    
b_clas.size(),b_bb.size()









    Out[ ]:





(torch.Size([64, 16, 21]), torch.Size([64, 16, 4]))



In [ ]:

    
# Pick one image of the batch to walk through the loss computation step by step.
idx=7
b_clasi = b_clas[idx]        # class activations for this image
b_bboxi = b_bb[idx]          # box activations for this image
ima=md.val_ds.ds.denorm(to_np(x))[idx]   # denormalized image for plotting
# Ground truth for this image: boxes scaled by 1/sz, with empty rows dropped.
bbox,clas = get_y(y[0][idx], y[1][idx])
bbox,clas









    Out[ ]:





(Variable containing:
  0.6786  0.4866  0.9911  0.6250
  0.7098  0.0848  0.9911  0.5491
  0.5134  0.8304  0.6696  0.9063
 [torch.cuda.FloatTensor of size 3x4 (GPU 0)], Variable containing:
   8
  10
  17
 [torch.cuda.LongTensor of size 3 (GPU 0)])



In [ ]:

    
def torch_gt(ax, ima, bbox, clas, prs=None, thresh=0.4):
    """Plot boxes given as torch values in 0..1 coordinates via show_ground_truth.

    ax, ima: axis and image, passed through to show_ground_truth.
    bbox:    boxes in 0..1 coordinates; scaled back to pixels and truncated to integers.
    clas:    class index per box.
    prs:     optional score per box.
    thresh:  score threshold passed through to show_ground_truth.

    The boxes are scaled by the global image size `sz`, the same value get_y
    divides by, so both stay consistent if `sz` is changed.
    """
    return show_ground_truth(ax, ima, to_np((bbox*sz).long()),
         to_np(clas), to_np(prs) if prs is not None else None, thresh)



In [ ]:

    
fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, bbox, clas)



In [ ]:

    
fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, anchor_cnr, b_clasi.max(1)[1])



In [ ]:

    
grid_sizes









    Out[ ]:





Variable containing:
 0.2500
[torch.cuda.FloatTensor of size 1x1 (GPU 0)]



In [ ]:

    
anchors









    Out[ ]:





Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.3750  0.1250  0.2500  0.2500
 0.3750  0.3750  0.2500  0.2500
 0.3750  0.6250  0.2500  0.2500
 0.3750  0.8750  0.2500  0.2500
 0.6250  0.1250  0.2500  0.2500
 0.6250  0.3750  0.2500  0.2500
 0.6250  0.6250  0.2500  0.2500
 0.6250  0.8750  0.2500  0.2500
 0.8750  0.1250  0.2500  0.2500
 0.8750  0.3750  0.2500  0.2500
 0.8750  0.6250  0.2500  0.2500
 0.8750  0.8750  0.2500  0.2500
[torch.cuda.FloatTensor of size 16x4 (GPU 0)]



In [ ]:

    
a_ic = actn_to_bb(b_bboxi, anchors)



In [ ]:

    
fig, ax = plt.subplots(figsize=(7,7))
torch_gt(ax, ima, a_ic, b_clasi.max(1)[1], b_clasi.max(1)[0].sigmoid(), thresh=0.0)



In [ ]:

    
overlaps = jaccard(bbox.data, anchor_cnr.data)
overlaps









    Out[ ]:






Columns 0 to 9 
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0091
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0356  0.0549
 0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000  0.0000

Columns 10 to 15 
 0.0922  0.0000  0.0000  0.0315  0.3985  0.0000
 0.0103  0.0000  0.2598  0.4538  0.0653  0.0000
 0.0000  0.1897  0.0000  0.0000  0.0000  0.0000
[torch.cuda.FloatTensor of size 3x16 (GPU 0)]



In [ ]:

    
overlaps.max(1)









    Out[ ]:





(
  0.3985
  0.4538
  0.1897
 [torch.cuda.FloatTensor of size 3 (GPU 0)], 
  14
  13
  11
 [torch.cuda.LongTensor of size 3 (GPU 0)])



In [ ]:

    
overlaps.max(0)









    Out[ ]:





(
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0356
  0.0549
  0.0922
  0.1897
  0.2598
  0.4538
  0.3985
  0.0000
 [torch.cuda.FloatTensor of size 16 (GPU 0)], 
  0
  0
  0
  0
  0
  0
  0
  0
  1
  1
  0
  2
  1
  1
  0
  0
 [torch.cuda.LongTensor of size 16 (GPU 0)])



In [ ]:

    
gt_overlap,gt_idx = map_to_ground_truth(overlaps)
gt_overlap,gt_idx









    Out[ ]:





(
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0000
  0.0356
  0.0549
  0.0922
  1.9900
  0.2598
  1.9900
  1.9900
  0.0000
 [torch.cuda.FloatTensor of size 16 (GPU 0)], 
  0
  0
  0
  0
  0
  0
  0
  0
  1
  1
  0
  2
  1
  1
  0
  0
 [torch.cuda.LongTensor of size 16 (GPU 0)])



In [ ]:

    
gt_clas = clas[gt_idx]; gt_clas









    Out[ ]:





Variable containing:
  8
  8
  8
  8
  8
  8
  8
  8
 10
 10
  8
 17
 10
 10
  8
  8
[torch.cuda.LongTensor of size 16 (GPU 0)]



In [ ]:

    
# Walk-through of the matching step. Note that ssd_1_loss hard-codes 0.4 rather
# than this 0.5; for this example both select the same anchors, because only
# the forced matches (overlap 1.99) exceed either threshold.
thresh = 0.5
pos = gt_overlap > thresh
pos_idx = torch.nonzero(pos)[:,0]     # anchors matched to an object
neg_idx = torch.nonzero(1-pos)[:,0]   # anchors treated as background
pos_idx









    Out[ ]:





 11
 13
 14
[torch.cuda.LongTensor of size 3 (GPU 0)]



In [ ]:

    
gt_clas[1-pos] = len(id2cat)
[id2cat[o] if o<len(id2cat) else 'bg' for o in gt_clas.data]









    Out[ ]:





['bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'bg',
 'sofa',
 'bg',
 'diningtable',
 'chair',
 'bg']



In [ ]:

    
# Walk-through of the two loss terms for this image.
gt_bbox = bbox[gt_idx]
loc_loss = ((a_ic[pos_idx] - gt_bbox[pos_idx]).abs()).mean()
# NOTE(review): this uses F.cross_entropy, whereas ssd_1_loss (used for
# training) calls loss_f, a BCE_Loss, so this value is not the training loss.
clas_loss  = F.cross_entropy(b_clasi, gt_clas)
loc_loss,clas_loss









    Out[ ]:





(Variable containing:
 1.00000e-02 *
   6.5691
 [torch.cuda.FloatTensor of size 1 (GPU 0)], Variable containing:
  1.1215
 [torch.cuda.FloatTensor of size 1 (GPU 0)])



In [ ]:

    
# Show the predicted boxes (score > 0.01) for the first 12 images of the batch.
fig, axes = plt.subplots(3, 4, figsize=(16, 12))
# Denormalize the batch once; the result does not depend on the loop index.
x_denorm = md.val_ds.ds.denorm(to_np(x))
for idx,ax in enumerate(axes.flat):
    ima = x_denorm[idx]
    # Ground truth for this image. get_y is called exactly once: calling it
    # again on its own output would divide the boxes by `sz` a second time.
    bbox,clas = get_y(y[0][idx], y[1][idx])
    a_ic = actn_to_bb(b_bb[idx], anchors)
    torch_gt(ax, ima, a_ic, b_clas[idx].max(1)[1], b_clas[idx].max(1)[0].sigmoid(), 0.01)
plt.tight_layout()









    



Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

More anchors!

Create anchors



In [ ]:

    
# Anchor configuration: grids of 4x4, 2x2 and 1x1 cells; per cell, every
# combination of the zooms and aspect ratios below. The commented lines are
# smaller alternative settings.
anc_grids = [4,2,1]
# anc_grids = [2]
anc_zooms = [0.7, 1., 1.3]
# anc_zooms = [1.]
anc_ratios = [(1.,1.), (1.,0.5), (0.5,1.)]
# anc_ratios = [(1.,1.)]
# Relative size of each anchor shape, as a multiple of its cell size.
anchor_scales = [(anz*i,anz*j) for anz in anc_zooms for (i,j) in anc_ratios]
# Anchors per grid cell (len(anc_zooms) * len(anc_ratios)).
k = len(anchor_scales)
# Half a cell, per grid: distance from the image border to the first cell centre.
anc_offsets = [1/(o*2) for o in anc_grids]
k









    Out[ ]:





9



In [ ]:

    
# Cell centres of every grid, concatenated in anc_grids order.
anc_x = np.concatenate([np.repeat(np.linspace(ao, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
anc_y = np.concatenate([np.tile(np.linspace(ao, 1-ao, ag), ag)
                        for ao,ag in zip(anc_offsets,anc_grids)])
# Repeat each centre k times in a row, so the k anchors of one cell are adjacent.
anc_ctrs = np.repeat(np.stack([anc_x,anc_y], axis=1), k, axis=0)



In [ ]:

    
# Size of every anchor: its relative scale divided by the grid resolution,
# in the same cell-major order as anc_ctrs.
anc_sizes  =   np.concatenate([np.array([[o/ag,p/ag] for i in range(ag*ag) for o,p in anchor_scales])
               for ag in anc_grids])
# Cell size of the grid each anchor belongs to, as a column; this replaces the
# 1x1 grid_sizes of the single-grid model and is read by actn_to_bb.
grid_sizes = V(np.concatenate([np.array([ 1/ag       for i in range(ag*ag) for o,p in anchor_scales])
               for ag in anc_grids]), requires_grad=False).unsqueeze(1)
# Rows of [centre_0, centre_1, size_0, size_1], plus the same anchors as corner boxes.
anchors = V(np.concatenate([anc_ctrs, anc_sizes], axis=1), requires_grad=False).float()
anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:])



In [ ]:

    
anchors









    Out[ ]:





Variable containing:
 0.1250  0.1250  0.2500  0.2500
 0.1250  0.1250  0.2500  0.1250
 0.1250  0.1250  0.1250  0.2500
 0.1250  0.3750  0.2500  0.2500
 0.1250  0.3750  0.2500  0.1250
 0.1250  0.3750  0.1250  0.2500
 0.1250  0.6250  0.2500  0.2500
 0.1250  0.6250  0.2500  0.1250
 0.1250  0.6250  0.1250  0.2500
 0.1250  0.8750  0.2500  0.2500
 0.1250  0.8750  0.2500  0.1250
 0.1250  0.8750  0.1250  0.2500
 0.3750  0.1250  0.2500  0.2500
 0.3750  0.1250  0.2500  0.1250
 0.3750  0.1250  0.1250  0.2500
 0.3750  0.3750  0.2500  0.2500
 0.3750  0.3750  0.2500  0.1250
 0.3750  0.3750  0.1250  0.2500
 0.3750  0.6250  0.2500  0.2500
 0.3750  0.6250  0.2500  0.1250
 0.3750  0.6250  0.1250  0.2500
 0.3750  0.8750  0.2500  0.2500
 0.3750  0.8750  0.2500  0.1250
 0.3750  0.8750  0.1250  0.2500
 0.6250  0.1250  0.2500  0.2500
 0.6250  0.1250  0.2500  0.1250
 0.6250  0.1250  0.1250  0.2500
 0.6250  0.3750  0.2500  0.2500
 0.6250  0.3750  0.2500  0.1250
 0.6250  0.3750  0.1250  0.2500
 0.6250  0.6250  0.2500  0.2500
 0.6250  0.6250  0.2500  0.1250
 0.6250  0.6250  0.1250  0.2500
 0.6250  0.8750  0.2500  0.2500
 0.6250  0.8750  0.2500  0.1250
 0.6250  0.8750  0.1250  0.2500
 0.8750  0.1250  0.2500  0.2500
 0.8750  0.1250  0.2500  0.1250
 0.8750  0.1250  0.1250  0.2500
 0.8750  0.3750  0.2500  0.2500
 0.8750  0.3750  0.2500  0.1250
 0.8750  0.3750  0.1250  0.2500
 0.8750  0.6250  0.2500  0.2500
 0.8750  0.6250  0.2500  0.1250
 0.8750  0.6250  0.1250  0.2500
 0.8750  0.8750  0.2500  0.2500
 0.8750  0.8750  0.2500  0.1250
 0.8750  0.8750  0.1250  0.2500
 0.2500  0.2500  0.5000  0.5000
 0.2500  0.2500  0.5000  0.2500
 0.2500  0.2500  0.2500  0.5000
 0.2500  0.7500  0.5000  0.5000
 0.2500  0.7500  0.5000  0.2500
 0.2500  0.7500  0.2500  0.5000
 0.7500  0.2500  0.5000  0.5000
 0.7500  0.2500  0.5000  0.2500
 0.7500  0.2500  0.2500  0.5000
 0.7500  0.7500  0.5000  0.5000
 0.7500  0.7500  0.5000  0.2500
 0.7500  0.7500  0.2500  0.5000
 0.5000  0.5000  1.0000  1.0000
 0.5000  0.5000  1.0000  0.5000
 0.5000  0.5000  0.5000  1.0000
[torch.cuda.FloatTensor of size 63x4 (GPU 0)]



In [ ]:

    
x,y=to_np(next(iter(md.val_dl)))
x=md.val_ds.ds.denorm(x)



In [ ]:

    
# Anchor corners with a little Gaussian noise (std 0.01) added, scaled to 224
# pixels and flattened for show_ground_truth. The noise is unseeded, so the
# plot differs slightly between runs (presumably it is there to keep
# coincident box edges distinguishable).
a=np.reshape((to_np(anchor_cnr) + to_np(torch.randn(*anchor_cnr.size()))*0.01)*224, -1)



In [ ]:

    
fig, ax = plt.subplots(figsize=(7,7))
show_ground_truth(ax, x[0], a)









    



Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).



In [ ]:

    
fig, ax = plt.subplots(figsize=(7,7))
show_ground_truth(ax, x[0], a)









    



Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

Model



In [ ]:

    
drop=0.4

class SSD_MultiHead(nn.Module):
    """Custom detection head that predicts from three successive feature maps.

    The incoming activations pass through ReLU + dropout and `sconv0`, then
    through `sconv1`, `sconv2` and `sconv3` in turn; after each of those three
    convolutions the matching `outN` layer produces a (class, location) pair.
    The three class outputs and the three location outputs are concatenated
    along dim 1.

    NOTE(review): `out0` is constructed but never called in `forward`. It is
    kept so that parameter names (and therefore saved weights) stay unchanged.

    Args:
        k: forwarded to every `OutConv` (presumably the number of anchors per
            grid cell -- confirm against `OutConv`).
        bias: forwarded to every `OutConv`.
    """
    def __init__(self, k, bias):
        super().__init__()
        self.drop = nn.Dropout(drop)
        # First conv is built with stride=1; the remaining ones use StdConv's default stride.
        self.sconv0 = StdConv(512,256, stride=1, drop=drop)
        for n in (1, 2, 3):
            setattr(self, f'sconv{n}', StdConv(256,256, drop=drop))
        # One output layer per index 0..3 (index 0 is unused by forward, see class note).
        for n in range(4):
            setattr(self, f'out{n}', OutConv(k, 256, bias))

    def forward(self, x):
        """Return `[class_activations, location_activations]` for the batch `x`."""
        x = self.sconv0(self.drop(F.relu(x)))
        clas_outs, loc_outs = [], []
        stages = ((self.sconv1, self.out1),
                  (self.sconv2, self.out2),
                  (self.sconv3, self.out3))
        for conv, out in stages:
            x = conv(x)
            clas_act, loc_act = out(x)
            clas_outs.append(clas_act)
            loc_outs.append(loc_act)
        return [torch.cat(clas_outs, dim=1),
                torch.cat(loc_outs, dim=1)]

# Build the multi-scale head (-4. is passed as the `bias` argument of every OutConv),
# attach it to the backbone as a custom head, and train with Adam.
head_reg4 = SSD_MultiHead(k, -4.)
models = ConvnetBuilder(f_model, 0, 0, 0, custom_head=head_reg4)
learn = ConvLearner(md, models)
learn.opt_fn = optim.Adam



In [ ]:

    
# Train against the SSD loss.
learn.crit = ssd_loss
lr = 1e-2
# Three learning rates: lr/100, lr/10, lr (presumably one per layer group -- confirm).
lrs = np.array([lr/100,lr/10,lr])



In [ ]:

    
# Run one validation batch through the model to inspect the raw head outputs.
x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
# x was already wrapped by V on the line above, so it is passed to the model as-is.
batch = learn.model(x)



In [ ]:

    
# Sizes of the two model outputs (first: class activations, second: box activations).
batch[0].size(),batch[1].size()









    Out[ ]:





(torch.Size([64, 189, 21]), torch.Size([64, 189, 4]))



In [ ]:

    
# Evaluate the loss on this batch. The third positional argument (True) appears to
# switch on the diagnostic printing shown in the output -- confirm against ssd_loss.
ssd_loss(batch, y, True)









    



 0.5598
 0.7922
 0.3095
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5718
 0.7035
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.8714
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7268
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5163
 0.4763
 0.4033
 0.4986
 0.2990
 0.0887
 0.1046
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3789
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5153
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3985
 0.6215
 0.5547
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.4246
 0.5208
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3436
 0.3257
 0.5062
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.9734
 0.2506
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3900
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7722
 0.5395
 0.6392
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.7618
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4679
 0.8070
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6597
 0.1274
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2041
 0.2584
 0.1665
 0.2373
 0.0872
 0.3571
 0.1477
 0.2520
 0.4103
 0.3394
 0.2884
 0.4922
 0.3787
 0.3083
[torch.cuda.FloatTensor of size 14 (GPU 0)]


 0.3465
 0.4702
 0.3075
 0.3699
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.6350
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4666
 0.5763
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6857
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1651
 0.0595
 0.6267
 0.2088
 0.3256
 0.3768
 0.4768
 0.0901
 0.0670
[torch.cuda.FloatTensor of size 9 (GPU 0)]


 0.2862
 0.6248
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.9427
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6937
 0.7272
 0.4980
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5732
 0.7237
 0.9103
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2828
 0.3771
 0.4462
 0.4403
 0.4001
 0.4693
 0.2927
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3013
 0.1132
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4877
 0.3506
 0.2161
 0.5820
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.7152
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5290
 0.6212
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6086
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5147
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6993
 0.1816
 0.3097
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5795
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5134
 0.5408
 0.3522
 0.4801
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4166
 0.4327
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4907
 0.1219
 0.0792
 0.5814
 0.5660
 0.6971
 0.3726
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.5459
 0.3704
 0.1074
 0.1848
 0.4760
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.4316
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1364
 0.6287
 0.4061
 0.1304
 0.3701
 0.4181
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.5735
 0.2463
 0.2852
 0.6738
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5688
 0.6468
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2888
 0.7060
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4828
 0.2400
 0.2915
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.3020
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4500
 0.2089
 0.7658
 0.5281
 0.4424
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.7547
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7615
 0.4178
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7539
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2938
 0.1583
 0.1342
 0.5076
 0.1785
 0.2339
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.6536
 0.4844
 0.4022
 0.3861
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5740
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5858
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7901
 0.5316
 0.5802
 0.6606
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4405
 0.4869
 0.5088
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5414
 0.5224
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7278
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7401
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4864
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4314
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.9778
 0.7884
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4372
 0.5654
 0.2238
 0.5762
 0.6364
 0.4794
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.7330
[torch.cuda.FloatTensor of size 1 (GPU 0)]

Variable containing:
 4.5301
[torch.cuda.FloatTensor of size 1 (GPU 0)]
 Variable containing:
 61.3364
[torch.cuda.FloatTensor of size 1 (GPU 0)]







    Out[ ]:





Variable containing:
 65.8664
[torch.cuda.FloatTensor of size 1 (GPU 0)]



In [ ]:

    
# Learning-rate range test starting at lrs/1000 and ending at 1., then plot the
# recorded curve with the last 2 points skipped.
learn.lr_find(lrs/1000,1.)
learn.sched.plot(n_skip_end=2)









    





 
 










    



 94%|█████████▍| 30/32 [00:18<00:01,  1.65it/s, loss=89.7]



In [ ]:

    
# One cycle of 4 epochs at `lrs`; use_clr=(20,8) selects fastai's cyclical-LR schedule.
learn.fit(lrs, 1, cycle_len=4, use_clr=(20,8))









    





 
 










    



epoch      trn_loss   val_loss                            
    0      23.020269  22.007149 
    1      19.23732   15.323267                           
    2      16.612079  13.967303                           
    3      14.706582  12.920008                           







    Out[ ]:





[12.920008]



In [ ]:

    
# Checkpoint the model weights under the name 'tmp'.
learn.save('tmp')



In [ ]:

    
# Change which layer groups are frozen (freeze_to(-2) presumably leaves the last two
# groups trainable -- confirm), then train another 4-epoch cycle at half the rates.
learn.freeze_to(-2)
learn.fit(lrs/2, 1, cycle_len=4, use_clr=(20,8))









    





 
 










    



epoch      trn_loss   val_loss                            
    0      14.021227  17.886932 
    1      13.386686  12.754044                           
    2      12.297876  11.913645                           
    3      11.29237   11.368293                           







    Out[ ]:





[11.368293]



In [ ]:

    
# Checkpoint the weights reached before the focal-loss experiments as 'prefocal'.
learn.save('prefocal')



In [ ]:

    
# Visualise the current model's predictions on the first 12 images of one
# validation batch, using a fixed display threshold of 0.21.
x,y = next(iter(md.val_dl))
y = V(y)
batch = learn.model(V(x))
b_clas,b_bb = batch
x = to_np(x)
# De-normalise the whole batch once instead of once per subplot.
imgs = md.val_ds.ds.denorm(x)

fig, axes = plt.subplots(3, 4, figsize=(16, 12))
for idx,ax in enumerate(axes.flat):
    ima=imgs[idx]
    # Ground-truth boxes/classes for this image; not used by the plot below.
    bbox,clas = get_y(y[0][idx], y[1][idx])
    a_ic = actn_to_bb(b_bb[idx], anchors)
    # A single max over dim 1 yields both the top activation and its class index.
    clas_pr, clas_ids = b_clas[idx].max(1)
    torch_gt(ax, ima, a_ic, clas_ids, clas_pr.sigmoid(), 0.21)
plt.tight_layout()









    



Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

Focal loss



In [ ]:

    
def plot_results(thresh):
    """Plot model predictions for the first 12 images of one validation batch.

    Reads the module-level `md`, `learn` and `anchors`. For every image the
    highest class activation per prediction is passed through a sigmoid, and
    `torch_gt` is called with a threshold equal to `thresh` times the largest
    of those confidences in that image.

    Args:
        thresh: multiplier applied to the per-image maximum confidence to
            obtain the threshold handed to `torch_gt`.
    """
    x,_ = next(iter(md.val_dl))
    b_clas,b_bb = learn.model(V(x))

    # De-normalise the batch once rather than on every loop iteration.
    imgs = md.val_ds.ds.denorm(to_np(x))
    fig, axes = plt.subplots(3, 4, figsize=(16, 12))
    for idx,ax in enumerate(axes.flat):
        a_ic = actn_to_bb(b_bb[idx], anchors)
        clas_pr, clas_ids = b_clas[idx].max(1)
        clas_pr = clas_pr.sigmoid()
        torch_gt(ax, imgs[idx], a_ic, clas_ids, clas_pr, clas_pr.max().data[0]*thresh)
    plt.tight_layout()



In [ ]:

    
class FocalLoss(BCE_Loss):
    """`BCE_Loss` subclass that supplies a focal-loss style weight.

    Only `get_weight` is overridden; how the weight is applied is decided by
    `BCE_Loss` (presumably as the per-element weight of the binary
    cross-entropy -- confirm against the base class).
    """
    def get_weight(self,x,t):
        """Return `alpha_t * (1 - p_t) ** gamma` with alpha=0.25 and gamma=1.

        Args:
            x: activations; a sigmoid is applied to turn them into probabilities.
            t: targets with the same shape as `x` (assumed to hold 0/1 values
                -- TODO confirm).
        """
        alpha,gamma = 0.25,1
        prob = torch.sigmoid(x)
        neg_t = 1-t
        # p_t: `prob` where the target is 1, `1 - prob` where it is 0.
        pt = prob*t + (1-prob)*neg_t
        # alpha_t: `alpha` where the target is 1, `1 - alpha` where it is 0.
        alpha_t = alpha*t + (1-alpha)*neg_t
        return alpha_t * torch.pow(1-pt, gamma)

# Module-level loss instance, sized by the number of categories in id2cat.
loss_f = FocalLoss(len(id2cat))



In [ ]:

    
# Re-evaluate the loss on a validation batch with diagnostic printing on.
# NOTE(review): this presumably picks up the FocalLoss instance through the global
# `loss_f` -- confirm that ssd_loss reads it.
x,y = next(iter(md.val_dl))
x,y = V(x),V(y)
batch = learn.model(x)
ssd_loss(batch, y, True)









    



 0.5598
 0.7922
 0.3095
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5718
 0.7035
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.8714
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7268
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5163
 0.4763
 0.4033
 0.4986
 0.2990
 0.0887
 0.1046
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3789
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5153
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.3985
 0.6215
 0.5547
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.4246
 0.5208
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3436
 0.3257
 0.5062
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.9734
 0.2506
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.3900
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7722
 0.5395
 0.6392
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.7618
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4679
 0.8070
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6597
 0.1274
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2041
 0.2584
 0.1665
 0.2373
 0.0872
 0.3571
 0.1477
 0.2520
 0.4103
 0.3394
 0.2884
 0.4922
 0.3787
 0.3083
[torch.cuda.FloatTensor of size 14 (GPU 0)]


 0.3465
 0.4702
 0.3075
 0.3699
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.6350
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4666
 0.5763
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6857
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1651
 0.0595
 0.6267
 0.2088
 0.3256
 0.3768
 0.4768
 0.0901
 0.0670
[torch.cuda.FloatTensor of size 9 (GPU 0)]


 0.2862
 0.6248
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.9427
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6937
 0.7272
 0.4980
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5732
 0.7237
 0.9103
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.2828
 0.3771
 0.4462
 0.4403
 0.4001
 0.4693
 0.2927
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.3013
 0.1132
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4877
 0.3506
 0.2161
 0.5820
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.7152
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5290
 0.6212
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.6086
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5147
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.6993
 0.1816
 0.3097
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5795
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5134
 0.5408
 0.3522
 0.4801
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4166
 0.4327
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4907
 0.1219
 0.0792
 0.5814
 0.5660
 0.6971
 0.3726
[torch.cuda.FloatTensor of size 7 (GPU 0)]


 0.5459
 0.3704
 0.1074
 0.1848
 0.4760
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.4316
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.1364
 0.6287
 0.4061
 0.1304
 0.3701
 0.4181
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.5735
 0.2463
 0.2852
 0.6738
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5688
 0.6468
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.2888
 0.7060
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4828
 0.2400
 0.2915
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.3020
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4500
 0.2089
 0.7658
 0.5281
 0.4424
[torch.cuda.FloatTensor of size 5 (GPU 0)]


 0.7547
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7615
 0.4178
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7539
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.2938
 0.1583
 0.1342
 0.5076
 0.1785
 0.2339
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.6536
 0.4844
 0.4022
 0.3861
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.5740
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.5858
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7901
 0.5316
 0.5802
 0.6606
[torch.cuda.FloatTensor of size 4 (GPU 0)]


 0.4405
 0.4869
 0.5088
[torch.cuda.FloatTensor of size 3 (GPU 0)]


 0.5414
 0.5224
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.7278
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.7401
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4864
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.4314
[torch.cuda.FloatTensor of size 1 (GPU 0)]


 0.9778
 0.7884
[torch.cuda.FloatTensor of size 2 (GPU 0)]


 0.4372
 0.5654
 0.2238
 0.5762
 0.6364
 0.4794
[torch.cuda.FloatTensor of size 6 (GPU 0)]


 0.7330
[torch.cuda.FloatTensor of size 1 (GPU 0)]

loc: 3.6088805198669434, clas: 7.331346035003662






    Out[ ]:





Variable containing:
 10.9402
[torch.cuda.FloatTensor of size 1 (GPU 0)]



In [ ]:

    
# Learning-rate range test starting at lrs/1000 and ending at 1., then plot the
# recorded curve with the last point skipped.
learn.lr_find(lrs/1000,1.)
learn.sched.plot(n_skip_end=1)









    





 
 










    



 19%|█▉        | 6/32 [00:05<00:24,  1.07it/s, loss=67.7]






    



Exception in thread Thread-228:
Traceback (most recent call last):
  File "/home/jhoward/anaconda3/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/jhoward/anaconda3/lib/python3.6/site-packages/tqdm/_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "/home/jhoward/anaconda3/lib/python3.6/_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration







    



 72%|███████▏  | 23/32 [00:15<00:06,  1.44it/s, loss=nan]



In [ ]:

    
# One cycle of 10 epochs at `lrs`; use_clr=(20,10) selects fastai's cyclical-LR schedule.
learn.fit(lrs, 1, cycle_len=10, use_clr=(20,10))









    





 
 










    



epoch      trn_loss   val_loss                            
    0      27.244542  32.936592 
    1      24.687115  22.024887                           
    2      22.251388  19.824711                           
    3      20.238359  18.530561                           
    4      18.613792  17.730865                           
    5      17.376519  17.323956                           
    6      16.33999   16.968851                           
    7      15.425277  16.894522                           
    8      14.683091  16.533207                           
    9      14.044275  16.332354                           







    Out[ ]:





[16.332354]



In [ ]:

    
# Checkpoint the weights after the first focal-loss training run as 'fl0'.
learn.save('fl0')



In [ ]:

    
# Restore the 'fl0' checkpoint (lets the notebook resume from here without retraining).
learn.load('fl0')



In [ ]:

    
# Change which layer groups are frozen (freeze_to(-2) presumably leaves the last two
# groups trainable -- confirm), then train a 10-epoch cycle at a quarter of the rates.
learn.freeze_to(-2)
learn.fit(lrs/4, 1, cycle_len=10, use_clr=(20,10))









    





 
 










    



epoch      trn_loss   val_loss                            
    0      13.991107  17.163681 
    1      14.275143  16.685173                           
    2      13.81701   16.067303                           
    3      13.172081  15.567028                           
    4      12.474847  15.480181                           
    5      11.774984  15.262911                           
    6      11.21864   15.10038                            
    7      10.711037  15.184849                           
    8      10.215424  14.942656                           
    9      9.871555   14.910997                           







    Out[ ]:





[14.910997]



In [ ]:

    
# Checkpoint the fine-tuned focal-loss weights as 'drop4'.
learn.save('drop4')



In [ ]:

    
# Restore the 'drop4' checkpoint before visualising results.
learn.load('drop4')



In [ ]:

    
# Show predictions whose confidence is at least 0.75 x the per-image maximum confidence.
plot_results(0.75)









    



Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

NMS



In [ ]:

    
def nms(boxes, scores, overlap=0.5, top_k=100):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is greater than `overlap`.

    Args:
        boxes: 2-D tensor with four columns per box; columns 0/1 are the
            minimum corner and columns 2/3 the maximum corner.
        scores: 1-D tensor with one score per row of `boxes`.
        overlap: IoU threshold; boxes with IoU <= overlap survive a round.
        top_k: only the `top_k` highest-scoring boxes are considered.

    Returns:
        `(keep, count)`. `keep` is a LongTensor of length `scores.size(0)`
        whose first `count` entries are the indices of the kept boxes in
        descending score order (remaining entries are zero). For empty input
        the result is `(keep, 0)`, so callers can always unpack two values.
    """
    keep = scores.new(scores.size(0)).zero_().long()
    # Same (keep, count) shape as the normal path; previously a bare tensor was returned here.
    if boxes.numel() == 0: return keep, 0
    x1, y1 = boxes[:, 0], boxes[:, 1]
    x2, y2 = boxes[:, 2], boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    _, idx = scores.sort(0)  # ascending order
    idx = idx[-top_k:]  # indices of the top-k largest scores

    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of the current highest score
        keep[count] = i
        count += 1
        if idx.size(0) == 1: break
        idx = idx[:-1]  # drop the box that was just kept
        # Corners of the intersection of each remaining box with box i.
        xx1 = torch.clamp(torch.index_select(x1, 0, idx), min=x1[i])
        yy1 = torch.clamp(torch.index_select(y1, 0, idx), min=y1[i])
        xx2 = torch.clamp(torch.index_select(x2, 0, idx), max=x2[i])
        yy2 = torch.clamp(torch.index_select(y2, 0, idx), max=y2[i])
        # Negative extents mean the boxes do not overlap: clamp them to zero.
        w = torch.clamp(xx2 - xx1, min=0.0)
        h = torch.clamp(yy2 - yy1, min=0.0)
        inter = w*h
        # IoU = inter / (area(a) + area(b) - inter)
        rem_areas = torch.index_select(area, 0, idx)
        union = (rem_areas - inter) + area[i]
        IoU = inter/union
        # keep only the boxes whose IoU with box i is <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count



In [ ]:

    
# Predictions for one validation batch; show_nmf reads x, y, b_clas and b_bb as globals.
x,y = next(iter(md.val_dl))
y = V(y)
batch = learn.model(V(x))
b_clas,b_bb = batch
# Keep the images as numpy for de-normalising / plotting.
x = to_np(x)



In [ ]:

    
def show_nmf(idx):
    """Plot the predictions for image `idx` of the current batch after NMS.

    Reads the module-level `x`, `b_clas`, `b_bb`, `anchors` and `md`. For each
    class row except the last one, predictions with sigmoid confidence above
    0.25 are run through `nms` (overlap 0.4, top 50); the surviving boxes of
    all classes are drawn together with `torch_gt`. If no class has any
    prediction above the confidence cut-off, a message is printed and nothing
    is drawn.

    Args:
        idx: index of the image within the batch.
    """
    ima=md.val_ds.ds.denorm(x)[idx]
    a_ic = actn_to_bb(b_bb[idx], anchors)

    # One row per class after the transpose.
    conf_scores = b_clas[idx].sigmoid().t().data

    out1,out2,cc = [],[],[]
    # The last row is skipped (presumably the background class -- confirm).
    for cl in range(0, len(conf_scores)-1):
        c_mask = conf_scores[cl] > 0.25
        if c_mask.sum() == 0: continue
        scores = conf_scores[cl][c_mask]
        # Expand the per-prediction mask over the four box coordinates.
        l_mask = c_mask.unsqueeze(1).expand_as(a_ic)
        boxes = a_ic[l_mask].view(-1, 4)
        ids, count = nms(boxes.data, scores, 0.4, 50)
        ids = ids[:count]
        out1.append(scores[ids])
        out2.append(boxes.data[ids])
        cc.append([cl]*count)
    if not cc:
        # Report this function's own argument rather than a caller's loop variable.
        print(f"{idx}: empty array")
        return
    cc = T(np.concatenate(cc))
    out1 = torch.cat(out1)
    out2 = torch.cat(out2)

    fig, ax = plt.subplots(figsize=(8,8))
    torch_gt(ax, ima, out2, cc, out1, 0.1)



In [ ]:

    
# Show the post-NMS predictions for the first 12 images of the batch.
for i in range(12): show_nmf(i)









    



Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).

End



In [ ]:

	fn	bbox
0	000012.jpg	96 155 269 350
1	000017.jpg	61 184 198 278 77 89 335 402
2	000023.jpg	229 8 499 244 219 229 499 333 0 1 368 116 1 2 ...
3	000026.jpg	124 89 211 336
4	000032.jpg	77 103 182 374 87 132 122 196 179 194 228 212 ...