This is a follow-along with How to write custom omdels with fast.ai

How to Write Custom Models with Fast.AI

Example on how to modify fastai to use a custom pretrained network.


In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

In [3]:
import pandas as pd
import numpy as np
path = 'data/gloc/'
model_path = path + 'results/'

Custom ConvnetBuilder

This ConvnetBuilderVGG is similar to ConvnetBuilder. It's modifying VGG16. The default vgg16 in FastAI 'cuts' all the fully connected layers. In this example we keep all layers but the last one. There's a very small dataset so the intention is to train the last 2 fully connected layers.


In [4]:
# to override the fastai vgg16 function
from torchvision.models import vgg16

# Creates a ConvnetBuilder with all pretrained layers from vgg16 but the last fully connected layer
class ConvnetBuilderVGG():
    """
    Class representing a convolutional network.
    Arguments:
        c (int): size of the last layer
        is_multi (bool): is multilabel classification
        is_reg (bool): is a regression
        ps (float): dropout parameter for last layer
    """
    
    def __init__(self, c, is_multi, is_reg, ps=None):
        self.c, self.is_multi, self.is_reg = c, is_multi, is_reg
        self.ps = ps or 0.5
        vgg = vgg16(True) # NOTE: okay so I need to study how PyTorch does this
        self.lr_cut = 30
        layers = children(vgg.features)
        layers += [Flatten()] + children(vgg.classifier)[:5]
        #self.nf = 4096
        # here top model is everything but the last layer
        self.top_model = nn.Sequential(*layers) # NOTE: I need to find out what the fn(*arg) syntax is
        
        fc_layers = self.create_fc_layer(4096, c, p=None)
        self.n_fc = len(fc_layers)
        self.fc_model = to_gpu(nn.Sequential(*fc_layers))
        apply_init(self.fc_model, kaiming_normal)
        self.model = to_gpu(nn.Sequential(*(layers+fc_layers)))
        
    def create_fc_layer(self, ni, nf, p, actn=None):
        res=[]
        if p: res.append(nn.Dropout(p=p))
        res.append(nn.Linear(in_features=ni, out_features=nf))
        if actn: res.append(actn())
        return res

    @property # NOTE: I also need to learn Python Static Method syntax --> https://stackoverflow.com/questions/400739/what-does-asterisk-mean-in-python
    def name(self): return "vgg16"
    
    def get_layer_groups(self, do_fc=False):
        if do_fc:
            m, idxs = self.fc_model, []
        else:
            m, idxs = self.model, [self.lr_cut, -self.n_fc]
        lgs = list(split_by_idxs(children(m), idxs))
        return lgs

In [7]:
bs=32; sz=224
f_model = vgg16
n = 7637
val_idxs = get_cv_idxs(n, 0, val_pct=0.2)
tfms = tfms_from_model(f_model, sz) # NOTE: how would it know, if this is a custom/PyTorch model?
data = ImageClassifierData.from_csv(path, 'train', f'{path}train.csv', bs, tfms, 
                                    val_idxs=val_idxs, continuous=True)
# note precompute=False
models = ConvnetBuilderVGG(data.c, data.is_multi, data.is_reg)
models.model


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /Users/WayNoxchi/.torch/models/vgg16-397923af.pth
100%|██████████| 553433881/553433881 [02:38<00:00, 3491766.09it/s]
Out[7]:
Sequential(
  (0): Conv2d (3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace)
  (2): Conv2d (64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace)
  (4): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (5): Conv2d (64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace)
  (7): Conv2d (128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace)
  (9): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (10): Conv2d (128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace)
  (12): Conv2d (256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace)
  (14): Conv2d (256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace)
  (16): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (17): Conv2d (256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (18): ReLU(inplace)
  (19): Conv2d (512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (20): ReLU(inplace)
  (21): Conv2d (512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (22): ReLU(inplace)
  (23): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (24): Conv2d (512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (25): ReLU(inplace)
  (26): Conv2d (512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (27): ReLU(inplace)
  (28): Conv2d (512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (29): ReLU(inplace)
  (30): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (31): Flatten(
  )
  (32): Linear(in_features=25088, out_features=4096)
  (33): ReLU(inplace)
  (34): Dropout(p=0.5)
  (35): Linear(in_features=4096, out_features=4096)
  (36): ReLU(inplace)
  (37): Linear(in_features=4096, out_features=1)
)

In [13]:
class ConvLearnerVGG(ConvLearner):
    
    # rewriting pretrained
    @classmethod
    def pretrained(cls, data, ps=None, **kwargs):
        models = ConvnetBuilderVGG(data.c, data.is_multi, data.is_reg, ps=ps)
        return cls(data, models, **kwargs)
    
    # redefining freeze to freeze everything but last layer
    def freeze(self):
        layers = children(self.model)
        n = len(layers)
        for λ in layers:
            λ.trainable=False
            for p in λ.parameters(): p.requires_grad=False
        λ = layers[n-1]
        λ.trainable=True
        for p in λ.parameters(): p.requires_grad=True
    
    def unfreeze_prev_layer(self):
        layers = children(self.model)
        λ = layers[35]
        λ.trainable=True
        for p in λ.parameters(): p.requires_grad=True

In [14]:
bs = 32; sz = 224
f_model = vgg16
n = 7637
val_idxs = get_cv_idxs(n, 0, val_pct=0.2)
tfms = tfms_from_model(f_model, sz)

In [15]:
data = ImageClassifierData.from_csv(path, 'train', f'{path}train.csv', bs, tfms, 
                                    val_idxs=val_idxs, continuous=True)

In [16]:
learn = ConvLearnerVGG.pretrained(data, ps=0.0, precompute=False)

NOTE: this is on my MacBook w/o a GPU.


In [17]:
m = learn.models.model
trainable_params_(m)


Out[17]:
[Parameter containing:
 1.00000e-02 *
  1.8108  2.4513 -1.1013  ...  -3.0995 -1.4156 -0.8180
 [torch.FloatTensor of size 1x4096], Parameter containing:
  0
 [torch.FloatTensor of size 1]]

In [18]:
learn.unfreeze_prev_layer()
trainable_params_(m)


Out[18]:
[Parameter containing:
 -1.1262e-02  1.0421e-02 -1.6899e-03  ...  -1.6088e-02  1.2137e-02  6.5078e-03
 -5.4509e-04 -7.8270e-03  7.1184e-03  ...  -4.0817e-03  9.8776e-03 -1.1085e-02
 -1.0933e-02 -5.1533e-03  1.6766e-02  ...  -3.6180e-03  3.5386e-03 -2.2417e-02
                 ...                   ⋱                   ...                
 -1.0725e-02 -7.2678e-03 -3.8252e-03  ...  -2.4693e-03  8.3481e-03 -5.4105e-03
  5.4018e-03  8.1430e-03 -1.3569e-02  ...   4.0841e-03 -4.1793e-04 -2.2802e-03
  2.8788e-02  6.5824e-03  4.8993e-03  ...   2.4367e-02  6.5563e-03 -7.2610e-03
 [torch.FloatTensor of size 4096x4096], Parameter containing:
  0.0332
  0.0616
  0.0307
    ⋮   
  0.0456
  0.0442
  0.0588
 [torch.FloatTensor of size 4096], Parameter containing:
 1.00000e-02 *
  1.8108  2.4513 -1.1013  ...  -3.0995 -1.4156 -0.8180
 [torch.FloatTensor of size 1x4096], Parameter containing:
  0
 [torch.FloatTensor of size 1]]

So, the numbers in the top layers are different from Yannet's, but the Conv layer pars are exactly the same. Pretty sure this is because when you finetune a model, the FC layer/s you stack ontop are randomly initialized, but the Conv layers coming from a pretrained network (PyTorch VGG 16), are necessarily exactly the same, given you got the same pretrained weights.

Cross Validation


In [20]:
bs=32; sz=224
n = 7637
transforms_basic = [RandomRotateXY(10), RandomDihedralXY()]
transforms_basic = [RandomRotateXY(10)]

Here's code to do cross-validation:


In [24]:
def get_model_i(i=0):
    val_idxs = get_cv_idxs(n, i, val_pct=0.1)
    tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_basic, max_zoom=1.05)
    data = ImageClassifierData.from_csv(path, 'train', f'{path}train.csv', bs, tfms, 
                                        val_idxs=val_idxs, suffix='.jpg', continuous=True)
    learn = ConvLearnerVGG.pretrained(data, ps=0.0, precompute=False)
    return learn

How to train is a W.I.P. --- great..


In [22]:
def fit_and_predict(learn):
    learn.fit(1e-3, 3)
    learn.fit(1e-4, 4)
    print("unfreezing")
    learn.unfreeze_prev_layer()
    #learn.fit(1e-5, 3, cycle_len=1, cycle_mult=2)
    learn.fit(1e-5, 3)
    return learn.TTA()

Predictions


In [26]:
preds = []
for i in range(11):
    print("iteration ", i)
    learn = get_model_i(i)
    preds.append(fit_and_predict(learn))

This'll actually take forever on an i5 CPU, but it does start training so, that's good.


In [27]:
def reshape_preds(preds):
    predictions = [preds[i][0] for i in range(11)]
    y = [preds[i][1] for i in range(11)]
    pp = np.vstack(predictions)
    yy = np.vstack(y)
    print(yy.shape)
    pp = np.maximum(pp, 0.0)
    err = np.abs(pp - yy).mean()
    print("err", err)

In [ ]:
reshape_preds(preds)

Alrighty, that gave me some idea of how a custom model is added to FastAI.


In [ ]:

Getting length of dataset. Dataset is my custom-built set for my G-LOC-Detector.


In [6]:
df = pd.read_csv(path + 'train.csv')
len(df['id'])


Out[6]:
7637