Dataset is the fast.ai ImageNet sample set. The Jupyter kernel was restarted between ImageNet learner runs because of the model's size.
In [1]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2
In [2]:
from pathlib import Path
from fastai.conv_learner import *
# from fastai.models import darknet
Current Darknet, and Proposed Changes:
NOTE: from .layers import *
was changed to from fastai.layers import *
to prevent a ModuleNotFoundError when the module is run outside the fastai package.
In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from fastai.layers import * ### <<<------
class ConvBN(nn.Module):
    "Conv2d (bias-free) followed by BatchNorm2d and LeakyReLU(0.1)."

    def __init__(self, ch_in, ch_out, kernel_size=3, stride=1, padding=0):
        super().__init__()
        # bias is omitted: the following BatchNorm provides the affine shift
        self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size,
                              stride=stride, padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(ch_out, momentum=0.01)
        self.relu = nn.LeakyReLU(0.1, inplace=True)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        return self.relu(out)
class DarknetBlock(nn.Module):
    "Residual block: 1x1 bottleneck halving the channels, 3x3 back up, plus skip."

    def __init__(self, ch_in):
        super().__init__()
        ch_mid = ch_in // 2
        self.conv1 = ConvBN(ch_in, ch_mid, kernel_size=1, stride=1, padding=0)
        self.conv2 = ConvBN(ch_mid, ch_in, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        return out + residual
class Darknet(nn.Module):
    "Replicates the darknet classifier from the YOLOv3 paper (table 1)"

    def make_group_layer(self, ch_in, num_blocks, stride=1):
        "One width-doubling ConvBN, then `num_blocks` residual DarknetBlocks."
        group = [ConvBN(ch_in, ch_in * 2, stride=stride)]
        group += [DarknetBlock(ch_in * 2) for _ in range(num_blocks)]
        return group

    def __init__(self, num_blocks, num_classes=1000, start_nf=32):
        super().__init__()
        nf = start_nf
        layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)]
        for i, nb in enumerate(num_blocks):
            # group at index 1 keeps stride 1; every other group downsamples by 2
            layers += self.make_group_layer(nf, nb, stride=(1 if i == 1 else 2))
            nf *= 2
        # global pool -> flatten -> linear classifier head (raw logits out)
        layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)
########################
### Proposed Version ###
class PR_Darknet(nn.Module):
    "Replicates the darknet classifier from the YOLOv3 paper (table 1)"

    def make_group_layer(self, ch_in, num_blocks, stride=1):
        "One stride-`stride` ConvBN doubling the width, then `num_blocks` residual blocks."
        layers = [ConvBN(ch_in, ch_in*2, stride=stride)]
        for i in range(num_blocks):
            layers.append(DarknetBlock(ch_in*2))
        return layers

    def __init__(self, num_blocks, num_classes=1000, start_nf=32):
        super().__init__()
        nf = start_nf
        layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)]
        for i, nb in enumerate(num_blocks):
            layers += self.make_group_layer(nf, nb, stride=(1 if i == 1 else 2))
            nf *= 2
        layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
        # Proposed change: emit log-probabilities so fastai's default nll_loss
        # criterion becomes an effective cross-entropy. dim=1 is the class
        # dimension of the (N, num_classes) output after Flatten+Linear;
        # a bare nn.LogSoftmax() is deprecated and only infers the dim.
        layers += [nn.LogSoftmax(dim=1)]  ### <<<------
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)
### /Proposed Version ###
########################
def darknet_53(num_classes=1000):
    "Full 53-layer darknet from the YOLOv3 paper."
    return Darknet([1, 2, 8, 8, 4], num_classes)


def darknet_small(num_classes=1000):
    return Darknet([1, 2, 4, 8, 4], num_classes)


def darknet_mini(num_classes=1000):
    return Darknet([1, 2, 4, 4, 2], num_classes, start_nf=24)


def darknet_mini2(num_classes=1000):
    return Darknet([1, 2, 8, 8, 4], num_classes, start_nf=16)


def darknet_mini3(num_classes=1000):
    return Darknet([1, 2, 4, 4], num_classes)


# demonstrator
def PR_darknet_53(num_classes=1000):
    "darknet_53 built from the proposed PR_Darknet (final LogSoftmax included)."
    return PR_Darknet([1, 2, 8, 8, 4], num_classes)
In [4]:
def display_head(fastai_learner, λ_name=None, show_nums=False):
    """Print the network head: everything from the last `λ_name` match down.

    fastai_learner: a learner/model, its str, or pre-split lines.
    λ_name: marker for the final conv block. Defaults to 'DarknetBlock',
            in which case the Darknet learner is unwrapped via `[0]`
            (for Darknet, `learner[0]` plays the role `learner` plays
            for other models -- keeps the printouts neat).
    show_nums: split the repr into lines before indexing, so layer
               numbers show up - kinda.
    """
    if λ_name is None:
        λ_name = 'DarknetBlock'
        fastai_learner = fastai_learner[0]

    if show_nums:
        fastai_learner = str(fastai_learner).split('\n')

    n = len(fastai_learner) if show_nums else len(str(fastai_learner).split('\n'))

    # walk backwards from the last entry until the marker is found
    j = 1
    for _ in range(n):
        if λ_name in str(fastai_learner[-j]):
            break
        j += 1

    # print from the marker through the end: the head and its 'neck'
    for offset in range(-j, 0):
        print(fastai_learner[offset])
# λ is used instead of a lone 'l' because 'l' alone reads too much like '1'.
# (keyboards are easy to switch on Mac or Windows: ctrl/win-space)
# NOTE(review): if λ_name never matches, indexing runs past the front of the
# sequence -- same behavior as the original; callers pass known markers.
In [5]:
PATH = Path('data/imagenet')  # fast.ai ImageNet sample root, relative to the notebook
In [6]:
sz = 256  # input image size
bs = 32   # batch size
In [7]:
tfms = tfms_from_stats(imagenet_stats, sz)  # normalize with ImageNet stats, size to sz
In [8]:
# NOTE(review): val_name='train' points the validation set at the training
# folder -- presumably because the sample set lacks a valid/ dir; confirm intended.
model_data = ImageClassifierData.from_paths(PATH, bs=bs, tfms=tfms, val_name='train')
In [9]:
f_model = darknet_53()
In [10]:
learner = ConvLearner.from_model_data(f_model, model_data)
fastai.conv_learner logic sets criterion to torch.nn.functional.nll_loss:
In [11]:
learner.crit
Out[11]:
There is no final activation layer. The criterion will be applied to the final layer's output:
In [12]:
display_head(learner)
In this case the Learning Rate Finder 'fails' due to very small — often negative — loss values: nll_loss is being applied to the raw linear outputs rather than to log-probabilities, so the computed 'loss' is not a proper negative log-likelihood.
In [13]:
learner.lr_find()
learner.sched.plot()
Sometimes the LR Finder manages to produce a plot, but the results leave much to be desired:
In [15]:
learner.lr_find()
learner.sched.plot()
In [9]:
f_model = darknet_53()
In [10]:
learner = ConvLearner.from_model_data(f_model, model_data, crit=F.cross_entropy)  # explicit cross-entropy instead of the default nll_loss
In [11]:
learner.crit
Out[11]:
There is no final activation layer. The criterion will be applied to the final layer's output:
In [12]:
display_head(learner)
In [13]:
learner.lr_find()
learner.sched.plot()
This is the shape of plot we expect to see.
In [9]:
f_model = PR_darknet_53()
In [10]:
learner = ConvLearner.from_model_data(f_model, model_data)
fastai.conv_learner logic sets criterion to torch.nn.functional.nll_loss:
In [11]:
learner.crit
Out[11]:
However, cross_entropy is equivalent to NLL applied to LogSoftmax output. Since the final layer here is a LogSoftmax activation, the NLL criterion applied to its output produces a cross-entropy loss function.
In [12]:
display_head(learner)
In [13]:
learner.lr_find()
learner.sched.plot()
In working on this, I found some behavior that seemed odd, but may be normal. The CIFAR-10 dataset from fast.ai will be used here.
In [9]:
from fastai.conv_learner import *
In [10]:
PATH = Path('data/cifar10')
In [11]:
sz = 64 # darknet53 architecture can't handle 32x32 small input
bs = 64
tfms = tfms_from_stats(imagenet_stats, sz)  # ImageNet stats reused for CIFAR-10 here
model_data = ImageClassifierData.from_paths(PATH, bs=bs, tfms=tfms, val_name='test')  # CIFAR-10 download ships a test/ folder
A version of darknet.py with the proposed changes above is used.
In [12]:
from fastai.models import darknet
Comparing resnet18 from PyTorch to resnet18 from FastAI:
In [13]:
from torchvision.models import resnet18
In [14]:
resnet18
Out[14]:
The fastai library does not alter the resnet18 model it imports from PyTorch.
For comparison, the darknet53 import from fastai looks like this:
In [15]:
darknet.darknet_53
Out[15]:
By contrast, the types of the initialized models:
In [16]:
type(resnet18(num_classes=10))
Out[16]:
In [17]:
type(darknet.darknet_53(num_classes=10))
Out[17]:
The PyTorch ResNet18 model has no output activation layer.
In [18]:
f_model = resnet18()
In [19]:
display_head(str(f_model), λ_name='BasicBlock', show_nums=True)
When a learner is initialized via ConvLearner.pretrained
, the fastai library adds a classifier head to the model via the ConvnetBuilder
class.
NOTE that the definition of the model is passed in, and not a model object.
In [20]:
learner = ConvLearner.pretrained(resnet18, model_data)
In [21]:
display_head(learner, λ_name='BasicBlock', show_nums=True)
the fastai library adds the necessary LogSoftmax layer to the end of the model
NOTE: default constructor for resnet18 & darknet is 1000 classes (ImageNet). fastai lib finds the correct num_classes
from the ModelData
object. That's why the resnet18 model above has 1000 output features, and the resnet18 learner below it has the correct 10.
The criterion, the loss function, of the learner is still F.nll_loss
:
In [22]:
learner.crit
Out[22]:
But since the final layer is an nn.LogSoftmax
, the effective loss function is Cross Entropy.
NOTE that this does not happen when the learner is initialized via .from_model_data
:
In [23]:
learner = ConvLearner.from_model_data(resnet18(num_classes=10), model_data)
In [24]:
display_head(learner, λ_name='BasicBlock', show_nums=True)
ConvLearner.pretrained
will only accept model definitions, not models themselves:
In [25]:
learner = ConvLearner.pretrained(resnet18, model_data)
In [26]:
learner = ConvLearner.pretrained(resnet18(num_classes=10), model_data)
However the current version of Darknet is not accepted by ConvLearner.pretrained
at all. This makes sense, given that the model is not yet pretrained, but also suggests further work is needed to integrate the model into the library.
In [27]:
learner = ConvLearner.pretrained(darknet.darknet_53, model_data)
In [28]:
learner = ConvLearner.pretrained(darknet.darknet_53(num_classes=10), model_data)
The from_model_data
method works, as seen in section 1.
In [378]:
# Use this version of `display_head` if the other is too finicky for you.
# NOTE: fastai learners other than darknet will have to be entered as:
# [str(learner_or_model).split('\n')]
def display_head(fastai_learner, λ_name=None):
    """Print the final conv block and the network head.

    Expects an indexable whose first element is a sequence of layers
    or lines (a Darknet learner, or a str(model) split into lines and
    wrapped in a one-element list).
    """
    body = fastai_learner[0]
    marker = 'DarknetBlock' if λ_name is None else λ_name
    # scan backwards from the end for the marker
    j = 1
    for _ in range(len(body)):
        if marker in str(body[-j]):
            break
        j += 1
    # print from the marker through the end of the network
    for offset in range(-j, 0):
        print(body[offset])
# display_head(learner, λ_name='BasicBlock')
# NOTE(review): learner1/learner2/f_model exist only in the running kernel
# (the darknet learner, the resnet learner, and the raw resnet model).
display_head(learner1) #darknet learner
print('--------')
display_head([str(learner2).split('\n')], λ_name='BasicBlock') #resnet learner
print('--------')
display_head([str(f_model).split('\n')], λ_name='BasicBlock') #resnet model
In [ ]: