Preparing to train a new model on ImageNet by going through a sample set of it first.
In [9]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2
In [10]:
from fastai.conv_learner import *
from fastai.models import darknet
In [11]:
from pathlib import Path
import os
import pandas as pd
In [12]:
PATH = Path('data/imagenet')
PATH_TRAIN = PATH/'train'
In [13]:
PATH, PATH_TRAIN
Out[13]:
In [6]:
folder = os.listdir(PATH_TRAIN)[0]
os.listdir(PATH_TRAIN/folder)
Out[6]:
In [16]:
fimg = PATH_TRAIN / folder / os.listdir(PATH_TRAIN/folder)[0]
Image.open(fimg)
Out[16]:
In [6]:
def view_img(folder_path, idx=-1):
    files = os.listdir(folder_path)
    if idx < 0: idx = np.random.randint(len(files))
    fimg = folder_path / files[idx]
    img = cv2.imread(str(fimg))
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
In [49]:
view_img(PATH_TRAIN/folder, idx=0)
In [50]:
view_img(PATH_TRAIN/folder)
In [51]:
view_img(PATH_TRAIN/folder)
In [52]:
view_img(PATH_TRAIN/folder)
In [12]:
# os.listdir(PATH_TRAIN)
# folders = os.listdir(PATH_TRAIN)
# for folder in folders:
# print(f'{len(os.listdir(PATH_TRAIN/folder))}')
In [7]:
imagenet_labels = pd.read_csv(PATH/'imagenet_labels.txt', delim_whitespace=True, header=None)
In [8]:
imagenet_labels.head()
Out[8]:
I'm not sure whether I should keep the class number or not - since I think that's an internal detail that's handled automatically - so I'll just make a CSV matching folder codes to class names.
In [26]:
# imagenet_labels = imagenet_labels.drop(columns=[1])
On second thought, I can just use the .from_paths fastai method and keep a dictionary to look up class names.
In [35]:
imagenet_labels.as_matrix()
Out[35]:
In [41]:
# {c0 : c2 for c0,_,c2 in imagenet_labels.as_matrix()}
In [9]:
imagenet_labels_lookup = {c0 : c2 for c0,_,c2 in imagenet_labels.as_matrix()}
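As a quick check of the lookup (assuming the training folder names are the same codes that appear in the first column of imagenet_labels.txt, and that its third column is the class name, as the comprehension above implies):
In [ ]:
# hypothetical usage: map the folder code picked up earlier to a readable class name
imagenet_labels_lookup[folder]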
In [9]:
def reset_valset(path):
    # move everything in valid/ back into the corresponding train/ folders
    path_val = path/'valid'
    path_trn = path/'train'
    if not os.path.exists(path_val):
        print('No validation directory to reset.')
        return
    for folder in path_val.iterdir():
        for f in folder.iterdir():
            os.rename(f, path_trn / str(f).split('valid/')[-1])

def create_valset(path, p=0.15, seed=0):
    np.random.seed(seed=seed)
    path_val = path/'valid'
    path_trn = path/'train'
    reset_valset(path)
    # move a random fraction p of each class folder from train/ to valid/
    for folder in path_trn.iterdir():
        os.makedirs(path_val/str(folder).split('train/')[-1], exist_ok=True)
        flist = list(folder.iterdir())
        n_move = int(np.round(len(flist) * p))
        fmoves = np.random.choice(flist, n_move, replace=False)
        for f in fmoves:
            os.rename(f, path_val / str(f).split('train/')[-1])

def count_files(path):
    # total number of files across all class folders under path
    count = 0
    for folder in path.iterdir():
        count += len(list(folder.glob('*')))
    return count
In [161]:
count_files(PATH_TRAIN)
Out[161]:
In [164]:
reset_valset(PATH)
In [208]:
create_valset(PATH)
In [209]:
count_files(PATH_TRAIN)
Out[209]:
In [211]:
19439 * (1 - .15)
Out[211]:
In [11]:
count_files(PATH/'valid')
Out[11]:
In [20]:
reset_valset(PATH)
count_files(PATH_TRAIN), count_files(PATH/'valid')
Out[20]:
In [217]:
folder = next(iter(PATH_TRAIN.iterdir()))
view_img(folder)
In [218]:
folder = next(iter((PATH/'valid').iterdir()))
view_img(folder)
In [13]:
sz = 256
bs = 32
darknet53 = darknet.darknet_53()
# tfms = tfms_from_stats(imagenet_stats, sz, aug_tfms=transforms_side_on, max_zoom=1.05, pad=sz//8)
# tfms = tfms_from_model(darknet53, sz) # loads imagenet_stats
tfms = tfms_from_model(resnet34, sz)
model_data = ImageClassifierData.from_paths(PATH, bs=bs, tfms=tfms, num_workers=4)
In [14]:
learner = ConvLearner.from_model_data(darknet53, model_data)
# learner.crit = F.nll_loss
In [15]:
learner.crit
Out[15]:
In [16]:
# learner.summary()
In [ ]:
learner.lr_find()
learner.sched.plot(10,5)
In [14]:
sz = 256
bs = 32
tfms = tfms_from_stats(imagenet_stats, sz, aug_tfms=transforms_side_on, max_zoom=1.05, pad=sz//8)
model_data = ImageClassifierData.from_paths(PATH, bs=bs, tfms=tfms, num_workers=4)
# model_data = get_data(sz, bs)   # alternative loader (get_data is defined further down)
darknet53 = darknet.darknet_53()
In [15]:
learner = ConvLearner.from_model_data(darknet53, model_data)
learner.crit = F.nll_loss
In [ ]:
learner.lr_find()
Making sure I can get similar performance to Sylvain Gugger's implementation of Darknet53.
In [6]:
# list((PATH/'train').iterdir())
In [22]:
L = list((PATH/'train').iterdir())
L1 = L[1].glob('*')
# list(L1)
In [23]:
filenames, classes = [], []
TRN_PATH = PATH/'train'
for directory in TRN_PATH.iterdir():
    for fn in directory.glob('*'):
        filenames.append(str(fn)[len(str(TRN_PATH))+1:])
        classes.append(str(directory)[len(str(TRN_PATH))+1:])
class_names = list(set(classes))
class2idx = {c:i for i,c in enumerate(class_names)}
labels = [class2idx[c] for c in classes]
In [24]:
df = pd.DataFrame({'filenames':filenames, 'cats':labels}, columns=['filenames', 'cats'])
df.head()
Out[24]:
In [25]:
df.to_csv(PATH/'train.csv', index=False)
In [11]:
# stats = (np.array([0.485, 0.456, 0.406]), np.array([0.229, 0.224, 0.225]))
In [12]:
def get_data(sz, bs):
    tfms = tfms_from_model(resnet50, sz)
    return ImageClassifierData.from_csv(PATH, 'train', PATH/'train.csv', bs=bs, tfms=tfms)
In [13]:
size = 256
batch_size = 16
data = get_data(size, batch_size)
In [14]:
class ConvBN(nn.Module):
    # convolutional layer followed by BatchNorm, with a leaky-ReLU activation
    def __init__(self, ch_in, ch_out, kernel_size=3, stride=1, padding=0):
        super().__init__()
        self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, stride=stride,
                              padding=padding, bias=False)
        self.bn = nn.BatchNorm2d(ch_out, momentum=0.01)
    def forward(self, x):
        return F.leaky_relu(self.bn(self.conv(x)), negative_slope=0.1)

class DarknetBlock(nn.Module):
    # the basic residual block: 1x1 bottleneck, 3x3 conv, then add the input back
    def __init__(self, ch_in):
        super().__init__()
        ch_hid = ch_in//2
        self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0)
        self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1)
    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2(out)
        return out + x

class Darknet(nn.Module):
    # Replicates Table 1 from the YOLOv3 paper
    def __init__(self, num_blocks, num_classes=1000):
        super().__init__()
        self.conv = ConvBN(3, 32, kernel_size=3, stride=1, padding=1)
        self.layer1 = self.make_group_layer(32,  num_blocks[0])
        self.layer2 = self.make_group_layer(64,  num_blocks[1], stride=2)
        self.layer3 = self.make_group_layer(128, num_blocks[2], stride=2)
        self.layer4 = self.make_group_layer(256, num_blocks[3], stride=2)
        self.layer5 = self.make_group_layer(512, num_blocks[4], stride=2)
        self.linear = nn.Linear(1024, num_classes)
    def make_group_layer(self, ch_in, num_blocks, stride=1):
        # a stride-s ConvBN that doubles the channels, followed by num_blocks DarknetBlocks
        layers = [ConvBN(ch_in, ch_in*2, stride=stride)]
        for i in range(num_blocks):
            layers.append(DarknetBlock(ch_in*2))
        return nn.Sequential(*layers)
    def forward(self, x):
        out = self.conv(x)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        return F.log_softmax(self.linear(out), dim=-1)
In [15]:
darknet53 = Darknet([1,2,8,8,4])
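A quick shape sanity check on the replicated architecture (just a sketch: a dummy batch of 2 at the 256-pixel size used above, to confirm the head produces 1,000 class scores):
In [ ]:
import torch
x = torch.randn(2, 3, 256, 256)   # dummy batch at sz=256
darknet53(x).shape                # expect torch.Size([2, 1000])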
In [14]:
darknet53 = darknet.darknet_53()
In [15]:
learn = ConvLearner.from_model_data(darknet53, data)
learn.crit = F.nll_loss
In [16]:
# learn.summary()
In [17]:
learn.lr_find()
In [18]:
learn.lr_find()
In [6]:
## these parameters are universal
sz = 256
bs = 32
darknet53 = darknet.darknet_53()
In [7]:
## the .from_paths way (mine) with stock transforms
tfms = tfms_from_stats(imagenet_stats, sz)
model_data = ImageClassifierData.from_paths(PATH, bs=bs, tfms=tfms, val_name='train')
In [7]:
## the .from_csv way with stock transforms
tfms = tfms_from_model(resnet50, sz)
model_data = ImageClassifierData.from_csv(PATH, 'train', PATH/'train.csv',
bs=bs, tfms=tfms)
.from_paths
In [8]:
learner = ConvLearner.from_model_data(darknet53, model_data)
learner.lr_find()
learner.sched.plot()
.from_csv
In [8]:
learner = ConvLearner.from_model_data(darknet53, model_data)
learner.lr_find()
learner.sched.plot()
.from_paths starts at about -0.0007, gets above 0 within roughly 3 iterations, and runs at around 5 s/it.
.from_csv starts at -0.00366, dips to -0.0213 at iteration 4, promptly climbs back above 0, dips below again at iteration 7, and comes back above at 8. It also runs at around 5 s/it.
They both look just about the same when the transforms are left at stock settings.
However, I need to know why the number of iterations was 608 for .from_paths but 414 for .from_csv. First I need to make sure the dataset size is the same for both DataLoader methods.
--> Right, I answered that question - it was a silly one. The procedure above built the CSV from the files in the training directory after I had created the validation set, so it only listed the training files, not the full dataset. Which also means: if I hadn't reset the validation set before running the test above, both methods (.from_csv and .from_paths) would have had the same number of total iterations (414 in that case).
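For reference, the iteration counts line up with dataset size divided by batch size. A rough sketch of the arithmetic (the 20% default hold-out that .from_csv applies when no val_idxs is passed is my assumption about the fastai defaults, not something verified above):
In [ ]:
import math

bs = 32
n_all = 19439                               # files in train/ after reset_valset (see count_files above)
n_trn = n_all - int(round(n_all * 0.15))    # ~16523 left in train/ after create_valset

print(math.ceil(n_all / bs))                        # 608 -> matches the .from_paths lr_find
print(math.ceil((n_trn - int(0.2 * n_trn)) / bs))   # 414 -> matches .from_csv if it holds out a default 20%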
In [11]:
## dataset size for .from_paths:
count_files(PATH/'train')
Out[11]:
In [26]:
## dataset size for .from_csv:
df = pd.read_csv(PATH/'train.csv')
len(df)
Out[26]:
So now that I'm getting the same performance - as a sanity check that I'm not doing something very wrong - the only question that remains is: what is the effect of different transforms on the Learner's ability to train? Am I breaking things by using wrong or bad transforms? Let's find out.
tfms_from_stats(imagenet_stats) == tfms_from_model(darknet53) == tfms_from_model(resnet50) ?
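One quick way to answer that without training anything is to read the library source; a sketch (it only assumes tfms_from_model is a plain Python function that inspect can retrieve):
In [ ]:
import inspect
# If tfms_from_model simply picks a stats tuple (falling back to imagenet_stats for
# anything that isn't inception-style) and calls tfms_from_stats, then all three
# calls above should produce the same normalisation.
print(inspect.getsource(tfms_from_model))
print(imagenet_stats)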
In [6]:
sz = 256
bs = 32
darknet53 = darknet.darknet_53()
In [7]:
# tfms = tfms_from_stats(imagenet_stats, sz)
# tfms = tfms_from_model(darknet53, sz)
tfms = tfms_from_model(resnet50, sz)
In [8]:
model_data = ImageClassifierData.from_paths(PATH, bs=bs, tfms=tfms, val_name='train')
In [9]:
learner = ConvLearner.from_model_data(darknet53, model_data)
tfms = tfms_from_stats(imagenet_stats, sz):
Worked after about 2 failures.
In [10]:
learner.lr_find()
learner.sched.plot()
tfms = tfms_from_model(darknet53, sz):
Worked after about 2 failures.
In [10]:
learner.lr_find()
learner.sched.plot()
tfms = tfms_from_model(resnet50, sz):
After 4 failures: the 1st plot. A subsequent try produced a similar 'low-res' plot. One more failure, then the 2nd plot was created.
In [10]:
learner.lr_find()
learner.sched.plot()
In [10]:
learner.lr_find()
learner.sched.plot()
In [6]:
sz = 256
bs = 32
darknet53 = darknet.darknet_53()
In [7]:
# tfms = tfms_from_stats(imagenet_stats, sz)
tfms = tfms_from_model(darknet53, sz)
In [8]:
model_data = ImageClassifierData.from_paths(PATH, bs=bs, tfms=tfms, val_name='train')
In [9]:
# learner = ConvLearner.from_model_data(darknet53, model_data)
learner = ConvLearner.from_model_data(darknet53, model_data, crit=F.cross_entropy)
In [10]:
learner.crit
Out[10]:
tfms = tfms_from_model(darknet53, sz):
In [11]:
learner.lr_find()
learner.sched.plot()
Right, so it turns out that the loss function was automatically being set to NLL loss (negative log-likelihood) instead of cross-entropy loss, which combines log-softmax and NLL loss. Setting it to cross entropy fixed all issues immediately. Thanks to SGugger.
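A minimal illustration of the distinction in plain PyTorch (a sketch with random logits and labels, nothing from the dataset above):
In [ ]:
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)                        # raw model outputs, no log-softmax applied
y = torch.randint(0, 10, (4,))

ce  = F.cross_entropy(logits, y)                   # log-softmax + NLL in one call
nll = F.nll_loss(F.log_softmax(logits, dim=1), y)  # the same thing done in two steps
print(torch.allclose(ce, nll))                     # True

# F.nll_loss applied directly to raw logits computes something different, because it
# expects log-probabilities as input - the kind of mismatch described above.
print(F.nll_loss(logits, y))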
So as a final sanity check, I'll run the learning rate finder again, this time with all of my transform hyperparameters in use, just to make sure it works.
Then I'll train on the ImageNet sample set using Cyclical Learning Rates.
Since I'm just checking that the Learning Rate Finder will run, I'm pointing the validation set at the training folder (to avoid an error, since I'm not actually training right now).
In [14]:
bs = 32
sz = 256
darknet53 = darknet.darknet_53()
tfms = tfms_from_stats(imagenet_stats, sz, aug_tfms=transforms_side_on,
max_zoom=1.05, pad=sz//8)
model_data = ImageClassifierData.from_paths(PATH, bs=bs, tfms=tfms, val_name='train')
learner = ConvLearner.from_model_data(darknet53, model_data, crit=F.cross_entropy)
In [7]:
learner.lr_find()
learner.sched.plot()
Viewing augmentations (adapted from fastai DL1 Lesson1):
In [64]:
def get_augs(iters=1):
    data = ImageClassifierData.from_paths(PATH, bs=2, tfms=tfms, num_workers=1, val_name='train')
    data_iter = iter(data.aug_dl)
    for i in range(iters):
        x,_ = next(data_iter)
    return data.trn_ds.denorm(x)[1]

def plots(ims, figsize=(12,6), rows=1, titles=None):
    f = plt.figure(figsize=figsize)
    for i in range(len(ims)):
        sp = f.add_subplot(rows, len(ims)//rows, i+1)
        sp.axis('Off')
        if titles is not None: sp.set_title(titles[i], fontsize=16)
        plt.imshow(ims[i])
In [65]:
ims = np.stack([get_augs(20) for i in range(6)])
plots(ims, rows=2)
I checked other pictures - they look fine. This one looks tough though. Hopefully it's not too much of an issue for the model to train, with so much of the fish being out of frame sometimes.
At this point, I'm ready to get started with the model. I'll use Cyclical Learning Rates (Leslie Smith, 2015, arXiv: 1506.01186). From what I understand from the paper and from the fast.ai forums, I'll run it with a cycle_len of 3 epochs.
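A sketch of what that training call might look like with the fastai 0.7 fit API (the learning rate is a placeholder to be read off the LR-finder plot above, and use_clr=(32, 10) is just an illustrative setting, not something fixed by these notes):
In [ ]:
lr = 1e-2   # placeholder: choose from learner.sched.plot()
learner.fit(lr, 1, cycle_len=3, use_clr=(32, 10))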
In [ ]: