In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.imports import *
from fastai.conv_learner import *
from fastai.torch_imports import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
In [2]:
# Root directory of the dataset: the labels CSV path is built from it and it is
# passed to ImageClassifierData.from_csv in get_data.
PATH = "data/dogbreeds/"
In [3]:
# Architecture handed to tfms_from_model and ConvLearner.pretrained.
arch = resnext50
# Initial image size passed to get_data.
sz = 224
# Initial batch size passed to get_data.
bs = 64
In [4]:
labels_csv = f'{PATH}labels.csv'
# Row count = number of lines minus one (presumably the CSV header row).
# Counting inside a `with` block closes the file deterministically and avoids
# materialising every line in a list just to take its length.
with open(labels_csv) as labels_file:
    n = sum(1 for _ in labels_file) - 1
# Validation indices derived from the row count; reused by every get_data call.
val_idxs = get_cv_idxs(n)
In [1]:
def get_data(sz, bs):
    """Build the ImageClassifierData object for image size `sz` and batch size `bs`.

    Transforms come from `tfms_from_model` for the module-level `arch`, using
    the side-on augmentations and `max_zoom=1.1`. The data is read from the
    labels CSV under `PATH`, with `val_idxs` as the validation split and the
    'test' folder as the test set.

    When `sz` is greater than 300 the data object is returned untouched;
    otherwise the result of `resize(340, 'tmp')` is returned. Per the forum
    thread quoted in this notebook, 340 was chosen so images stay at least
    sz * 1.1 while saving time.
    """
    transforms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
    model_data = ImageClassifierData.from_csv(
        PATH, 'train', labels_csv,
        bs=bs, tfms=transforms, val_idxs=val_idxs,
        suffix='.jpg', test_name='test',
    )
    # Large requested sizes use the data as loaded.
    if sz > 300:
        return model_data
    # Everything else goes through the 340 resize into the 'tmp' location.
    return model_data.resize(340, 'tmp')
# see: http://forums.fast.ai/t/dog-breed-identification-challenge/7464/51
In the code above, why is 300 chosen as the size threshold in the `if` condition?
<><><><>
Great question. Since we have max_zoom=1.1, I figured we should ensure our images are at least sz*1.1. And I figured resizing them to 340x340 would save plenty of time, and leave plenty of room to experiment.
http://forums.fast.ai/t/dog-breed-identification-challenge/7464/51
Note: this notebook was originally run with `... if sz < 300 ...` instead, since I didn't yet understand what the condition was for.
In [40]:
# data = get_data(sz, bs)
# labels_df = pd.read_csv(labels_csv)
# labels_df.pivot_table(index='breed', aggfunc=len).sort_values('id', ascending=False)
# fn = PATH + data.trn_ds.fnames[0]
# PIL.Image.open(fn)
# size_d = {k: PIL.Image.open(PATH+k).size for k in data.trn_ds.fnames}
In [41]:
# row_sz, col_sz = list(zip(*size_d.values()))
# row_sz, col_sz = np.array(row_sz), np.array(col_sz)
# plt.hist(row_sz[row_sz < 1000]); plt.hist(col_sz[col_sz < 1000]);
In [6]:
from sklearn import metrics
In [44]:
# Build the data object at the initial size and batch size.
data = get_data(sz, bs)
# Create the pretrained learner with precompute=True; a later cell sets
# learn.precompute=False before further fitting.
learn = ConvLearner.pretrained(arch, data, precompute=True)
In [49]:
# Run the learning-rate finder, then plot what the learner's scheduler recorded
# (presumably loss against learning rate — TODO confirm).
learn.lr_find()
learn.sched.plot()
In [50]:
# First fit on the precompute=True learner: 2e-2 as the first argument
# (presumably the learning rate) and 2 as the second (presumably the cycle count) — verify.
learn.fit(2e-2, 2)
In [51]:
# Same call as the previous cell, applied again to the same learner.
learn.fit(2e-2, 2)
In [55]:
# Turn precompute off on the learner, then fit again with the same arguments.
# Presumably this makes the data augmentation take effect — TODO confirm.
learn.precompute=False
learn.fit(2e-2, 2)
In [56]:
# Checkpoint the learner as 'RNx50_224_pre'; the same name is passed to
# learn.load further down.
learn.save('RNx50_224_pre')
In [11]:
# Increase the image size, taking advantage of the fully-convolutional architecture:
# swap the learner's data for one built at size 299 with batch size 48.
learn.set_data(get_data(299, 48))
In [65]:
# Fit on the 299-sized data: first argument 1e-2, 3 cycles, with cycle_len=1
# (presumably one epoch per cycle — verify).
learn.fit(1e-2, 3, cycle_len=1)
In [69]:
# NOTE(review): this reuses the checkpoint name saved before the switch to
# get_data(299, 48), so the earlier checkpoint is presumably overwritten and the
# '224' in the name no longer matches the data size — confirm this is intended.
learn.save('RNx50_224_pre')
In [8]:
# Rebuild the 299 / batch-size-48 data and a fresh pretrained learner (no
# precompute argument this time), then restore the saved 'RNx50_224_pre' checkpoint.
data = get_data(299, 48)
learn = ConvLearner.pretrained(arch, data)
learn.load('RNx50_224_pre')
In [10]:
# Freeze the learner before predicting (presumably freezes all but the final
# layers — TODO confirm against the fastai version in use).
learn.freeze()
In [12]:
# Test-time augmentation without is_test (presumably on the validation set — verify);
# returns predictions and targets.
log_preds, y = learn.TTA()
# Exponentiate to get probabilities; this assumes TTA returns log-probabilities,
# as the variable name suggests — TODO confirm for the fastai version in use.
probs = np.exp(log_preds)
# Displayed as a tuple: (accuracy, log loss).
accuracy(log_preds, y), metrics.log_loss(y, probs)
Out[12]:
In [13]:
# TTA with is_test=True; element [0] of the result is exponentiated, mirroring
# the log_preds -> probs step used for the earlier TTA call.
test_preds = np.exp(learn.TTA(is_test=True)[0])
In [ ]:
In [ ]:
In [ ]:
Rerunning without validation for predictions:
In [ ]:
from sklearn import metrics
# Configuration for the rerun: same values as the cells at the top of the
# notebook, except for val_idxs.
PATH = "data/dogbreeds/"
arch = resnext50
sz = 224
bs = 64
labels_csv = f'{PATH}labels.csv'
# n = len(list(open(labels_csv)))-1
# get_cv_idxs is given 0 instead of the row count n (commented out above);
# presumably this yields an empty validation split so every labelled image is
# used for training — verify.
val_idxs = get_cv_idxs(0)
def get_data(sz, bs):
    """Build the ImageClassifierData object for image size `sz` and batch size `bs`.

    Transforms come from `tfms_from_model` for the module-level `arch`, using
    the side-on augmentations and `max_zoom=1.1`. The data is read from the
    labels CSV under `PATH`, with `val_idxs` as the validation split and the
    'test' folder as the test set.

    Returns the data object untouched when `sz` is greater than 300, otherwise
    the result of `resize(340, 'tmp')`.
    """
    tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
    data = ImageClassifierData.from_csv(PATH, 'train', labels_csv, bs=bs, tfms=tfms,
                                        val_idxs=val_idxs, suffix='.jpg', test_name='test')
    # The comparison must be `>`: small training sizes (224, 299) are the ones
    # that should use the 340 resize, which still leaves room for sz * max_zoom.
    # The earlier `sz < 300` was inverted, so it skipped the resize for exactly
    # the sizes this notebook trains at and applied it only to larger ones.
    return data if sz > 300 else data.resize(340, 'tmp')
In [ ]:
# Full training run for the final predictions. It follows the same sequence as
# the earlier cells, with 1e-2 as the first fit argument throughout
# (presumably the learning rate — verify).
data = get_data(sz, bs)
learn = ConvLearner.pretrained(arch, data, precompute=True)
learn.fit(1e-2, 2)
# Switch precompute off on the learner before the remaining fit calls.
learn.precompute=False
learn.fit(1e-2, 5, cycle_len=1)
# Swap in data built at size 299 with a smaller batch size (32) and keep fitting.
learn.set_data(get_data(299, bs=32))
learn.fit(1e-2, 3, cycle_len=1)
# Same as the previous call plus cycle_mult=2 (presumably each successive cycle
# is twice as long as the one before — verify).
learn.fit(1e-2, 3, cycle_len=1, cycle_mult=2)