In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
from fastai.imports import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
In [3]:
# Root directory of the dog-breeds dataset.
PATH = "data/dogbreeds/"

# Names of the sub-folders used alongside PATH.
TRAIN = "train/"
VALID = "valid/"
TEST = "test/"
SUBM = "subm/"
RSLT = "results/"
In [4]:
# sz and bs are the size / batch-size settings reused by later cells.
sz, bs = 224, 64

# Architecture used for this run. Alternatives tried in this notebook:
#   resnext101_64, resnet34, resnet50
arch = resnext50
Explanation of the following code, `get_cv_idxs(..)`:
def get_cv_idxs(n, cv_idx=4, val_pct=0.2, seed=42):
1 np.random.seed(seed)
2 n_val = int(val_pct*n)
3 idx_start = cv_idx*n_val
4 idxs = np.random.permutation(n)
5 return idxs[idx_start:idx_start+n_val]
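The function takes in `n` (the total number of items), `cv_idx` (the cross-validation index, default 4), `val_pct` (the validation percent, default 20%), and a `seed` (default 42, the meaning of life).
Line 1 seeds NumPy's random number generator with `seed`, so a given `seed` and `n` always produce the same permutation.
Line 2 sets `n_val`, the number of validation items, to floor(validation percent × total number).
Line 3 sets `idx_start`, the starting index, to the number of validation items `n_val` times the cross-validation index.
Line 4 sets `idxs` to a random permutation of all integers in [0, n).
Line 5 returns an `n_val`-wide slice of the `idxs` array starting at `idx_start`.
I don't know yet what the significance of `cv_idx` is. Judging from line 3, it seems to select which consecutive `n_val`-sized chunk of the permutation is returned.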
In [5]:
# Count the data rows in labels.csv: every line of the file, minus one
# (presumably the header row -- confirm against the CSV). The file contains
# all labels, so the count can be used as a file count.
label_csv = f'{PATH}labels.csv'
# Count inside a `with` block so the file handle is closed as soon as we are done.
with open(label_csv) as label_file:
    n = sum(1 for _ in label_file) - 1
# Pass the file count n into get_cv_idxs(n) to obtain the validation indices.
val_idxs = get_cv_idxs(n)
In [6]:
# Build the transforms for `arch`. The second argument of tfms_from_model is
# the image size, so it must be `sz`; passing the batch size `bs` here would
# silently build transforms for 64-pixel images.
tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
# Create the dataset from the label CSV, holding out `val_idxs` for validation.
data = ImageClassifierData.from_csv(path=PATH, folder='train', csv_fname=label_csv,
                                    bs=bs, tfms=tfms, val_idxs=val_idxs, suffix='.jpg')
In [7]:
# Create the learner for `arch` on `data`, with precompute explicitly disabled.
learn = ConvLearner.pretrained(arch, data=data, precompute=False)
In [8]:
data.trn_dl.dataset.fnames[:10]
Out[8]:
In [9]:
# Preview the first ten class names. Slicing directly avoids wrapping every
# name in a 1-tuple (which zip over a single iterable does) and avoids
# materialising the whole list just to keep ten entries.
data.classes[:10]
Out[9]:
In [10]:
# Run the learner's learning-rate finder, then plot what its scheduler recorded
# so a learning rate can be picked by eye.
# NOTE(review): `lrf` is not used anywhere else in the visible notebook.
lrf = learn.lr_find()
learn.sched.plot()
In [11]:
# Four cycles of length 1 at a learning rate of 0.1.
learn.fit(1e-1, 4, cycle_len=1)
In [12]:
# Three cycles at a learning rate of 0.1, starting at length 1 with cycle_mult=2.
learn.fit(1e-1, 3, cycle_len=1, cycle_mult=2)
In [14]:
lr = 1e-1
# Three learning rates: lr/100, lr/10 and lr itself
# (presumably one per layer group -- confirm against the fastai docs).
lrs = np.array([lr / divisor for divisor in (1e2, 1e1, 1)])
# Unfreeze the learner before training with the array of rates.
learn.unfreeze()
learn.fit(lrs, n_cycle=3, cycle_len=1, cycle_mult=2)
In [18]:
# Call resize(300, 'tmp') on the learner's data object.
# NOTE(review): the returned value is only displayed; it is never assigned or
# handed back to `learn`. If the intent was to continue training on the resized
# data, confirm whether the result has to be attached to the learner
# (TODO: check the fastai API for resize / set_data).
learn.data.resize(300, 'tmp')
Out[18]:
In [19]:
learn.data.sz
Out[19]:
In [20]:
learn.freeze()
In [21]:
# Re-run the learning-rate finder (the learner was frozen in the previous cell)
# and plot what its scheduler recorded.
# NOTE(review): `lrf` is not used anywhere else in the visible notebook.
lrf = learn.lr_find()
learn.sched.plot()
In [22]:
# Learning rate for this cell: a single cycle of length 1.
lr = 5e-3
learn.fit(lr, 1, cycle_len=1)
In [23]:
Out[23]:
In [24]:
# Path of the first training file, relative to the working directory.
fn = PATH + data.trn_ds.fnames[0]
fn
Out[24]:
In [25]:
# Open that file with PIL; the bare name below displays the image.
img = PIL.Image.open(fn)
img
Out[25]:
In [26]:
img.size
Out[26]:
In [27]:
def _image_size(path):
    """Return the PIL `size` of the image at `path`, closing the file afterwards."""
    # The context manager releases the file handle immediately instead of
    # leaving one open per training image until garbage collection.
    with PIL.Image.open(path) as im:
        return im.size

# Map every training filename to the size of its image.
size_d = {k: _image_size(PATH+k) for k in data.trn_ds.fnames}
In [28]:
# Transpose the per-image size tuples into two parallel tuples:
# all first components in row_sz, all second components in col_sz.
row_sz, col_sz = zip(*size_d.values())
In [29]:
# Convert both size sequences to NumPy arrays so they can be masked and plotted.
row_sz, col_sz = np.array(row_sz), np.array(col_sz)
In [30]:
row_sz[:5]
Out[30]:
In [32]:
plt.hist(row_sz);
In [33]:
plt.hist(row_sz[row_sz < 1000])
Out[33]:
In [34]:
# Histogram of all col_sz values. The previous index `col` was never defined
# and raised a NameError; this mirrors the row_sz histogram above.
plt.hist(col_sz);
In [35]:
plt.hist(col_sz[col_sz < 1000])
Out[35]:
In [36]:
len(data.trn_ds)
Out[36]:
In [ ]:
len
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [10]:
# # NOTE: THIS IS W/ bs=128
# lrf = learn.lr_find()
# learn.sched.plot()
In [11]:
lr = 2e-2
In [11]:
# Three cycles at `lr`, starting at length 1 with cycle_mult=2.
learn.fit(lr, n_cycle=3, cycle_len=1, cycle_mult=2)
In [ ]:
In [12]:
# Three learning rates: lr/9, lr/3 and lr itself.
lrs = np.array([lr / divisor for divisor in (9, 3, 1)])
In [13]:
# Unfreeze the learner, then train with the array of rates in `lrs`:
# three cycles, starting at length 1 with cycle_mult=2.
learn.unfreeze()
learn.fit(lrs, n_cycle=3, cycle_len=1, cycle_mult=2)
In [14]:
# Save the learner under a name prefixed with the current `sz`.
# NOTE(review): the 'RN50' tag does not obviously match `arch`, which is set to
# resnext50 earlier in this notebook -- confirm the intended checkpoint name.
learn.save(f'{sz}_dogbreeds_RN50_00')
In [ ]: