In [1]:
    
# (Re)load the autoreload extension and use mode 2, so edited modules are
# re-imported automatically before each cell executes.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
    
In [2]:
    
from fastai.imports import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
    
In [3]:
    
# Dataset root, followed by the sub-directory name constants.
PATH = "data/dogbreeds/"
TRAIN = "train/"
VALID = "valid/"
TEST = "test/"
SUBM = "subm/"
RSLT = "results/"
    
In [4]:
    
# Image size (sz) and batch size (bs) used by the cells below.
sz, bs = 224, 64

# Model architecture for this run.
# Other options that were considered: resnext101_64, resnet34, resnet50
arch = resnext50
    
Explanation of the following code: get_cv_idxs(..):
def get_cv_idxs(n, cv_idx=4, val_pct=0.2, seed=42):
1    np.random.seed(seed)
2    n_val = int(val_pct*n)
3    idx_start = cv_idx*n_val
4    idxs = np.random.permutation(n)
5    return idxs[idx_start:idx_start+n_val]
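The function takes `n` (the total number of items), `cv_idx` (the cross-validation index, default 4), `val_pct` (the validation fraction, default 0.2, i.e. 20%), and `seed` (default 42, the meaning of life). Line by line:

1. Seeds NumPy's random number generator with `seed`, so the permutation below is reproducible.
2. Sets `n_val`, the number of validation items, to floor(`val_pct` × `n`).
3. Sets `idx_start`, the starting index, to `cv_idx` × `n_val`.
4. Sets `idxs` to a random permutation of all integers in [0, n).
5. Returns the `n_val`-wide slice of `idxs` that starts at `idx_start`.

I don't know yet what the significance of `cv_idx` is. (From the code alone, it selects which consecutive `n_val`-sized chunk of the permutation is returned as the validation set.)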
In [5]:
    
# Count the lines in labels.csv. The file lists every label, so its line count
# minus one (presumably the header row) serves as the number of images.
# That count n is passed to get_cv_idxs(n) to choose the validation indexes.
label_csv = f'{PATH}labels.csv'
with open(label_csv) as label_file:  # context manager closes the handle promptly
    n = sum(1 for _ in label_file) - 1
val_idxs = get_cv_idxs(n)
    
In [6]:
    
# Build the image transforms for `arch` at image size `sz`.
# The second argument of tfms_from_model is the image size; the original cell
# passed the batch size `bs` there, which silently set the image size to 64.
tfms = tfms_from_model(arch, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
# Build the dataset from labels.csv, holding out `val_idxs` for validation.
data = ImageClassifierData.from_csv(path=PATH, folder='train', csv_fname=label_csv,
                                    bs=bs, tfms=tfms, val_idxs=val_idxs, suffix='.jpg')
    
In [7]:
    
# Create the learner from the pretrained `arch` model on `data`;
# precompute is explicitly turned off.
learn = ConvLearner.pretrained(arch, data=data, precompute=False)
    
In [8]:
    
# Peek at the first 10 file names of the training dataset.
data.trn_dl.dataset.fnames[:10]
    
    Out[8]:
In [9]:
    
# Show the first 10 class names. Slicing data.classes directly avoids the
# single-argument zip(), which only wrapped every name in a 1-tuple and built
# the full list before slicing.
data.classes[:10]
    
    Out[9]:
In [10]:
    
# Run the learning-rate finder, then plot what the scheduler recorded
# (presumably loss against learning rate — confirm).
# NOTE(review): `lrf` is not read anywhere else in the visible notebook.
lrf = learn.lr_find()
learn.sched.plot()
    
    
 
 
    
    
    
    
In [11]:
    
# First training pass: learning rate 1e-1, 4 cycles, cycle_len=1
# (cycle_len is presumably measured in epochs — confirm).
learn.fit(1e-1, n_cycle=4, cycle_len=1)
    
    
 
 
    
    
    
    
    
In [12]:
    
# Second training pass: same learning rate, 3 cycles, with cycle_mult=2
# applied to the cycle length.
learn.fit(1e-1, n_cycle=3, cycle_len=1, cycle_mult=2)
    
    
 
 
    
    
    
In [14]:
    
# Three learning rates derived from the base rate: lr/100, lr/10 and lr
# (presumably one per layer group — confirm).
lr = 1e-1
lrs = np.array([lr / divisor for divisor in (1e2, 1e1, 1)])

# Unfreeze the model and train it with those rates.
learn.unfreeze()
learn.fit(lrs, n_cycle=3, cycle_len=1, cycle_mult=2)
    
    
 
 
    
    
    
In [18]:
    
# Call resize on the learner's data with target size 300 and 'tmp' as the
# second argument (presumably the directory for the resized copies — confirm).
# NOTE(review): the return value is only displayed, never assigned. If resize()
# returns a new data object instead of modifying learn.data in place, the
# learner keeps training on the old data — confirm, and assign the result if so.
learn.data.resize(300, 'tmp')
    
    
 
 
    
    Out[18]:
In [19]:
    
# Inspect the size recorded on the learner's data object (checks the effect of
# the resize call in the previous cell).
learn.data.sz
    
    Out[19]:
In [20]:
    
# Freeze the learner again (counterpart of the earlier learn.unfreeze()).
learn.freeze()
    
In [21]:
    
# Re-run the learning-rate finder after freezing, then plot what the scheduler
# recorded (presumably loss against learning rate — confirm).
# NOTE(review): `lrf` is not read anywhere else in the visible notebook.
lrf = learn.lr_find()
learn.sched.plot()
    
    
 
 
    
    
    
    
In [22]:
    
# Use a lower learning rate than the 1e-1 of the earlier passes and run a
# single cycle with cycle_len=1.
lr = 5e-3
learn.fit(lr, n_cycle=1, cycle_len=1)
    
    
 
 
    
    
    
    
    
In [23]:
    
    
    Out[23]:
In [24]:
    
# Path of the first training image; the bare name on the last line displays it.
fn = PATH + data.trn_ds.fnames[0]
fn
    
    Out[24]:
In [25]:
    
# Open that image with PIL; the bare name on the last line renders it.
img = PIL.Image.open(fn)
img
    
    Out[25]:
In [26]:
    
# Size of the opened image; PIL reports it as (width, height).
img.size
    
    Out[26]:
In [27]:
    
def _image_size(fname):
    """Return the PIL size of the image at PATH + fname, closing the file afterwards."""
    # The context manager releases the file handle deterministically instead of
    # relying on garbage collection while looping over the whole training set.
    with PIL.Image.open(PATH + fname) as im:
        return im.size


# Map every training file name to its image size.
size_d = {k: _image_size(k) for k in data.trn_ds.fnames}
    
In [28]:
    
# Unzip the per-image size pairs into two parallel tuples.
# NOTE(review): PIL sizes are (width, height), so row_sz holds the first
# component (width) and col_sz the second (height) — confirm the naming.
row_sz, col_sz = zip(*size_d.values())
    
In [29]:
    
# Convert both tuples to NumPy arrays so they can be boolean-masked below.
row_sz, col_sz = map(np.array, (row_sz, col_sz))
    
In [30]:
    
# Preview the first five entries of row_sz.
row_sz[:5]
    
    Out[30]:
In [32]:
    
# Histogram of all row_sz values; the trailing ';' suppresses the return-value repr.
plt.hist(row_sz);
    
    
In [33]:
    
# Same histogram, restricted to row_sz values below 1000.
plt.hist(row_sz[row_sz < 1000])
    
    Out[33]:
    
In [34]:
    
# Histogram of all col_sz values, mirroring the row_sz histogram.
# The original indexed col_sz with an undefined name `col`, which raised NameError.
plt.hist(col_sz);
    
    
In [35]:
    
# Histogram of col_sz restricted to values below 1000.
plt.hist(col_sz[col_sz < 1000])
    
    Out[35]:
    
In [36]:
    
# Number of items in the training dataset.
len(data.trn_ds)
    
    Out[36]:
In [ ]:
    
# NOTE(review): bare builtin `len` with no call — looks like an unfinished cell;
# as written it only displays the function object.
len
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [10]:
    
# # NOTE: THIS IS W/ bs=128
# lrf = learn.lr_find()
# learn.sched.plot()
    
    
 
 
    
    
    
    
In [11]:
    
# Base learning rate for the training cells that follow.
lr = 2e-2
    
In [11]:
    
# Train at the base rate: 3 cycles, cycle_len=1, with cycle_mult=2 applied to
# the cycle length.
learn.fit(lr, n_cycle=3, cycle_len=1, cycle_mult=2)
    
    
 
 
    
    
    
    
    
In [ ]:
    
    
In [12]:
    
# Three learning rates derived from the base rate: lr/9, lr/3 and lr — a
# narrower spread than the lr/100, lr/10, lr used earlier in the notebook
# (presumably one rate per layer group — confirm).
lrs = np.array([lr/9,lr/3,lr])
    
In [13]:
    
# Unfreeze the model, then train with the three rates in `lrs`:
# 3 cycles, cycle_len=1, cycle_mult=2.
learn.unfreeze()
learn.fit(lrs, n_cycle=3, cycle_len=1, cycle_mult=2)
    
    
 
 
    
    
    
In [14]:
    
# Save the learner under a name that embeds the image size `sz`.
# NOTE(review): the tag says RN50 while the arch configured at the top of this
# notebook is resnext50 — confirm the name matches the model actually trained.
learn.save(f'{sz}_dogbreeds_RN50_00')
    
In [ ]: