L1/L2 Code Along IIA: lesson1.vgg
Replicating: https://github.com/fastai/fastai/blob/master/courses/dl1/lesson1-vgg.ipynb
In [1]:
# Notebook setup: auto-reload edited modules before each cell runs and
# render matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
In [3]:
# Experiment configuration used by every cell of the training run.
PATH = "data/dogscats/"   # dataset root; the trailing slash is relied on when paths are concatenated
ARCH = vgg16              # architecture passed to tfms_from_model and ConvLearner
sz, bs = 224, 16          # image size, batch size
In [4]:
# Uncomment the line below ONLY when the precomputed activations need to be reset.
# It deletes {PATH}tmp, so leaving it active discards the cached activations on
# every run of the notebook.
# !rm -rf {PATH}tmp
In [5]:
# Data object built from the folder layout under PATH, using the default
# transforms for ARCH at size sz.
data = ImageClassifierData.from_paths(
    PATH,
    tfms=tfms_from_model(ARCH, sz),
    bs=bs,
)
In [6]:
# Learner on top of the pretrained ARCH model. precompute=True requests cached
# activations (the cache lives under {PATH}tmp, see the reset cell above).
learn = ConvLearner.pretrained(ARCH, data, precompute=True)
In [7]:
# First training pass: learning rate 0.01, 3 cycles, cycle_len=1.
# (The positional arguments correspond to `lrs` and `n_cycle`, as spelled out in later cells.)
learn.fit(0.01, 3, cycle_len=1)
In [8]:
# Transforms with data augmentation: side-on augmentations plus zoom up to max_zoom=1.1.
tfms = tfms_from_model(ARCH, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
In [9]:
# Rebuild the data object with the augmenting transforms, then create a NEW learner.
# Re-assigning `learn` replaces the learner trained in the earlier fit cell.
data = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, num_workers=4)
# NOTE(review): with precompute=True the augmentations presumably have no effect
# until precompute is switched off further down — confirm.
learn = ConvLearner.pretrained(ARCH, data, precompute=True)
In [10]:
# Two cycles at lr 1e-2 on the fresh learner (cycle_len is not passed here).
learn.fit(lrs=1e-2, n_cycle=2)
In [11]:
# Checkpoint 'vgg16_00': a restore point in case the memory-hungry steps below crash.
learn.save('vgg16_00')
In [12]:
# Turn off precomputed activations for all subsequent training.
learn.precompute=False
In [13]:
# One cycle (cycle_len=1) at lr 1e-2 with precompute disabled.
learn.fit(lrs=1e-2, n_cycle=1, cycle_len=1)
In [14]:
# Checkpoint 'vgg16_01': state before unfreezing.
learn.save('vgg16_01')
In [15]:
# Unfreeze the model so the following fits train it with the per-group rates in `lr`.
learn.unfreeze()
# Training ConvNets takes lots of memory; cut the batch size to prevent crashes.
# NOTE(review): the data loaders were already built with bs=16 — verify that
# assigning learn.data.bs actually changes the batch size they use.
learn.data.bs = 4
In [16]:
# Three learning rates, smallest first, passed as `lrs` to every fit below.
# NOTE(review): presumably one rate per layer group — confirm against the fastai docs.
lr = np.asarray((1e-4, 1e-3, 1e-2))
In [17]:
# First fit after unfreezing: one cycle (cycle_len=1) with the three rates in `lr`.
learn.fit(lrs=lr, n_cycle=1, cycle_len=1)
In [18]:
# Checkpoint 'vgg16_02': after the first unfrozen cycle.
learn.save('vgg16_02')
In [19]:
# Run the learning-rate finder.
# NOTE(review): its result is never plotted or used in this notebook — the fits
# below keep using the `lr` array defined earlier.
learn.lr_find()
In [20]:
# Three cycles starting at cycle_len=1 with cycle_mult=2.
# NOTE(review): presumably each cycle is twice as long as the previous one — confirm.
learn.fit(lrs=lr, n_cycle=3, cycle_len=1, cycle_mult=2)
In [21]:
# Checkpoint 'vgg16_03': after the cycle_mult=2 schedule.
learn.save('vgg16_03')
In [22]:
# Final training pass: three cycles with cycle_len=3.
learn.fit(lrs=lr, n_cycle=3, cycle_len=3)
In [23]:
# Checkpoint 'vgg16_04': the final weights, loaded again by the inference cells below.
learn.save('vgg16_04')
In [2]:
# Fresh kernel for inference: same notebook setup as the training run
# (auto-reload of edited modules, inline matplotlib figures).
%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
In [3]:
# Configuration for the inference run.
PATH = "data/dogscats/"   # dataset root; trailing slash is relied on by PATH + '...' below
ARCH = vgg16              # architecture passed to tfms_from_model and ConvLearner
sz, bs = 224, 16          # image size, batch size
In [4]:
# Data object that also registers the 'test1' folder as the test set.
data = ImageClassifierData.from_paths(
    PATH,
    tfms=tfms_from_model(ARCH, sz),
    bs=bs,
    test_name='test1',
)
In [5]:
# precompute=False: there is no reason to precompute activations for a single
# prediction pass over the test set. Also, every conv layer was trained earlier,
# so activations computed before the saved weights are loaded would presumably
# come from the wrong (not yet fine-tuned) weights.
learn = ConvLearner.pretrained(ARCH, data, precompute=False)
In [11]:
# Sanity check: number of items in the test dataset.
len(learn.data.test_dl.dataset)
Out[11]:
In [16]:
# Restore the weights saved as 'vgg16_04' at the end of training.
learn.load('vgg16_04')
In [21]:
# Test-time augmentation on the test set (is_test=True) with n_aug=4.
# TTA returns a pair; only the first element (the log-predictions) is kept.
# This took about 32 minutes.
log_preds = learn.TTA(n_aug=4, is_test=True)[0]
In [28]:
# Wrap the log-predictions in a DataFrame so they can be cached with to_feather.
# NOTE(review): column 0 is labelled 'dog' and column 1 'cat', but the order should
# follow data.classes. A later cell submits column 1 as the final label, which
# suggests these two names are swapped — confirm before relying on them.
log_preds_df = pd.DataFrame(log_preds, columns=['dog','cat'])
In [30]:
import os  # stdlib; imported locally so this cell does not depend on what the star-imports expose

# Cache the test-set log-predictions on disk (the TTA pass is slow).
# Create the target directory first: to_feather fails if it does not exist.
os.makedirs(PATH + 'results/', exist_ok=True)
log_preds_df.to_feather(PATH + 'results/' + 'log_preds')
In [34]:
# Optional shortcut: uncomment to reload the cached log-predictions instead of re-running TTA.
# log_preds_df = pd.read_feather(PATH + 'results/' + 'log_preds')
In [44]:
# Exponentiate the log-predictions (probabilities, assuming the model outputs log-probabilities — confirm).
test_preds = np.exp(log_preds)
In [70]:
import os  # stdlib; imported locally so this cell does not depend on what the star-imports expose

# Submission ids are the test file names without folder and extension
# (assuming names shaped like 'test1/123.jpg' -> '123').
# os.path replaces the hard-coded slice f[6:-4], which silently produces wrong ids
# for any other folder name or extension length.
ids = [os.path.splitext(os.path.basename(f))[0]
       for f in learn.data.test_dl.dataset.fnames]
In [71]:
# Hard class index per test item: position of the largest value in each row.
preds = list(np.argmax(test_preds, axis=1))
In [72]:
# Submission table with one row per test item: its id and predicted label.
# NOTE(review): the next two cells rebuild `submission` from scratch with the same
# columns, so one of the two constructions is redundant.
submission = pd.DataFrame({'id': ids, 'label': preds})
In [67]:
# Alternative construction: start from the labels alone; the id column is
# inserted by the following cell.
submission = pd.DataFrame(preds, columns=['label'])
In [68]:
# Put the ids in front as column 0.
# NOTE: this mutates `submission` in place and is not re-runnable — pandas raises
# ValueError if an 'id' column already exists.
submission.insert(0, 'id', ids)
In [69]:
# Eyeball the first rows before writing the file.
submission.head()
Out[69]:
In [73]:
import os  # stdlib; imported locally so this cell does not depend on what the star-imports expose

# Write the gzip-compressed CSV submission without the DataFrame index.
# Create the output directory first: to_csv fails if it does not exist.
os.makedirs(PATH + 'subm/', exist_ok=True)
submission.to_csv(PATH + 'subm/' + 'submission_vgg16_04.gz', compression='gzip', index=False)
In [74]:
# Display a download link to the submission file written above.
FileLink(PATH + 'subm/' + 'submission_vgg16_04.gz')
Out[74]:
In [2]:
# Fresh kernel for the follow-up session: same notebook setup as before
# (auto-reload of edited modules, inline matplotlib figures).
%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
In [3]:
# Configuration for the follow-up session.
PATH = "data/dogscats/"   # dataset root; trailing slash is relied on by PATH + '...' below
ARCH = vgg16              # architecture passed to tfms_from_model and ConvLearner
sz, bs = 224, 16          # image size, batch size
In [4]:
# Data object that also registers the 'test1' folder as the test set.
data = ImageClassifierData.from_paths(
    PATH,
    tfms=tfms_from_model(ARCH, sz),
    bs=bs,
    test_name='test1',
)
In [5]:
# precompute=False: there is no reason to precompute activations for a single
# prediction pass. Also, every conv layer was trained earlier, so activations
# computed before the saved weights are loaded would presumably come from the
# wrong (not yet fine-tuned) weights.
learn = ConvLearner.pretrained(ARCH, data, precompute=False)
In [6]:
# Restore the final training checkpoint 'vgg16_04'.
learn.load('vgg16_04')
In [8]:
# TTA with default arguments (no is_test) returns log-predictions together with targets y.
# NOTE(review): presumably computed on the validation set — confirm.
log_preds, y = learn.TTA()
# Cell output: accuracy of the TTA predictions against y.
accuracy(log_preds, y)
Out[8]:
In [30]:
# Reload the test-set log-predictions cached by the earlier inference session.
df = pd.read_feather(PATH + 'results/' + 'log_preds')
In [31]:
# Inspect the first rows of the cached log-predictions.
df.head()
Out[31]:
In [32]:
# Two-column array of log-predictions: column 0 = df['dog'], column 1 = df['cat'].
# Selecting the columns directly replaces the element-by-element zip over two Series
# and keeps a (0, 2) shape for an empty frame.
# NOTE(review): the column names may be swapped relative to data.classes — confirm.
preds = df[['dog', 'cat']].values
In [41]:
# Exponentiate the log-predictions. NOTE: `preds` is overwritten in place of the
# log values, so re-running this cell alone would exponentiate twice.
preds = np.exp(preds)
In [42]:
# Hard class index per test item: position of the largest value in each row.
preds = list(np.argmax(preds, axis=1))
In [48]:
# Flip the binary labels: class index 0 becomes 1, every other index becomes 0.
new_preds = [1 if class_idx == 0 else 0 for class_idx in preds]
In [49]:
# Peek at the first ten flipped labels.
new_preds[:10]
Out[49]:
In [52]:
import os  # stdlib; imported locally so this cell does not depend on what the star-imports expose

# Submission ids are the test file names without folder and extension
# (assuming names shaped like 'test1/123.jpg' -> '123').
# os.path replaces the hard-coded slice f[6:-4], which silently produces wrong ids
# for any other folder name or extension length.
ids = [os.path.splitext(os.path.basename(f))[0]
       for f in learn.data.test_dl.dataset.fnames]
# Submission built from the flipped hard labels.
submission = pd.DataFrame({'id': ids, 'label': new_preds})
In [53]:
import os  # stdlib; imported locally so this cell does not depend on what the star-imports expose

# Write the gzip-compressed CSV submission without the DataFrame index.
# Create the output directory first: to_csv fails if it does not exist.
os.makedirs(PATH + 'subm/', exist_ok=True)
submission.to_csv(PATH + 'subm/' + 'submission_vgg16_04_wtf.gz', compression='gzip', index=False)
# Last expression of the cell: display a download link to the file.
FileLink(PATH + 'subm/' + 'submission_vgg16_04_wtf.gz')
Out[53]:
In [55]:
# Two-column array of log-predictions: column 0 = df['dog'], column 1 = df['cat'].
# Despite its name, log_preds_df is a NumPy array, not a DataFrame.
# Selecting the columns directly replaces the element-by-element zip over two Series
# and keeps a (0, 2) shape for an empty frame.
log_preds_df = df[['dog', 'cat']].values
In [57]:
# Exponentiate the log-predictions (probabilities, assuming the model outputs log-probabilities — confirm).
test_preds = np.exp(log_preds_df)
In [59]:
# Bound every prediction to the range [0.05, 0.95] before submitting.
test_preds = test_preds.clip(0.05, 0.95)
In [66]:
# Show the class order, to decide which prediction column to submit.
data.classes
Out[66]:
In [67]:
# Submit the second column (index 1) of the clipped predictions.
# NOTE(review): this must be the class at data.classes[1]; it was read from
# df['cat'], so the DataFrame column names are probably swapped — confirm.
labels = test_preds[:,1]
In [68]:
# Peek at the first ten values to be submitted.
labels[:10]
Out[68]:
In [64]:
# Inspect the test file names (used to derive the submission ids).
# Only the first ten are shown: dumping the whole list floods the notebook output.
learn.data.test_dl.dataset.fnames[:10]
Out[64]:
In [69]:
import os  # stdlib; imported locally so this cell does not depend on what the star-imports expose

# Submission ids are the test file names without folder and extension
# (assuming names shaped like 'test1/123.jpg' -> '123').
# os.path replaces the hard-coded slice f[6:-4], which silently produces wrong ids
# for any other folder name or extension length.
ids = [os.path.splitext(os.path.basename(f))[0]
       for f in learn.data.test_dl.dataset.fnames]
# Submission built from the clipped predictions rather than hard labels.
submission = pd.DataFrame({'id': ids, 'label': labels})
In [71]:
import os  # stdlib; imported locally so this cell does not depend on what the star-imports expose

# Write the gzip-compressed CSV submission without the DataFrame index.
# Create the output directory first: to_csv fails if it does not exist.
os.makedirs(PATH + 'subm/', exist_ok=True)
submission.to_csv(PATH + 'subm/' + 'submission_vgg16_04_omg.csv.gz', compression='gzip', index=False)
# Last expression of the cell: display a download link to the file.
FileLink(PATH + 'subm/' + 'submission_vgg16_04_omg.csv.gz')
Out[71]:
This model took roughly 10–14 hours to train on a machine with a GTX 870M GPU and an Intel Core i7 CPU. With predictions clipped to [0.05, 0.95], it achieved a score of 0.07389, ranking 201st out of 1314.
In [ ]: