lesson1-rxt50-CA.ipynb -- Code Along of:


Reimplementing the Dogs vs. Cats classifier with ResNeXt50

Dogs v Cats super-charged:

In [1]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# This file contains all the main external libs we'll use
from fastai.imports import *

from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

PATH = "data/dogscats/"
sz = 299
ARCH = resnext50
bs = 8 # if a TitanX is maxing out at 28, I'll give this 870M 8..

In [3]:
tfms = tfms_from_model(ARCH, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
data = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=bs, num_workers=4)
learn = ConvLearner.pretrained(ARCH, data, precompute=True, ps=0.5)
# conv_learner.py: class ConvnetBuilder(): ps (float or array of float): dropout parameters
# NOTE: http://forums.fast.ai/t/error-when-trying-to-use-resnext50/7555
# save weights to fastai/fastai/  -- until this is automatic

Just looking at the fastai library source code while the above works:

class ConvLearner(Learner):
    # Excerpt quoted from the fastai library (conv_learner.py) for reference.
    def __init__(self, data, models, precompute=False, **kwargs):
        # precompute is held at False while the base Learner is constructed;
        # the caller's value is only stored on the last line, after the optional save_fc1().
        self.precompute = False
        super().__init__(data, models, **kwargs)
        # Loss: binary cross-entropy when data.is_multi, otherwise NLL loss ...
        self.crit = F.binary_cross_entropy if data.is_multi else F.nll_loss
        # ... replaced by L1 loss when data.is_reg. The `elif` below belongs to this
        # `if`, so the default metric is only filled in when data.is_reg is false
        # and no metrics are set.
        if data.is_reg: self.crit = F.l1_loss
        elif self.metrics is None:
            self.metrics = [accuracy_multi] if self.data.is_multi else [accuracy]
        # precompute=True triggers save_fc1() right at construction time.
        if precompute: self.save_fc1()
        self.precompute = precompute

further below:

    def save_fc1(self):
        # Fills any empty activation store and then builds self.fc_data from the stores.
        # NOTE(review): `m` is not defined anywhere in this excerpt — presumably the
        # model whose outputs are being cached; confirm against the library source.
        act, val_act, test_act = self.activations
        # Each store is only computed when it is still empty (length 0).
        if len(self.activations[0])==0:
            predict_to_bcolz(m, self.data.fix_dl, act)
        if len(self.activations[1])==0:
            predict_to_bcolz(m, self.data.val_dl, val_act)
        if len(self.activations[2])==0:
            # the test store is only filled when a test data loader exists
            if self.data.test_dl: predict_to_bcolz(m, self.data.test_dl, test_act)

        # Wrap the activation stores (with the original labels, batch size and classes) as a dataset.
        self.fc_data = ImageClassifierData.from_arrays(self.data.path,
                (act, self.data.trn_y), (val_act, self.data.val_y), self.data.bs, classes=self.data.classes,
                test = test_act if self.data.test_dl else None, num_workers=8)

So this means that when precompute=True is specified, fastai automatically computes the activations for the training, validation, AND test sets (the test set only if one was provided): the constructor calls save_fc1(), which fills every activation store that is still empty. That's awesome. I love this library.

In [4]:
len(data.aug_dl.dataset), bs * 250

(2000, 2000)

In [5]:
len(data.fix_dl.dataset), bs * 2875

(23000, 23000)

Hah! How about that. That settles that mystery. Up above you'll see two progress bars:


These are the precomputations of the train & validation activations for the ResNeXt50 ConvNet.

The batch size is set to 8. That answers my other question of how fastai knows not to overload the RAM on my GPU: it uses the batch size parameter I specified when initializing the data object, so it's on me.

The first precomputation runs through 2875 minibatches of size 8, for a total of 2875x8=23,000 images. The second run is on the validation set, for 250x8=2000 images. No precomputation is done for the test set as it was not passed in to the data object when it was initialized.

That's the whole 25,000 image data set for cats & dogs. Sweet.

In [6]:
learn.fit(lrs=1e-2, n_cycle=1) # specifying lrs & n_cycle just to learn the API better

[ 0.       0.14572  0.02942  0.9905 ]                            

Hell yeah. Precomputed Activations kick ass. Just a few seconds..

... And now that fun ends:

In [7]:
learn.activations[0].shape # ResNeXt50 uses FC layers? I thought it was fully Conv
# yup: check learn.summary(); after a flatten, 2 FC (aka Linear) layers. Flatten 
# operation produces a 4096-long vector/tensor-thing.

(23000, 4096)

In [8]:
# Oh that's interesting: the way learn's activations are structured: for 
# train, valid, and test:
print("Train Activs: {}\nValid Activs: {}\n Test Activs: {}".format(
        learn.activations[0].shape, learn.activations[1].shape, learn.activations[2].shape))

Train Activs: (23000, 4096)
Valid Activs: (2000, 4096)
 Test Activs: (0, 4096)

In [9]:
learn.fit(lrs=1e-2, n_cycle=2, cycle_len=1)

[ 0.       0.15726  0.02561  0.993  ]                           
[ 1.       0.09184  0.02401  0.9925 ]                           

That took over 20 minutes, by comparison.

In [15]:

In [5]:
learn.fit(lrs=lr, n_cycle=3, cycle_len=1)

[ 0.       0.16924  0.02706  0.99102]                             
[ 1.       0.10178  0.02441  0.99202]                             
[ 2.       0.1734   0.035    0.98852]                             

In [6]:

In [ ]:

In [7]:
log_preds, y = learn.TTA()
accuracy(log_preds, y)


Analyzing Results:

In [8]:
# predicted class index = column holding the largest log-prediction
preds = np.argmax(log_preds, axis=1)
# exp() of column 1 of the log-predictions; data.classes is ['cats', 'dogs'], so column 1 is 'dogs'
probs = np.exp(log_preds[:,1])

from sklearn.metrics import confusion_matrix
# confusion matrix of the TTA targets (y) against the predicted classes
cm = confusion_matrix(y, preds)

In [9]:
def rand_by_mask(mask):
    """Return up to 4 distinct random indices at which `mask` is true.

    Indices are taken along the first axis (`np.where(mask)[0]`). When fewer
    than 4 entries are true, all of them are returned (in random order)
    instead of raising ValueError from sampling without replacement; an
    all-false mask yields an empty index array.
    """
    idxs = np.where(mask)[0]
    if len(idxs) == 0: return idxs  # nothing to sample from
    return np.random.choice(idxs, min(4, len(idxs)), replace=False)
def rand_by_correct(is_correct):
    """Pick random validation indices whose prediction correctness equals `is_correct`."""
    was_right = preds == data.val_y
    return rand_by_mask(was_right == is_correct)

def plot_val_with_title(idxs, title):
    """Plot the validation-set items at `idxs`, each titled with its entry in `probs`.

    The images come from `data.val_ds[x][0]` and are passed through
    `data.val_ds.denorm` before plotting. `title` is printed as a heading
    (it was previously accepted but never used).
    """
    # NOTE: this definition is replaced by the redefinition of
    # plot_val_with_title further down in the same cell.
    imgs = np.stack([data.val_ds[x][0] for x in idxs])
    title_probs = [probs[x] for x in idxs]
    print(title)
    return plots(data.val_ds.denorm(imgs), rows=1, titles=title_probs)

def plots(ims, figsize=(12,6), rows=1, titles=None):
    """Draw the images `ims` on a grid with `rows` rows in a new figure.

    `figsize` is passed to `plt.figure`; `titles`, when given, supplies one
    subplot title per image (indexed like `ims`).
    """
    f = plt.figure(figsize=figsize)
    # ceiling division, so an image count not divisible by `rows` still gets enough grid slots
    cols = -(-len(ims) // rows)
    for i, im in enumerate(ims):
        sp = f.add_subplot(rows, cols, i+1)
        sp.axis('Off')  # hide ticks/frames around the pictures
        if titles is not None: sp.set_title(titles[i], fontsize=16)
        sp.imshow(im)  # previously the subplot was created but the image was never drawn

def load_img_id(ds, idx):
    """Load the image file `PATH + ds.fnames[idx]` and return it as a numpy array."""
    # The context manager closes the file handle as soon as the pixels are copied,
    # rather than leaving it to garbage collection.
    with PIL.Image.open(PATH+ds.fnames[idx]) as img:
        return np.array(img)

def plot_val_with_title(idxs, title):
    """Plot the validation images at `idxs`, each titled with its entry in `probs`.

    Images are loaded from disk via `load_img_id(data.val_ds, x)`. `title` is
    printed as a heading above the figure (it was previously accepted but
    never used).
    """
    imgs = [load_img_id(data.val_ds,x) for x in idxs]
    title_probs = [probs[x] for x in idxs]
    print(title)
    return plots(imgs, rows=1, titles=title_probs, figsize=(16,8))

def most_by_mask(mask, mult):
    """Return the (at most) 4 masked indices with the smallest `mult * probs` values."""
    candidates = np.where(mask)[0]
    # ascending sort of the scaled probabilities; mult=-1 therefore ranks the largest probs first
    order = np.argsort(mult * probs[candidates])
    first_four = order[:4]
    return candidates[first_four]

def most_by_correct(y, is_correct):
    """Return the indices of the most extreme validation items of class `y`.

    Selects items whose true label (`data.val_y`) equals `y` and whose
    prediction correctness equals `is_correct`, then ranks them with
    `most_by_mask`. `mult` is -1 (largest `probs` first) when `(y==1)`
    equals `is_correct`, otherwise 1 (smallest `probs` first).
    """
    mult = -1 if (y==1)==is_correct else 1
    # `&` binds tighter than `==`, so the correctness comparison needs its own
    # parentheses; without them the mask was
    # (preds == val_y) == (is_correct & (val_y == y)), which ignores the class filter.
    return most_by_mask(((preds == data.val_y)==is_correct) & (data.val_y == y), mult)

In [10]:
plot_val_with_title(most_by_correct(0, False), "Most incorrect cats")

Most incorrect cats

In [11]:
plot_val_with_title(most_by_correct(1, False), "Most incorrect dogs")

Most incorrect dogs

In [6]:
tfms = tfms_from_model(ARCH, sz, aug_tfms=transforms_side_on, max_zoom=1.1)
data = ImageClassifierData.from_paths(PATH, tfms=tfms, bs=6, num_workers=4, test_name='test1')
learn = ConvLearner.pretrained(ARCH, data, precompute=False, ps=0.5)

See: https://github.com/fastai/fastai/issues/23 and https://github.com/pytorch/pytorch/issues/973

This is related to the maximum number of open file descriptors (fds) -- maybe because the new architectures in fastai create new files instead of folders? Anyway, to avoid the error shown below, raise the limit with `ulimit -n 2048` or so. Hopefully a more permanent solution is available soon. Note that the fix only seems to work when it is run in the terminal before starting the Jupyter session.

In [12]:
!ulimit -n


In [7]:
log_preds = learn.TTA(is_test=True)

RuntimeError                              Traceback (most recent call last)
<ipython-input-7-5ebeb4867d0c> in <module>()
----> 1 log_preds = learn.TTA(is_test=True)

~/Kaukasos/FADL1/fastai/learner.py in TTA(self, n_aug, is_test)
    165         dl1 = self.data.test_dl     if is_test else self.data.val_dl
    166         dl2 = self.data.test_aug_dl if is_test else self.data.aug_dl
--> 167         preds1,targs = predict_with_targs(self.model, dl1)
    168         preds1 = [preds1]*math.ceil(n_aug/4)
    169         preds2 = [predict_with_targs(self.model, dl2)[0] for i in tqdm(range(n_aug), leave=False)]

~/Kaukasos/FADL1/fastai/model.py in predict_with_targs(m, dl)
    115     if hasattr(m, 'reset'): m.reset()
    116     preda,targa = zip(*[(get_prediction(m(*VV(x))),y)
--> 117                         for *x,y in iter(dl)])
    118     return to_np(torch.cat(preda)), to_np(torch.cat(targa))

~/Kaukasos/FADL1/fastai/model.py in <listcomp>(.0)
    114     m.eval()
    115     if hasattr(m, 'reset'): m.reset()
--> 116     preda,targa = zip(*[(get_prediction(m(*VV(x))),y)
    117                         for *x,y in iter(dl)])
    118     return to_np(torch.cat(preda)), to_np(torch.cat(targa))

~/Kaukasos/FADL1/fastai/dataset.py in __next__(self)
    218         if self.i>=len(self.dl): raise StopIteration
    219         self.i+=1
--> 220         return next(self.it)
    222     @property

~/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
    193         while True:
    194             assert (not self.shutdown and self.batches_outstanding > 0)
--> 195             idx, batch = self.data_queue.get()
    196             self.batches_outstanding -= 1
    197             if idx != self.rcvd_idx:

~/miniconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py in get(self)
    335             res = self._reader.recv_bytes()
    336         # unserialize the data after having released the lock
--> 337         return _ForkingPickler.loads(res)
    339     def put(self, obj):

~/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/multiprocessing/reductions.py in rebuild_storage_fd(cls, df, size)
     68         fd = multiprocessing.reduction.rebuild_handle(df)
     69     else:
---> 70         fd = df.detach()
     71     try:
     72         storage = storage_from_cache(cls, fd_id(fd))

~/miniconda3/envs/fastai/lib/python3.6/multiprocessing/resource_sharer.py in detach(self)
     56             '''Get the fd.  This should only be called once.'''
     57             with _resource_sharer.get_connection(self._id) as conn:
---> 58                 return reduction.recv_handle(conn)

~/miniconda3/envs/fastai/lib/python3.6/multiprocessing/reduction.py in recv_handle(conn)
    180         '''Receive a handle over a local connection.'''
    181         with socket.fromfd(conn.fileno(), socket.AF_UNIX, socket.SOCK_STREAM) as s:
--> 182             return recvfds(s, 1)[0]
    184     def DupFd(fd):

~/miniconda3/envs/fastai/lib/python3.6/multiprocessing/reduction.py in recvfds(sock, size)
    159             if len(ancdata) != 1:
    160                 raise RuntimeError('received %d items of ancdata' %
--> 161                                    len(ancdata))
    162             cmsg_level, cmsg_type, cmsg_data = ancdata[0]
    163             if (cmsg_level == socket.SOL_SOCKET and

RuntimeError: received 0 items of ancdata

In [4]:
!ulimit -n


In [2]:
# Python fix to ulimit issue above: https://github.com/fastai/fastai/issues/23#issuecomment-345091054
# Raises this process's soft limit on open file descriptors towards 4096.
import resource
rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
print(f'getrlimit before:{rlimit}')
soft, hard = rlimit
target = 4096
# the soft limit may not exceed the hard limit (setrlimit raises ValueError otherwise)
if hard != resource.RLIM_INFINITY: target = min(target, hard)
# never lower a soft limit that is already unlimited or high enough
new_soft = soft if (soft == resource.RLIM_INFINITY or soft >= target) else target
if new_soft != soft: resource.setrlimit(resource.RLIMIT_NOFILE, (new_soft, hard))
print(f'getrlimit after:{resource.getrlimit(resource.RLIMIT_NOFILE)}')

getrlimit before:(1024, 1048576)
getrlimit after:(4096, 1048576)

In [3]:
# checking:
!ulimit -n


<< Reload everything and run .TTA() again >>

In [4]:
log_preds = learn.TTA(is_test=True)

ValueError                                Traceback (most recent call last)
<ipython-input-4-dbb6acf801c1> in <module>()
      1 log_preds = learn.TTA(is_test=True)
      2 # making sure I don't waste 15 minutes by losing predictions to an ssh broken pipe again
----> 3 pd.DataFrame.to_feather(log_preds, 'RNx_225_all_50_logpreds.feather')

~/miniconda3/envs/fastai/lib/python3.6/site-packages/pandas/core/frame.py in to_feather(self, fname)
   1623         """
   1624         from pandas.io.feather_format import to_feather
-> 1625         to_feather(self, fname)
   1627     def to_parquet(self, fname, engine='auto', compression='snappy',

~/miniconda3/envs/fastai/lib/python3.6/site-packages/pandas/io/feather_format.py in to_feather(df, path)
     47     path = _stringify_path(path)
     48     if not isinstance(df, DataFrame):
---> 49         raise ValueError("feather only support IO with DataFrames")
     51     feather = _try_import()

ValueError: feather only support IO with DataFrames

Saving Predictions

In [11]:
# forgot that TTA() returns 2 things
log_preds = log_preds[0]

In [13]:
df = pd.DataFrame(log_preds)

In [21]:

0 1
0 -9.827118 -0.000062
1 -7.464314 -0.000654
2 -13.206347 -0.000003
3 -13.096326 -0.000002
4 -8.735059 -0.000163

In [22]:

['cats', 'dogs']

In [27]:
df.columns = data.classes

In [30]:

0   -0.000062
1   -0.000654
2   -0.000003
3   -0.000002
4   -0.000163
5   -6.392356
6   -5.821492
7   -0.000551
8   -4.589474
9   -0.010866
Name: dogs, dtype: float32

Looks good. The DataFrame needs string (str) column names, otherwise pandas won't save it as a .feather file.

In [60]:
# making sure I don't waste 15 minutes by losing predictions to an ssh broken pipe again
import os
# create the output directory first: a missing 'results/' folder would make the save fail
os.makedirs(PATH+'results/', exist_ok=True)
df.to_feather(PATH+'results/'+'RNx_225_all_50_logpreds.feather')


Submission format is id,label, with label being the predicted probability of being a dog. data.classes shows that 'dogs' is the 2nd category, so save the 2nd column of predictions to the submission file. Also, the LogLoss evaluation metric penalizes confidently wrong answers very heavily (predicting 1 or 0 when the true label is the opposite), so the predictions will be clipped to [0.05, 0.95] as that gives better results.

In [32]:
test_preds = np.exp(log_preds)

In [33]:


In [34]:


In [38]:
import os
# column 1 of the test predictions, clipped to [0.05, 0.95]
preds = np.clip(test_preds[:,1], 0.05, 0.95)
# id = file name without directory and extension; replaces the fixed slice i[6:-4],
# which only worked for a 6-character folder prefix and a 4-character extension
ids   = [os.path.splitext(os.path.basename(f))[0] for f in learn.data.test_dl.dataset.fnames]
submission = pd.DataFrame({'id':ids, 'label':preds})

In [46]:
import os
SUBM = 'subm/'
# create the submission folder first: to_csv raises if the directory does not exist
os.makedirs(PATH+SUBM, exist_ok=True)
submission.to_csv(PATH+SUBM+'submission_RNx_224_all_50.csv.gz', compression='gzip', index=False)

In [61]:
temp = pd.read_feather(PATH+'results/'+'RNx_225_all_50_logpreds.feather')

In [62]:


In [63]:


Good, so there's no loss of data when saving as a DataFrame.

This model got 0.08151 on the Kaggle Dogs vs Cats Redux competition. Tied for 266/1314.


The fastai library was updated to use loops instead of list comprehensions to fix the max-open-file-descriptors issue; the conda-installed PyTorch was uninstalled and replaced with a source install.

In [3]:

In [4]:
log_preds = learn.TTA(is_test=True)[0]


In [7]:

(12500, 2)

Previous testing below:

In [4]:
log_preds = learn.TTA(is_test=True)[0]

RuntimeError                              Traceback (most recent call last)
<ipython-input-4-1548243b34e8> in <module>()
----> 1 log_preds = learn.TTA(is_test=True)[0]

~/Kaukasos/FADL1/fastai/learner.py in TTA(self, n_aug, is_test)
    165         dl1 = self.data.test_dl     if is_test else self.data.val_dl
    166         dl2 = self.data.test_aug_dl if is_test else self.data.aug_dl
--> 167         preds1,targs = predict_with_targs(self.model, dl1)
    168         preds1 = [preds1]*math.ceil(n_aug/4)
    169         preds2 = [predict_with_targs(self.model, dl2)[0] for i in tqdm(range(n_aug), leave=False)]

~/Kaukasos/FADL1/fastai/model.py in predict_with_targs(m, dl)
    115     if hasattr(m, 'reset'): m.reset()
    116     res = []
--> 117     for *x,y in iter(dl): res.append([get_prediction(m(*VV(x))),y])
    118     preda,targa = zip(*res)
    119     return to_np(torch.cat(preda)), to_np(torch.cat(targa))

~/Kaukasos/FADL1/fastai/dataset.py in __next__(self)
    217         if self.i>=len(self.dl): raise StopIteration
    218         self.i+=1
--> 219         return next(self.it)
    221     @property

~/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
    193         while True:
    194             assert (not self.shutdown and self.batches_outstanding > 0)
--> 195             idx, batch = self.data_queue.get()
    196             self.batches_outstanding -= 1
    197             if idx != self.rcvd_idx:

~/miniconda3/envs/fastai/lib/python3.6/multiprocessing/queues.py in get(self)
    335             res = self._reader.recv_bytes()
    336         # unserialize the data after having released the lock
--> 337         return _ForkingPickler.loads(res)
    339     def put(self, obj):

~/miniconda3/envs/fastai/lib/python3.6/site-packages/torch/multiprocessing/reductions.py in rebuild_storage_fd(cls, df, size)
     68         fd = multiprocessing.reduction.rebuild_handle(df)
     69     else:
---> 70         fd = df.detach()
     71     try:
     72         storage = storage_from_cache(cls, fd_id(fd))

~/miniconda3/envs/fastai/lib/python3.6/multiprocessing/resource_sharer.py in detach(self)
     56             '''Get the fd.  This should only be called once.'''
     57             with _resource_sharer.get_connection(self._id) as conn:
---> 58                 return reduction.recv_handle(conn)

~/miniconda3/envs/fastai/lib/python3.6/multiprocessing/reduction.py in recv_handle(conn)
    180         '''Receive a handle over a local connection.'''
    181         with socket.fromfd(conn.fileno(), socket.AF_UNIX, socket.SOCK_STREAM) as s:
--> 182             return recvfds(s, 1)[0]
    184     def DupFd(fd):

~/miniconda3/envs/fastai/lib/python3.6/multiprocessing/reduction.py in recvfds(sock, size)
    159             if len(ancdata) != 1:
    160                 raise RuntimeError('received %d items of ancdata' %
--> 161                                    len(ancdata))
    162             cmsg_level, cmsg_type, cmsg_data = ancdata[0]
    163             if (cmsg_level == socket.SOL_SOCKET and

RuntimeError: received 0 items of ancdata

In [4]:
learn = ConvLearner.pretrained(ARCH, data, ps=0.5, precompute=False)

TypeError                                 Traceback (most recent call last)
<ipython-input-4-cb8b7781773f> in <module>()
----> 1 learn = ConvLearner.pretrained(ARCH, data, ps=0.5, precompute=False)

~/Kaukasos/FADL1/fastai/conv_learner.py in pretrained(cls, f, data, ps, xtra_fc, xtra_cut, **kwargs)
     91     @classmethod
     92     def pretrained(cls, f, data, ps=None, xtra_fc=None, xtra_cut=0, **kwargs):
---> 93         models = ConvnetBuilder(f, data.c, data.is_multi, data.is_reg, ps=ps, xtra_fc=xtra_fc, xtra_cut=xtra_cut)
     94         return cls(data, models, **kwargs)

~/Kaukasos/FADL1/fastai/conv_learner.py in __init__(self, f, c, is_multi, is_reg, ps, xtra_fc, xtra_cut)
     43         if not isinstance(self.ps, list): self.ps = [self.ps]*n_fc
---> 45         fc_layers = self.get_fc_layers()
     46         self.n_fc = len(fc_layers)
     47         self.fc_model = to_gpu(nn.Sequential(*fc_layers))

~/Kaukasos/FADL1/fastai/conv_learner.py in get_fc_layers(self)
     65             res += self.create_fc_layer(ni, nf, p=self.ps[i], actn=nn.ReLU())
     66             ni=nf
---> 67         final_actn = nn.Sigmoid() if self.is_multi else nn.LogSoftmax(1)
     68         if self.is_reg: final_actn = None
     69         res += self.create_fc_layer(ni, self.c, p=self.ps[-1], actn=final_actn)

TypeError: __init__() takes 1 positional argument but 2 were given

In [ ]:

In [ ]:
log_preds = learn.TTA(is_test=True)