In [1]:
import theano
In [2]:
import os, sys
sys.path.insert(1, 'utils')  # make the local utils/ module importable
In [3]:
%matplotlib inline
notebook_homedir = os.getcwd()
path = 'data/catsdogs_rdx'
import utils; reload(utils)
from utils import *
In [19]:
# %cd $notebook_homedir/$path/train
# g = glob('*.jpg')
# shuf = np.random.permutation(g)
# # move a random 10% of the training images into the validation set
# for i in range(len(g) // 10):
#     os.rename(shuf[i], notebook_homedir + '/' + path + '/valid/' + shuf[i])
In [22]:
# %pwd
# %cd ../../..
# %cd $notebook_homedir
In [13]:
# %ls $notebook_homedir/$path
---------------------------------- Now the actual work part ----------------------------------
In [5]:
batch_size = 16

vgg = Vgg16()
model = vgg.model

# index of the last convolutional layer in VGG16
last_conv_idx = [i for i, l in enumerate(model.layers) if type(l) is Convolution2D][-1]
# keep everything up to and including that layer as a standalone conv-only model
conv_layers = model.layers[:last_conv_idx + 1]
conv_model = Sequential(conv_layers)
In [6]:
# test batches in a fixed order, so the features stay aligned with the filenames
test_batches = get_batches(path + '/test', batch_size=batch_size, shuffle=False)
# precompute conv features for the whole test set (this holds everything in RAM)
conv_test_feat = conv_model.predict_generator(test_batches, test_batches.nb_sample)
In [8]:
# persist the precomputed features to disk with utils.save_array
save_array(path + '/dgrdx_conv_test_feat.h5', conv_test_feat)
I think I found my issue from the conv_test_Asus.ipynb notebook. The cell above succeeds, but leaves me at 7.5 / 16 GB of RAM used. If that's the cost for only 12.5k images, then I am definitely running out of memory trying to generate features on StateFarm's 79k+ image test set.
So, time to find out how to save convolutional features directly to disk as they are created, batch by batch, instead of holding the entire giant NumPy array in memory at once.
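Below is a minimal sketch of that idea, assuming the same bcolz backend that utils.save_array uses: each batch's features get appended to an on-disk bcolz carray and flushed immediately, so RAM usage stays around one batch's worth of features rather than the whole test set. The helper name save_conv_feats_by_batch and its rootdir argument are my own untested placeholders, not anything from utils.py.
In [ ]:
import bcolz

def save_conv_feats_by_batch(conv_model, batches, rootdir):
    """Sketch: predict conv features one batch at a time and append each chunk
    to an on-disk bcolz carray instead of building one giant array in memory."""
    n_batches = (batches.nb_sample + batches.batch_size - 1) // batches.batch_size
    carr = None
    for _ in range(n_batches):
        imgs, labels = next(batches)               # get_batches yields (images, labels); labels unused here
        feats = conv_model.predict_on_batch(imgs)  # features for this batch only
        if carr is None:
            # the first chunk creates the on-disk array
            carr = bcolz.carray(feats, rootdir=rootdir, mode='w')
        else:
            carr.append(feats)
        carr.flush()                               # write the chunk out, keep RAM flat
    return carr
Used the same way as the in-memory version above; the '_bc' rootdir name is just a placeholder to keep it separate from the .h5 file:
In [ ]:
test_batches = get_batches(path + '/test', batch_size=batch_size, shuffle=False)
conv_test_feat_bc = save_conv_feats_by_batch(conv_model, test_batches,
                                             path + '/dgrdx_conv_test_feat_bc')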