In [1]:
%matplotlib inline
# Root of the dataset; the train/, valid/, test1/ and results/ paths used below are built from it.
path = "data/redux/"
# Alternative root: the small sample subset.
# path = "data/redux/sample/"
# NOTE(review): star import -- names used below without an explicit import
# (np, Image, save_array, load_array) presumably come from here; confirm.
from utils import *
In [2]:
# Batch size handed to every get_batches()/test() call in this part of the notebook.
batch_size = 8
In [3]:
# %cd data/redux
In [4]:
# %cd train
In [5]:
# %mkdir ../valid
In [6]:
# g = glob('*.jpg')
# shuf = np.random.permutation(g)
# for i in range(2000): os.rename(shuf[i], '../valid/' + shuf[i])
In [7]:
# %ls data/redux/valid | wc -l
In [8]:
# %mkdir ../sample
# %mkdir ../sample/train
# %mkdir ../sample/valid
In [9]:
# Note: the files are copied here, not moved as in the earlier step.
from shutil import copyfile
In [10]:
# g = glob('*.jpg')
# shuf = np.random.permutation(g)
# for i in range(200): copyfile(shuf[i], '../sample/train/' + shuf[i])
In [11]:
# %ls sample/train | wc -l
In [12]:
# g = glob('*.jpg')
# shuf = np.random.permutation(g)
# for i in range(50): copyfile(shuf[i], '../sample/valid/' + shuf[i])
In [13]:
# %ls ../sample/valid | wc -l
In [14]:
# %mkdir cats
# %mkdir dogs
# %mkdir ../valid/cats
# %mkdir ../valid/dogs
# %mkdir ../sample/valid/cats
# %mkdir ../sample/valid/dogs
# %mkdir ../sample/train/cats
# %mkdir ../sample/train/dogs
In [15]:
# %mv cat.*.jpg cats/
# %mv dog.*.jpg dogs/
In [16]:
# %cd ../valid/
# %mv cat.*.jpg cats/
# %mv dog.*.jpg dogs/
In [17]:
# %cd ../sample/valid/
# %mv cat.*.jpg cats/
# %mv dog.*.jpg dogs/
In [18]:
# %cd ../train/
# %mv cat.*.jpg cats/
# %mv dog.*.jpg dogs/
In [19]:
from vgg16 import Vgg16
In [20]:
# Instantiate the Vgg16 wrapper imported from the vgg16 module.
vgg = Vgg16()
In [21]:
# Build the batches for the train/ and valid/ directories (train first, then valid),
# both with the shared batch size.
batches, val_batches = (
    vgg.get_batches(path + subdir, batch_size=batch_size)
    for subdir in ('train', 'valid')
)
In [22]:
# Adapt the model to the training batches via Vgg16.finetune
# (presumably replaces the output layer to match the classes in `batches` -- confirm against vgg16.py).
vgg.finetune(batches)
In [23]:
# First training pass: a single epoch (nb_epoch=1), with val_batches passed alongside the training batches.
vgg.fit(batches, val_batches, nb_epoch=1)
In [25]:
# %mkdir data/redux/results
In [26]:
# Checkpoint the weights after the first epoch. The results/ directory presumably has to
# exist already (see the commented-out mkdir in the previous cell).
vgg.model.save_weights(path + 'results/ft1.h5')
# Uncomment to restore this checkpoint instead of retraining.
# vgg.model.load_weights(path + 'results/ft1.h5')
In [27]:
# Second single-epoch training pass on the same batches.
vgg.fit(batches, val_batches, nb_epoch=1)
In [28]:
# Checkpoint the weights after the second epoch.
vgg.model.save_weights(path + 'results/ft2.h5')
In [29]:
# Intended to change the optimizer's learning rate to 0.01 before one more epoch.
# NOTE(review): this rebinds the `lr` attribute to a plain float. With Keras 1.x the
# training function compiled during the earlier fit() calls may keep using the original
# learning-rate variable, which would make this assignment a no-op -- confirm, and
# consider K.set_value(vgg.model.optimizer.lr, .01) instead.
vgg.model.optimizer.lr = .01
vgg.fit(batches, val_batches, nb_epoch=1)
In [30]:
# Checkpoint the weights after the epoch run with the new learning-rate setting.
vgg.model.save_weights(path + 'results/ft2_1.h5')
In [31]:
# One more single-epoch training pass with the same settings.
vgg.fit(batches, val_batches, nb_epoch=1)
In [32]:
# NOTE(review): same target file as the previous checkpoint (results/ft2_1.h5), so that
# earlier checkpoint is overwritten here -- confirm this is intended.
vgg.model.save_weights(path + 'results/ft2_1.h5')
In [33]:
# %mkdir data/redux/test1/unknown
In [34]:
# %mv data/redux/test1/*.jpg data/redux/test1/unknown/
# vgg.model.summary()
In [35]:
# Run the model over the test1/ directory; vgg.test returns a pair that is unpacked
# into the batches object and the predictions.
# NOTE(review): this rebinds `batches`, which until now held the training batches, so
# re-running an earlier fit() cell after this one would pass the test batches to fit().
batches, preds = vgg.test(path + 'test1', batch_size = batch_size, class_mode=None)
In [36]:
# File names of the test images, taken from the test batches object; preview the first five.
filenames = batches.filenames
filenames[:5]
Out[36]:
In [37]:
# Preview the first five prediction rows.
preds[:5]
Out[37]:
In [38]:
# Persist the predictions and the file names under results/ via the save_array helper.
save_array(path+'results/test_preds.dat', preds)
save_array(path+'results/filenames.dat', filenames)
In [39]:
# Read both arrays back from the files written in the previous cell.
preds = load_array(path +'results/test_preds.dat')
filenames = load_array(path + 'results/filenames.dat')
In [40]:
# Open the first test image; as the last expression it becomes the cell output.
Image.open(path + 'test1/' + filenames[0])
Out[40]:
In [41]:
# Take column 1 of the predictions and clamp it to [0.05, 0.95]; preview the first five.
# The name suggests column 1 is the "dog" probability -- TODO confirm the class order.
# Presumably the clamp bounds the penalty for confidently wrong answers in the submission metric.
isdog = np.clip(preds[:,1], 0.05, 0.95)
isdog[:5]
Out[41]:
In [42]:
import os

# Numeric id of each test image, parsed from its file name: the part of the base
# name before the first '.'. Working from the base name rather than a fixed
# character offset keeps this correct whatever the sub-directory prefix is.
ids = [int(os.path.basename(f).split('.')[0]) for f in filenames]
ids[:5]
Out[42]:
In [43]:
# Two-column array for the submission: ids in column 0, clamped predictions in column 1.
result = np.column_stack((ids, isdog))
result[:5]
Out[43]:
In [44]:
# Write the submission CSV: integer id, label with five decimals, 'id,label' header row.
# comments='' stops savetxt from prefixing the header with its default comment marker.
np.savetxt(path + 'sub1.csv', result, fmt='%d,%.5f', header='id,label', comments='')
In [45]:
# Show a link to the submission file as the cell output.
from IPython.display import FileLink
FileLink(path + 'sub1.csv')
Out[45]:
1.一些随机的正确的标签 2.一些随机的不正确的标签 3.每个类别最正确的标签 4.每个类别最不正确的标签 5.最不确定的标签 (closest to .5)
In [ ]:
# Load the weights checkpointed after the first epoch (results/ft1.h5) back into the model.
vgg.model.load_weights(path + 'results/ft1.h5')
In [ ]:
# Run the model over the validation directory ('valid', the same directory the
# validation batches were built from); unpacks the batches object and the predictions.
val_batches, probs = vgg.test(path + 'valid', batch_size = batch_size)
In [ ]:
# Read the `classes` and `filenames` attributes of the validation batches object.
# Note: `filenames` previously held the test-set file names and is rebound here.
labels = val_batches.classes
filenames = val_batches.filenames
In [ ]:
# Collapse each prediction row to one value: one minus column 0, rounded to the
# nearest integer. Preview the first eight.
probs = np.round(1 - probs[:, 0])
probs[:8]