Prepare the dataset for the Kaggle "Dogs vs. Cats Redux: Kernels Edition" competition: https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition
This notebook is based on https://github.com/fastai/courses/blob/master/deeplearning1/nbs/dogs_cats_redux.ipynb
In [2]:
%%bash
# Install the kaggle-cli package with pip after activating the target environment.
# NOTE(review): kaggle-cli presumably provides the `kg` command that the
# download cell runs - confirm.
source activate root # "root" is a placeholder: change it to your own env name
pip install kaggle-cli
In [1]:
%%bash
# Download the competition archives into ./data and unpack them.
# WARNING: any existing ./data directory is deleted first.
source activate root # you need to change here to your env name

# Abort on the first failing command so a failed download or unzip is not
# hidden behind later commands. Enabled only after activation so that it
# cannot interfere with the activation script itself.
set -e

# Make sure the downloader exists BEFORE wiping previously prepared data.
if ! command -v kg >/dev/null 2>&1; then
  echo "kg command not found; install kaggle-cli in this environment first" >&2
  exit 1
fi

rm -rf data
mkdir -p data
# This cell runs in its own shell process, so a plain cd is enough; there is
# no directory stack to restore afterwards.
cd data
# NOTE(review): kg is called without arguments, so it presumably relies on
# previously stored kg configuration (credentials, competition) - confirm.
kg download
unzip -q train.zip
unzip -q test.zip
In [2]:
from glob import glob
import numpy as np
from shutil import move, copyfile
%mkdir -p data/train
%mkdir -p data/valid
%mkdir -p data/sample/train
%mkdir -p data/sample/valid
%pushd data/train
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(200): copyfile(shuf[i], '../sample/train/' + shuf[i])
shuf = np.random.permutation(g)
for i in range(200): copyfile(shuf[i], '../sample/valid/' + shuf[i])
# validation files are moved
shuf = np.random.permutation(g)
for i in range(1000): move(shuf[i], '../valid/' + shuf[i])
%popd
In [3]:
import os
from glob import glob
from shutil import move

# Sort the images of every dataset directory into one sub-directory per class.
# File names starting with "cat" go to <dir>/cat, those starting with "dog"
# go to <dir>/dog. The test images are not sorted by class, so they all go
# to data/test/unknown.
#
# Explicit paths are used instead of changing directory, so a missing
# directory cannot cause files in some other location to be moved, and the
# cell can be re-run safely (existing class directories are reused).
LABELLED_DIRS = ('data/train', 'data/valid', 'data/sample/train', 'data/sample/valid')
TEST_DIR = 'data/test'

def _move_matching(src_dir, pattern, dest_dir):
    """Move every file in src_dir matching the glob pattern into dest_dir."""
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    for path in glob(os.path.join(src_dir, pattern)):
        move(path, os.path.join(dest_dir, os.path.basename(path)))

for split_dir in LABELLED_DIRS:
    for label in ('cat', 'dog'):
        _move_matching(split_dir, label + '*.jpg', os.path.join(split_dir, label))

_move_matching(TEST_DIR, '*.jpg', os.path.join(TEST_DIR, 'unknown'))
In [ ]: