The first thing we need to do is rearrange the "raw" dataset by saving all the images belonging to each class in their own subfolder.
This rearrangement is needed to allow us to make use of Keras's amazing ImageDataGenerator.
In [1]:
import os
In [2]:
# Create one subfolder per class under "train"; the rearrangement cell below
# moves each image into the folder matching its filename prefix.
%mkdir "train/dog"
%mkdir "train/cat"
In [3]:
# Move every file that sits next to class subfolders into the subfolder named
# after the part of its filename before the first "." (for example a file
# named "cat.0.jpg" goes to "cat/cat.0.jpg").
# Loop variables are named so that they do not shadow the builtins `dir`/`file`.
for parent, subdirs, filenames in os.walk("train"):
    # Directories without subfolders (the class folders themselves) hold
    # nothing to sort, so they are skipped.
    if not subdirs:
        continue
    for filename in filenames:
        category = filename.split(".")[0]
        # Skip stray files whose prefix does not name an existing subfolder
        # (e.g. "Thumbs.db"): os.rename would raise OSError for them and
        # abort the rearrangement halfway through.
        if category not in subdirs:
            continue
        os.rename(os.path.join(parent, filename),
                  os.path.join(parent, category, filename))
In [17]:
# Create the per-class subfolders under "valid" that receive the images
# held out from the training set further down.
%mkdir "valid/dog"
%mkdir "valid/cat"
In [8]:
# Snapshot of the filenames currently stored in "train/dog".
dogs = list(os.listdir("train/dog"))
In [9]:
# Snapshot of the filenames currently stored in "train/cat".
cats = list(os.listdir("train/cat"))
In [14]:
# Show how many images each class has: the cat count is printed, the dog
# count is displayed as the cell's output value.
# print(...) with a single argument behaves the same on Python 2 and also
# runs on Python 3, where the bare print statement is a SyntaxError.
print(len(cats))
len(dogs)
Out[14]:
In [15]:
import random
In [18]:
# Hold out 1000 randomly chosen dog images for validation. The move only
# happens while "valid/dog" is empty, so running the cell a second time
# does not move any further files.
if not os.listdir("valid/dog"):
    for picked in random.sample(dogs, 1000):
        os.rename("train/dog/{}".format(picked), "valid/dog/{}".format(picked))
In [19]:
# Hold out 1000 randomly chosen cat images for validation. The move only
# happens while "valid/cat" is empty, so running the cell a second time
# does not move any further files.
if not os.listdir("valid/cat"):
    for picked in random.sample(cats, 1000):
        os.rename("train/cat/{}".format(picked), "valid/cat/{}".format(picked))
In [ ]: