Convolutional neural networks usually expect input images of a fixed size, so images of different sizes have to be cropped, warped or resized before they can be fed to the network.
Spatial Pyramid Pooling (SPP) resolves this: it pools the last convolutional feature maps into a fixed-length representation regardless of the input size, and it should also improve efficiency.
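As a quick illustration of the idea (a minimal NumPy sketch with a hypothetical helper, not the SPP layer used later in this notebook): max-pooling a feature map over a pyramid of 1x1, 2x2 and 4x4 grids always produces 1 + 4 + 16 = 21 values per channel, whatever the spatial size of the input.
import numpy as np

def spp_vector(feature_map, levels=(1, 2, 4)):
    #Max-pool a (H, W, C) feature map over an l x l grid for each pyramid level.
    #The result has length sum(l*l for l in levels) * C, independent of H and W.
    h, w, c = feature_map.shape
    pooled = []
    for l in levels:
        h_edges = np.linspace(0, h, l + 1, dtype=int)
        w_edges = np.linspace(0, w, l + 1, dtype=int)
        for i in range(l):
            for j in range(l):
                cell = feature_map[h_edges[i]:h_edges[i+1], w_edges[j]:w_edges[j+1], :]
                pooled.append(cell.max(axis=(0, 1)))
    return np.concatenate(pooled)

#two inputs with different spatial sizes yield vectors of the same length: 21 * 8 = 168
print(spp_vector(np.random.rand(32, 48, 8)).shape)
print(spp_vector(np.random.rand(97, 13, 8)).shape)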
In [27]:
#Import libraries
import os
import glob
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#Note: scipy.misc.imread/imresize were removed in SciPy >= 1.2; imageio.imread and skimage.transform.resize are alternatives
from scipy.misc import imread, imresize
from random import shuffle
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
In [2]:
#Initial data inside simpsons.tar.gz
info = pd.read_csv('./the-simpsons-characters-dataset-MEDIUM/number_pic_char.csv')
print(info)
In [3]:
def get_character_directories(path):
character_directories = os.listdir(path)
if '.DS_Store' in character_directories: character_directories.remove('.DS_Store')
return character_directories
def get_images_from_directory(path):
image_files = os.listdir(path)
if '.DS_Store' in image_files: image_files.remove('.DS_Store')
return image_files
In [4]:
#root paths
train_root_path = "./the-simpsons-characters-dataset-MEDIUM/simpsons_dataset"
test_root_path = "./the-simpsons-characters-dataset-MEDIUM/kaggle_simpson_testset"
#train directories
character_directories = get_character_directories(train_root_path)
In [5]:
def test_paths():
#configure train dataset
print("Train: {}".format(character_directories[:5]))
#images from one train directory
character_path = train_root_path + "/" + character_directories[1]
train_image_names = get_images_from_directory(character_path)
print("Images of {}: {}".format(character_directories[1], train_image_names[:5]))
#configure test dataset
test_image_names = get_images_from_directory(test_root_path)
print("\nTest: {}".format(test_image_names[:5]))
test_paths()
In [6]:
def plot_image(image):
plt.figure(figsize=(3, 3))
plt.imshow(image)
plt.axis('off')
plt.show()
In [7]:
def show_train_image(character_directory, idx):
directory = os.path.join(train_root_path, character_directory)
sample_file = get_images_from_directory(directory)[idx]
path_file = os.path.join(directory, sample_file)
sample_image = imread(path_file)
print("Label:{}, Image:{}, Shape:{}".format(character_directory, idx, sample_image.shape))
plot_image(sample_image)
idx = random.randint(0, 10)
show_train_image(character_directories[idx], idx)
In [8]:
def show_test_image(idx):
test_image_names = get_images_from_directory(test_root_path)
sample_file, sample_name = test_image_names[idx], test_image_names[idx].split('_')[:-1]
path_file = os.path.join(test_root_path, sample_file)
sample_image = imread(path_file)
print("Label:{}, Image:{}, Shape:{}".format('_'.join(sample_name), idx, sample_image.shape))
plot_image(sample_image)
idx = random.randint(0, 10)
show_test_image(idx)
In [9]:
'''
'different_characters' defines how many different Simpson characters
the generator will draw images from.
@Return: tuple (list of images, list of names)
'''
#Create the batch generator
def batch_generator_training_images(batch_size, different_characters, verbose=False):
    #Obtain the character names as a pandas Series, sorted by total number of images (descending)
characters = info.sort_values(by='total', ascending=False).name
#transform to list with the following format: Homer Simpson -> homer_simpson
characters = characters.map(lambda x: x.replace(" ", "_").lower()).tolist()
path_name_list = list()
for i in range(0, different_characters):
directory = os.path.join(train_root_path, characters[i])
path_name_list += list(map( lambda x: (os.path.join(directory, x), characters[i]),
get_images_from_directory(directory)))
#shuffle all the images
shuffle(path_name_list)
#getting a number of complete chunks according to the batch size
number_complete_chunks = len(path_name_list)//batch_size
num_images_in_all_chunks = number_complete_chunks * batch_size
for b in range(0, num_images_in_all_chunks, batch_size):
if verbose:
print("init: {}, end:{}".format(b, b+batch_size))
yield (list(map( lambda x: imread(x[0]), path_name_list[b:b + batch_size])), #x == path
list(map( lambda x: x[1], path_name_list[b:b + batch_size])) ) #x == name
In [10]:
#testing how to show an image from generator
def testing_generator_1():
for batch in batch_generator_training_images(32, 10, True):
for img, name in zip(*batch):
print(name)
plot_image(img)
break
pass
break
testing_generator_1()
In [11]:
#testing the generator
def testing_generator_2():
group = {}
for batch in batch_generator_training_images(8, 10, True):
for img, name in zip(*batch):
if not img.shape in group:
group[img.shape] = []
group[img.shape].append(img)
pass
break
for key, lists in group.items():
print("shape{} count:{}".format(key, len(lists)))
testing_generator_2()
In [12]:
#global variable
num_classes = 10 #number of different Simpsons characters to classify
In [13]:
#one hot encoder
lb = preprocessing.LabelBinarizer()
def customize_onehot_encoder(num_classes):
    #Obtain the character names as a pandas Series, sorted by total number of images (descending)
characters = info.sort_values(by='total', ascending=False).name
#transform to list with the following format: Homer Simpson -> homer_simpson
names = characters[:num_classes].map(lambda x: x.replace(" ", "_").lower()).tolist()
print("Character classes: {}\n".format(names))
lb.fit(names)
In [14]:
#global variable
file_prefix = "spp_simpson_train_"
batch_size = 1024
In [20]:
import pickle
def create_processed_files(batch_size=64):
customize_onehot_encoder(num_classes)
# Format of data => {shape: {'images':[], 'labels':[]} }
data = {}
'''
The batch generator returns a tuple of two lists!!! (list_images, list_names)
'''
for cnt, batch in enumerate(batch_generator_training_images(batch_size, num_classes)):
#group by shape
grouped = {}
for image, name in zip(*batch):
if not image.shape in grouped:
grouped[image.shape] = []
grouped[image.shape].append((image, name))
#training images which are grouped by shape at the same time
for shape, tuple_imagesList_namesList in grouped.items():
imagesList, namesList = zip(*tuple_imagesList_namesList)
#casting
images = np.array(imagesList)
#onehot encoding names
names = np.array(lb.transform(namesList))
if len(shape) == 3: #just in case dimension are less than 3
#print("Storing shape:{} with {}".format(shape, len(images)))
data[shape] = {'images': images, 'labels': names}
else:
print("W: Dim image < 3")
#save into disk
with open("{}{}.pkl".format(file_prefix, cnt), 'wb') as file:
pickle.dump(data, file, pickle.HIGHEST_PROTOCOL)
print("{} saved".format("{}{}.pkl".format(file_prefix, cnt)))
create_processed_files(batch_size)
At this point, the images have been processed and stored in several pickle files.
Each file holds a dictionary with the following format:
{shape: {'images':[], 'labels':[]} }
Keras doesn't currently provide an SPP layer, but thanks to yhenon there is a module that integrates with Keras. Code is here
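Before building the model, a quick sanity check on the saved data (a minimal sketch, assuming the first file spp_simpson_train_0.pkl was written by the cell above):
with open("{}0.pkl".format(file_prefix), 'rb') as file:
    saved = pickle.load(file)
for shape, group in saved.items():
    print("shape:{} images:{} labels:{}".format(shape, group['images'].shape, group['labels'].shape))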
In [21]:
from keras.models import Sequential
from keras.layers import Conv2D, Convolution2D, Activation, MaxPooling2D, Dense, Dropout
from spp.SpatialPyramidPooling import SpatialPyramidPooling
In [22]:
'''
Build a model with a typical network architecture
'''
def build_model(num_channels = 3, num_classes = 10, filters = 32):
model = Sequential()
#Note: leave the image size as None to allow multiple image sizes
model.add(Conv2D(filters, (3, 3), padding='same', input_shape=(None, None, num_channels)))
model.add(Activation('relu'))
model.add(Conv2D(filters, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(filters*2, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(filters*2, (3, 3)))
model.add(Activation('relu'))
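    #pyramid levels 1x1, 2x2 and 4x4 -> 1 + 4 + 16 = 21 bins per channel, so the SPP output length is fixed (21 * filters*2) for any input size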
model.add(SpatialPyramidPooling([1, 2, 4]))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
build_model().summary()
In [23]:
#TESTING
# train on images with different sizes
def model_tried_out(num_channels, num_classes, batch_size):
model = build_model(num_channels, num_classes)
#size 1
model.fit(np.random.rand(batch_size, 8, 9, num_channels),
np.random.rand(batch_size, num_classes),
epochs=1)
#size 2
model.fit(np.random.rand(batch_size, 20, 13, num_channels),
np.random.rand(batch_size, num_classes),
epochs=1)
model_tried_out(2, 2, 16)
In [24]:
#global variables
num_channels = 3
In [32]:
def train_model():
customize_onehot_encoder(num_classes)
model = build_model(num_channels, num_classes)
num_files = len(glob.glob("{}*".format(file_prefix)))
for i in range(num_files):
fname = "{}{}.pkl".format(file_prefix, i)
if os.path.exists(fname):
with open(fname, 'rb') as file:
data = pickle.load(file)
print("\nProcessing file: {}".format(fname))
for shape, data in data.items():
print("Training shape: {}".format(shape))
val_size = 0.3 if len(data['images']) > 3 else 0
#X_train, X_vale, y_train, y_vale = train_test_split(data['images'], data['labels'],
# test_size=test_size)
model.fit(data['images'], data['labels'], validation_split=val_size, epochs=1, verbose=1)
return model
model1 = train_model()
In [26]:
def eyeball_test_model(model):
idx = random.randint(0, 100)
test_image_names = get_images_from_directory(test_root_path)
sample_file, sample_name = test_image_names[idx], test_image_names[idx].split('_')[:-1]
path_file = os.path.join(test_root_path, sample_file)
sample_image = imread(path_file)
print("Label: {}".format('_'.join(sample_name)))
#prediction
pred = model.predict(sample_image[np.newaxis, :])
print("PREDICTION: {}".format(lb.inverse_transform(pred)[0]))
#showing the image
plot_image(sample_image)
eyeball_test_model(model1)
In [ ]: