Simpsons Classifier

Images with different sizes: understanding Spatial Pyramid Pooling

If the neural network requires a fixed input image size, we have to deal with images that come in different sizes.

So the possible solutions will be:

  • Crop: problem, there is information that gets lost.
  • Warp/Resize: problem, the image patterns change and we lose accuracy.
  • Padding: problem, the model might be biased to images that contain such a padded border.
  • Combination of previous.

Spatial Pyramid Pooling tries to solve this problem, and it should also improve efficiency.

Paper: https://arxiv.org/abs/1406.4729

Defining Datasets


In [27]:
#Import libraries
import os
import glob
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.misc import imread, imresize
from random import shuffle
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [2]:
#Initial data inside simpsons.tar.gz
# Per-character image counts (total/train/test) and bounding-box counts shipped
# with the dataset; the `info` DataFrame is reused below by the batch generator
# and the one-hot encoder to rank characters by number of images.
info = pd.read_csv('./the-simpsons-characters-dataset-MEDIUM/number_pic_char.csv')
print(info)


    Unnamed: 0                      name  total  train  test  bounding_box
0            0             Homer Simpson   2246   1909   337           612
1            1              Ned Flanders   1454   1236   218           595
2            2               Moe Szyslak   1452   1234   218           215
3            3              Lisa Simpson   1354   1151   203           562
4            4              Bart Simpson   1342   1141   201           554
5            5             Marge Simpson   1291   1097   194           557
6            6          Krusty The Clown   1206   1025   181           226
7            7         Principal Skinner   1194   1015   179           506
8            8  Charles Montgomery Burns   1193   1014   179           650
9            9       Milhouse Van Houten   1079    917   162           210
10          10              Chief Wiggum    986    838   148           209
11          11    Abraham Grampa Simpson    913    776   137           595
12          12              Sideshow Bob    877    745   132           203
13          13    Apu Nahasapeemapetilon    623    530    93           206
14          14             Kent Brockman    498    423    75           213
15          15            Comic Book Guy    469    399    70           208
16          16            Edna Krabappel    457    388    69           212
17          17              Nelson Muntz    358    304    54           219
18          18             Lenny Leonard    310    264    46             0
19          19              Mayor Quimby    246    209    37             0
20          20           Waylon Smithers    181    154    27             0
21          21            Maggie Simpson    128    109    19             0
22          22      Groundskeeper Willie    121    103    18             0
23          23             Barney Gumble    106     90    16             0
24          24             Selma Bouvier    103     88    15             0
25          25              Carl Carlson     98     83    15             0
26          26              Ralph Wiggum     89     76    13             0
27          27             Patty Bouvier     72     61    11             0
28          28             Martin Prince     71     60    11             0
29          29      Professor John Frink     65     55    10             0
30          30            Snake Jailbird     55     47     8             0
31          31           Cletus Spuckler     47     40     7             0
32          32        Rainier Wolfcastle     45     38     7             0
33          33             Agnes Skinner     42     36     6             0
34          34              Sideshow Mel     40     34     6             0
35          35                 Otto Mann     32     27     5             0
36          36                  Fat Tony     27     23     4             0
37          37                       Gil     27     23     4             0
38          38               Miss Hoover     17     14     3             0
39          39                 Disco Stu      8      7     1             0
40          40              Troy Mcclure      8      7     1             0
41          41               Lionel Hutz      3      3     0             0
42          42               Jimbo Jones      0      0     0             0
43          43             Bumblebee Man      0      0     0             0
44          44              Hans Moleman      0      0     0             0
45          45             Helen Lovejoy      0      0     0             0
46          46            Jasper Beardly      0      0     0             0

In [3]:
def get_character_directories(path):
    """Return the character sub-directory names under `path`, skipping the macOS '.DS_Store' artifact."""
    return [entry for entry in os.listdir(path) if entry != '.DS_Store']

def get_images_from_directory(path):
    """Return the image file names inside `path`, skipping the macOS '.DS_Store' artifact."""
    return [name for name in os.listdir(path) if name != '.DS_Store']

In [4]:
#root paths: training set has one folder per character, test set is a flat folder
train_root_path = "./the-simpsons-characters-dataset-MEDIUM/simpsons_dataset"
test_root_path = "./the-simpsons-characters-dataset-MEDIUM/kaggle_simpson_testset"

#train directories: one entry per character class, used throughout the notebook
character_directories = get_character_directories(train_root_path)

In [5]:
def test_paths():
    """Smoke-check the dataset layout by printing a few train/test entries."""
    # a handful of training class directories
    print("Train: {}".format(character_directories[:5]))

    # sample image file names from one training class directory
    sample_character = character_directories[1]
    sample_dir = train_root_path + "/" + sample_character
    sample_images = get_images_from_directory(sample_dir)
    print("Images of {}: {}".format(sample_character, sample_images[:5]))


    # the flat test-set directory
    print("\nTest: {}".format(get_images_from_directory(test_root_path)[:5]))

test_paths()


Train: ['maggie_simpson', 'charles_montgomery_burns', 'patty_bouvier', 'ralph_wiggum', 'chief_wiggum']
Images of charles_montgomery_burns: ['pic_0982.jpg', 'pic_0028.jpg', 'pic_0996.jpg', 'pic_0766.jpg', 'pic_0000.jpg']

Test: ['principal_skinner_34.jpg', 'apu_nahasapeemapetilon_28.jpg', 'abraham_grampa_simpson_41.jpg', 'principal_skinner_20.jpg', 'apu_nahasapeemapetilon_14.jpg']

Showing Training and Test Samples


In [6]:
def plot_image(image):
    """Display `image` in a compact 3x3-inch figure with the axes hidden."""
    _, ax = plt.subplots(figsize=(3, 3))
    ax.imshow(image)
    ax.axis('off')
    plt.show()

In [7]:
def show_train_image(character_directory, idx):
    """Load and display the idx-th image of `character_directory`, printing its label and shape."""
    directory = os.path.join(train_root_path, character_directory)
    image_names = get_images_from_directory(directory)
    image_path = os.path.join(directory, image_names[idx])
    # NOTE(review): imread comes from scipy.misc, which was removed in SciPy >= 1.2;
    # consider imageio.imread if the environment is upgraded.
    sample_image = imread(image_path)

    print("Label:{}, Image:{}, Shape:{}".format(character_directory, idx, sample_image.shape))
    plot_image(sample_image)

idx = random.randint(0, 10)
show_train_image(character_directories[idx], idx)


Label:milhouse_van_houten, Image:6, Shape:(108, 72, 3)

In [8]:
def show_test_image(idx):
    """Display the idx-th test image; its label is encoded in the file name."""
    test_image_names = get_images_from_directory(test_root_path)
    file_name = test_image_names[idx]
    # test files look like 'character_name_NN.jpg' -> drop the trailing index chunk
    label_parts = file_name.split('_')[:-1]
    sample_image = imread(os.path.join(test_root_path, file_name))

    print("Label:{}, Image:{}, Shape:{}".format('_'.join(label_parts), idx, sample_image.shape))
    plot_image(sample_image)

idx = random.randint(0, 10)
show_test_image(idx)


Label:apu_nahasapeemapetilon, Image:1, Shape:(86, 57, 3)

Training Images


In [9]:
#Create the batch generator
def batch_generator_training_images(batch_size, different_characters, verbose=False):
    """Yield training batches as (images list, names list) tuples.

    `different_characters` limits the generator to the N characters with the
    most images, ranked via the global `info` DataFrame. Only complete chunks
    of `batch_size` images are yielded; the remainder is dropped.
    """
    # characters ordered by total number of available images, most first
    ranked = info.sort_values(by='total', ascending=False).name

    # 'Homer Simpson' -> 'homer_simpson' (matches the directory names on disk)
    ranked = ranked.map(lambda x: x.replace(" ", "_").lower()).tolist()

    # collect (image path, character name) pairs for the selected characters
    path_name_list = []
    for character in ranked[:different_characters]:
        directory = os.path.join(train_root_path, character)
        for image_file in get_images_from_directory(directory):
            path_name_list.append((os.path.join(directory, image_file), character))

    #shuffle all the images
    shuffle(path_name_list)

    #getting a number of complete chunks according to the batch size
    num_images_in_all_chunks = (len(path_name_list) // batch_size) * batch_size

    for b in range(0, num_images_in_all_chunks, batch_size):
        if verbose:
            print("init: {}, end:{}".format(b, b + batch_size))

        chunk = path_name_list[b:b + batch_size]
        yield ([imread(path) for path, _ in chunk],
               [name for _, name in chunk])

In [10]:
#testing how to show an image from generator
def testing_generator_1():
    """Pull a single batch and display only its first (image, name) pair."""
    for images, names in batch_generator_training_images(32, 10, True):
        if images:
            print(names[0])
            plot_image(images[0])
        break

testing_generator_1()


init: 0, end:32
marge_simpson

In [11]:
#testing the generator
def testing_generator_2():
    """Group the first batch's images by shape and report how many fall in each."""
    groups = {}

    for images, names in batch_generator_training_images(8, 10, True):
        for img in images:
            groups.setdefault(img.shape, []).append(img)
        break

    for shape, imgs in groups.items():
        print("shape{} count:{}".format(shape, len(imgs)))


testing_generator_2()


init: 0, end:8
shape(108, 72, 3) count:3
shape(88, 78, 3) count:1
shape(104, 144, 3) count:1
shape(120, 160, 3) count:1
shape(100, 66, 3) count:1
shape(104, 72, 3) count:1

In [12]:
#global variable
num_classes = 10 #number of different Simpsons characters (the 10 with the most images)

In [13]:
#one hot encoder
lb = preprocessing.LabelBinarizer()

def customize_onehot_encoder(num_classes):
    """Fit the global LabelBinarizer `lb` on the top-`num_classes` character names."""
    # characters ranked by total image count, most first (from the global `info`)
    ranked = info.sort_values(by='total', ascending=False).name

    # 'Homer Simpson' -> 'homer_simpson' (matches the directory names on disk)
    names = [n.replace(" ", "_").lower() for n in ranked[:num_classes]]

    print("Character classes: {}\n".format(names))
    lb.fit(names)

Storing Processed Files On Disk


In [14]:
#global variable
file_prefix = "spp_simpson_train_"  # name prefix for the on-disk pickle files
batch_size = 1024  # images per generator batch == images per pickle file

In [20]:
import pickle

def create_processed_files(batch_size=64):
    """Group each training batch by image shape and pickle it to disk.

    Writes one file per batch, named '<file_prefix><batch_index>.pkl', whose
    content has the format {shape: {'images': ndarray, 'labels': one-hot ndarray}}.
    Images must be grouped by shape because np.array() can only stack
    same-shaped images into a single training tensor.
    """
    customize_onehot_encoder(num_classes)

    '''
    The batch generator returns a tuple of two lists!!! (list_images, list_names)
    '''
    for cnt, batch in enumerate(batch_generator_training_images(batch_size, num_classes)):

        # Format of data => {shape: {'images':[], 'labels':[]} }
        # BUGFIX: this dict previously lived OUTSIDE the loop, so every pickle
        # file also carried the shape groups of all earlier batches (growing,
        # duplicated data on disk). Reset it per batch.
        data = {}

        #group by shape
        grouped = {}
        for image, name in zip(*batch):
            grouped.setdefault(image.shape, []).append((image, name))

        #training images which are grouped by shape at the same time
        for shape, pairs in grouped.items():
            imagesList, namesList = zip(*pairs)

            #casting
            images = np.array(imagesList)
            #onehot encoding names
            names = np.array(lb.transform(namesList))

            if len(shape) == 3: #just in case dimensions are less than 3
                data[shape] = {'images': images, 'labels': names}
            else:
                print("W: Dim image < 3")

        #save into disk
        fname = "{}{}.pkl".format(file_prefix, cnt)
        with open(fname, 'wb') as file:
            pickle.dump(data, file, pickle.HIGHEST_PROTOCOL)
            print("{} saved".format(fname))


create_processed_files(batch_size)


Character classes: ['homer_simpson', 'ned_flanders', 'moe_szyslak', 'lisa_simpson', 'bart_simpson', 'marge_simpson', 'krusty_the_clown', 'principal_skinner', 'charles_montgomery_burns', 'milhouse_van_houten']

spp_simpson_train_0.pkl saved
spp_simpson_train_1.pkl saved
spp_simpson_train_2.pkl saved
spp_simpson_train_3.pkl saved
spp_simpson_train_4.pkl saved
spp_simpson_train_5.pkl saved
W: Dim image < 3
spp_simpson_train_6.pkl saved
spp_simpson_train_7.pkl saved
spp_simpson_train_8.pkl saved
spp_simpson_train_9.pkl saved
spp_simpson_train_10.pkl saved
spp_simpson_train_11.pkl saved
spp_simpson_train_12.pkl saved

From this point on, the images are processed and stored in separate pickle files.

The format of the data is a dictionary which follows the format:

{shape: {'images':[], 'labels':[]} }

Keras + SPP Module

Keras doesn't currently support the SPP layer, but thanks to yhenon there's a module that integrates it into Keras. The code is here


In [21]:
from keras.models import Sequential
from keras.layers import Conv2D, Convolution2D, Activation, MaxPooling2D, Dense, Dropout
from spp.SpatialPyramidPooling import SpatialPyramidPooling


Using TensorFlow backend.

In [22]:
def build_model(num_channels = 3, num_classes = 10, filters = 32):
    """Build a small CNN that ends in Spatial Pyramid Pooling.

    The SPP layer emits a fixed-length feature vector for any input size,
    so the spatial dimensions of the input are left as None.
    """
    #Note: leave the image size as None to allow multiple image sizes
    layers = [
        Conv2D(filters, (3, 3), padding='same', input_shape=(None, None, num_channels)),
        Activation('relu'),
        Conv2D(filters, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters * 2, (3, 3), padding='same'),
        Activation('relu'),
        Conv2D(filters * 2, (3, 3)),
        Activation('relu'),
        # pyramid of 1x1 + 2x2 + 4x4 pooling regions -> fixed-size output
        SpatialPyramidPooling([1, 2, 4]),
        Dense(num_classes),
        Activation('softmax'),
    ]

    model = Sequential(layers)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

build_model().summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_1 (Conv2D)            (None, None, None, 32)    896       
_________________________________________________________________
activation_1 (Activation)    (None, None, None, 32)    0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, None, None, 32)    9248      
_________________________________________________________________
activation_2 (Activation)    (None, None, None, 32)    0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, None, None, 32)    0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, None, None, 64)    18496     
_________________________________________________________________
activation_3 (Activation)    (None, None, None, 64)    0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, None, None, 64)    36928     
_________________________________________________________________
activation_4 (Activation)    (None, None, None, 64)    0         
_________________________________________________________________
spatial_pyramid_pooling_1 (S (None, 1344)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                13450     
_________________________________________________________________
activation_5 (Activation)    (None, 10)                0         
=================================================================
Total params: 79,018
Trainable params: 79,018
Non-trainable params: 0
_________________________________________________________________

In [23]:
#TESTING
# train on images with different sizes
def model_tried_out(num_channels, num_classes, batch_size):
    """Sanity-check that one model instance can fit batches of two different image sizes."""
    model = build_model(num_channels, num_classes)
    # same model, two distinct spatial sizes -- SPP makes this legal
    for height, width in [(8, 9), (20, 13)]:
        model.fit(np.random.rand(batch_size, height, width, num_channels),
                  np.random.rand(batch_size, num_classes),
                  epochs=1)

model_tried_out(2, 2, 16)


Epoch 1/1
16/16 [==============================] - 1s 36ms/step - loss: 1.1133e-07 - acc: 0.4375
Epoch 1/1
16/16 [==============================] - 0s 3ms/step - loss: 1.1852e-07 - acc: 0.4375

Training The Model Using The Processed Files


In [24]:
#global variables
num_channels = 3  # RGB input images

In [32]:
def train_model():
    """Train a fresh SPP model over every processed pickle file on disk.

    Each file holds {shape: {'images': ndarray, 'labels': ndarray}}; every
    shape group is fitted separately because a single Keras fit() call needs
    a uniform image size per tensor (SPP only lifts the constraint across calls).

    Returns the trained Keras model.
    """
    customize_onehot_encoder(num_classes)

    model = build_model(num_channels, num_classes)

    num_files = len(glob.glob("{}*".format(file_prefix)))

    for i in range(num_files):
        fname = "{}{}.pkl".format(file_prefix, i)
        if os.path.exists(fname):
            with open(fname, 'rb') as file:
                data = pickle.load(file)
                print("\nProcessing file: {}".format(fname))

                # BUGFIX: the loop variable was also called `data`, shadowing
                # the dict being iterated; renamed to `group` for clarity.
                for shape, group in data.items():
                    print("Training shape: {}".format(shape))

                    # hold out 30% for validation only when there are enough samples
                    val_size = 0.3 if len(group['images']) > 3 else 0

                    model.fit(group['images'], group['labels'], validation_split=val_size, epochs=1, verbose=1)

    return model

model1 = train_model()


Character classes: ['homer_simpson', 'ned_flanders', 'moe_szyslak', 'lisa_simpson', 'bart_simpson', 'marge_simpson', 'krusty_the_clown', 'principal_skinner', 'charles_montgomery_burns', 'milhouse_van_houten']


Processing file: spp_simpson_train_0.pkl
Training shape: (116, 160, 3)
Train on 16 samples, validate on 7 samples
Epoch 1/1
16/16 [==============================] - 5s 299ms/step - loss: 10.4038 - acc: 0.0625 - val_loss: 11.5129 - val_acc: 0.2857
Training shape: (88, 78, 3)
Train on 50 samples, validate on 22 samples
Epoch 1/1
50/50 [==============================] - 4s 81ms/step - loss: 13.5392 - acc: 0.1600 - val_loss: 14.6528 - val_acc: 0.0909
Training shape: (100, 132, 3)
Epoch 1/1
3/3 [==============================] - 0s 161ms/step - loss: 16.1181 - acc: 0.0000e+00
Training shape: (107, 72, 3)
Train on 2 samples, validate on 2 samples
Epoch 1/1
2/2 [==============================] - 0s 106ms/step - loss: 16.1181 - acc: 0.0000e+00 - val_loss: 16.1181 - val_acc: 0.0000e+00
Training shape: (107, 144, 3)
Epoch 1/1
1/1 [==============================] - 0s 165ms/step - loss: 16.1181 - acc: 0.0000e+00
Training shape: (64, 106, 3)
Epoch 1/1
1/1 [==============================] - 0s 89ms/step - loss: 1.1921e-07 - acc: 1.0000
Training shape: (70, 64, 3)
Epoch 1/1
1/1 [==============================] - 0s 56ms/step - loss: 16.1181 - acc: 0.0000e+00
Training shape: (64, 92, 3)
Epoch 1/1
1/1 [==============================] - 0s 79ms/step - loss: 16.1181 - acc: 0.0000e+00
Training shape: (64, 95, 3)
Epoch 1/1
1/1 [==============================] - 0s 81ms/step - loss: 1.1921e-07 - acc: 1.0000
Training shape: (120, 80, 3)
Train on 101 samples, validate on 44 samples
Epoch 1/1
101/101 [==============================] - 12s 119ms/step - loss: 14.0435 - acc: 0.1287 - val_loss: 14.6528 - val_acc: 0.0909
Training shape: (64, 82, 3)
Epoch 1/1
2/2 [==============================] - 0s 60ms/step - loss: 16.1181 - acc: 0.0000e+00
Training shape: (132, 90, 3)
Epoch 1/1
1/1 [==============================] - 0s 141ms/step - loss: 16.1181 - acc: 0.0000e+00
Training shape: (108, 144, 3)
Train on 60 samples, validate on 27 samples
Epoch 1/1
32/60 [===============>..............] - ETA: 5s - loss: 13.0960 - acc: 0.1875
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-32-b8788b47cc77> in <module>()
     25     return model
     26 
---> 27 model1 = train_model()

<ipython-input-32-b8788b47cc77> in train_model()
     21                     #                                                    test_size=test_size)
     22 
---> 23                     model.fit(data['images'], data['labels'], validation_split=val_size, epochs=1, verbose=1)
     24 
     25     return model

~/anaconda/envs/py3/lib/python3.5/site-packages/keras/models.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
    891                               class_weight=class_weight,
    892                               sample_weight=sample_weight,
--> 893                               initial_epoch=initial_epoch)
    894 
    895     def evaluate(self, x, y, batch_size=32, verbose=1,

~/anaconda/envs/py3/lib/python3.5/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
   1629                               initial_epoch=initial_epoch,
   1630                               steps_per_epoch=steps_per_epoch,
-> 1631                               validation_steps=validation_steps)
   1632 
   1633     def evaluate(self, x=None, y=None,

~/anaconda/envs/py3/lib/python3.5/site-packages/keras/engine/training.py in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
   1225                             val_outs = self._test_loop(val_f, val_ins,
   1226                                                        batch_size=batch_size,
-> 1227                                                        verbose=0)
   1228                             if not isinstance(val_outs, list):
   1229                                 val_outs = [val_outs]

~/anaconda/envs/py3/lib/python3.5/site-packages/keras/engine/training.py in _test_loop(self, f, ins, batch_size, verbose, steps)
   1368                     ins_batch = _slice_arrays(ins, batch_ids)
   1369 
-> 1370                 batch_outs = f(ins_batch)
   1371                 if isinstance(batch_outs, list):
   1372                     if batch_index == 0:

~/anaconda/envs/py3/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py in __call__(self, inputs)
   2330         updated = session.run(self.outputs + [self.updates_op],
   2331                               feed_dict=feed_dict,
-> 2332                               **self.session_kwargs)
   2333         return updated[:len(self.outputs)]
   2334 

~/anaconda/envs/py3/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    765     try:
    766       result = self._run(None, fetches, feed_dict, options_ptr,
--> 767                          run_metadata_ptr)
    768       if run_metadata:
    769         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

~/anaconda/envs/py3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    963     if final_fetches or final_targets:
    964       results = self._do_run(handle, final_targets, final_fetches,
--> 965                              feed_dict_string, options, run_metadata)
    966     else:
    967       results = []

~/anaconda/envs/py3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1013     if handle is None:
   1014       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1015                            target_list, options, run_metadata)
   1016     else:
   1017       return self._do_call(_prun_fn, self._session, handle, feed_dict,

~/anaconda/envs/py3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1020   def _do_call(self, fn, *args):
   1021     try:
-> 1022       return fn(*args)
   1023     except errors.OpError as e:
   1024       message = compat.as_text(e.message)

~/anaconda/envs/py3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1002         return tf_session.TF_Run(session, options,
   1003                                  feed_dict, fetch_list, target_list,
-> 1004                                  status, run_metadata)
   1005 
   1006     def _prun_fn(session, handle, feed_dict, fetch_list):

KeyboardInterrupt: 

Testing The Model


In [26]:
def eyeball_test_model(model):
    idx = random.randint(0, 100)
    test_image_names = get_images_from_directory(test_root_path)
    sample_file, sample_name = test_image_names[idx], test_image_names[idx].split('_')[:-1]
    path_file = os.path.join(test_root_path, sample_file)
    sample_image = imread(path_file)

    print("Label: {}".format('_'.join(sample_name)))
    
    #prediction
    pred = model.predict(sample_image[np.newaxis, :])
    print("PREDICTION: {}".format(lb.inverse_transform(pred)[0]))
 
    #showing the image
    plot_image(sample_image)

eyeball_test_model(model1)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-26-62b8e8b64089> in <module>()
     15     plot_image(sample_image)
     16 
---> 17 eyeball_test_model(model1)

NameError: name 'model1' is not defined

In [ ]: