In [ ]:

Person: person
Animal: bird, cat, cow, dog, horse, sheep
Vehicle: aeroplane, bicycle, boat, bus, car, motorbike, train
Indoor: bottle, chair, dining table, potted plant, sofa, tv/monitor

In [1]:
# The 20 object class names, listed alphabetically.
voc_classes = [
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat',
    'chair', 'cow', 'dining table', 'dog',
    'horse', 'motorbike', 'person', 'potted plant',
    'sheep', 'sofa', 'train', 'tv/monitor',
]

# Why add 1? (open question left by the original author)
# NOTE(review): presumably the extra slot is a background / "no object"
# class -- TODO confirm.
NUM_CLASSES = 1 + len(voc_classes)

SSD Networkの実装


In [28]:
"""SSDのKeras実装"""
from keras.layers import Input, Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D, Flatten
from keras.models import Model

def SSD300(input_shape, num_classes=21):
    """Assemble the SSD300 (low resolution) network as a Keras ``Model``.

    Args:
        input_shape: Shape handed to ``Input(shape=...)``. Elements 0 and 1
            are also read to build the ``img_size`` given to ``PriorBox``.
        num_classes: Multiplied by the number of priors to size the conv4_3
            confidence convolution. Any value other than 21 is appended to
            that layer's name.

    Returns:
        A ``Model`` running from the input layer to the ``pool6``
        global-average-pooling output. The conv4_3 prediction layers are
        created but are not part of the returned model's outputs.

    NOTE(review): ``Normalize`` and ``PriorBox`` are not among the names
    imported next to this function. Unless they are defined elsewhere,
    calling this function raises ``NameError`` at the ``Normalize`` line.
    """

    def relu_conv(tensor, filters, kernel, layer_name, **options):
        """ReLU-activated Conv2D; 'same' padding unless ``options`` overrides it."""
        options.setdefault('padding', 'same')
        return Conv2D(filters, kernel, name=layer_name, activation='relu', **options)(tensor)

    def halving_pool(tensor, layer_name):
        """2x2 max pooling with stride 2 and 'same' padding."""
        return MaxPooling2D(name=layer_name, pool_size=(2, 2), strides=(2, 2), padding='same')(tensor)

    image = Input(shape=input_shape)

    # Block1
    net = relu_conv(image, 64, (3, 3), 'conv1_1')
    net = relu_conv(net, 64, (3, 3), 'conv1_2')
    net = halving_pool(net, 'pool1')

    # Block2
    net = relu_conv(net, 128, (3, 3), 'conv2_1')
    net = relu_conv(net, 128, (3, 3), 'conv2_2')
    net = halving_pool(net, 'pool2')

    # Block3
    net = relu_conv(net, 256, (3, 3), 'conv3_1')
    net = relu_conv(net, 256, (3, 3), 'conv3_2')
    net = relu_conv(net, 256, (3, 3), 'conv3_3')
    net = halving_pool(net, 'pool3')

    # Block4 -- the conv4_3 output is kept because the prediction layers
    # further down branch off from it.
    net = relu_conv(net, 512, (3, 3), 'conv4_1')
    net = relu_conv(net, 512, (3, 3), 'conv4_2')
    conv4_3_features = relu_conv(net, 512, (3, 3), 'conv4_3')
    net = halving_pool(conv4_3_features, 'pool4')

    # Block5 -- unlike the earlier pools, pool5 is 3x3 with stride 1.
    net = relu_conv(net, 512, (3, 3), 'conv5_1')
    net = relu_conv(net, 512, (3, 3), 'conv5_2')
    net = relu_conv(net, 512, (3, 3), 'conv5_3')
    net = MaxPooling2D(name='pool5', pool_size=(3, 3), strides=(1, 1), padding='same')(net)

    # FC6 (dilated 3x3 convolution)
    net = relu_conv(net, 1024, (3, 3), 'fc6', dilation_rate=(6, 6))

    # FC7 (1x1 convolution)
    net = relu_conv(net, 1024, (1, 1), 'fc7')

    # Block6 (conv8?)
    net = relu_conv(net, 256, (1, 1), 'conv6_1')
    net = relu_conv(net, 512, (3, 3), 'conv6_2', strides=(2, 2))

    # Block7 (conv9?) -- explicit zero padding followed by a 'valid' conv.
    net = relu_conv(net, 128, (1, 1), 'conv7_1')
    net = ZeroPadding2D(name='conv7_1z')(net)
    net = relu_conv(net, 256, (3, 3), 'conv7_2', padding='valid', strides=(2, 2))

    # Block8 (conv 10?)
    net = relu_conv(net, 128, (1, 1), 'conv8_1')
    net = relu_conv(net, 256, (3, 3), 'conv8_2', strides=(2, 2))

    # Last Pool (conv 11?)
    global_pool = GlobalAveragePooling2D(name='pool6')(net)

    # Prediction from conv4_3
    priors_per_cell = 3
    img_size = (input_shape[1], input_shape[0])
    conf_layer_name = 'conv4_3_norm_mbox_conf'
    if num_classes != 21:
        # A non-default class count gets its own layer name.
        conf_layer_name = conf_layer_name + '_{}'.format(num_classes)

    normed = Normalize(20, name='conv4_3_norm')(conv4_3_features)

    # Box-location branch: 4 values per prior, no activation.
    loc_map = Conv2D(priors_per_cell * 4, (3, 3), name='conv4_3_norm_mbox_loc', padding='same')(normed)
    loc_flat = Flatten(name='conv4_3_norm_mbox_loc_flat')(loc_map)

    # Class-confidence branch: num_classes values per prior, no activation.
    conf_map = Conv2D(priors_per_cell * num_classes, (3, 3), name=conf_layer_name, padding='same')(normed)
    conf_flat = Flatten(name='conv4_3_norm_mbox_conf_flat')(conf_map)

    prior_boxes = PriorBox(img_size, 30.0, name='conv4_3_norm_mbox_priorbox',
                           aspect_ratios=[2], variances=[0.1, 0.1, 0.2, 0.2])(normed)

    # Only the pooled backbone output is exposed; the branches above are not.
    return Model(inputs=image, outputs=global_pool)

In [29]:
# Build the network for a 300x300 input with 3 channels.
# NOTE(review): the recorded run of this cell ends in
# "NameError: name 'Normalize' is not defined".
input_shape = (300, 300, 3)
model = SSD300(input_shape=input_shape, num_classes=NUM_CLASSES)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-29-ead868015d67> in <module>()
      1 input_shape=(300, 300, 3)
----> 2 model = SSD300(input_shape, num_classes=NUM_CLASSES)

<ipython-input-28-9b198ef4ec37> in SSD300(input_shape, num_classes)
     64         name += '_{}'.format(num_classes)
     65 
---> 66     conv4_3_norm = Normalize(20, name='conv4_3_norm')(conv4_3)
     67     conv4_3_norm_mbox_loc = Conv2D(num_priors * 4, (3, 3), name='conv4_3_norm_mbox_loc', padding='same')(conv4_3_norm)
     68     conv4_3_norm_mbox_loc_flat = Flatten(name='conv4_3_norm_mbox_loc_flat')(conv4_3_norm_mbox_loc)

NameError: name 'Normalize' is not defined

In [27]:
# Print a per-layer table of output shapes and parameter counts for `model`.
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_9 (InputLayer)         (None, 300, 300, 3)       0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 300, 300, 64)      1792      
_________________________________________________________________
conv1_2 (Conv2D)             (None, 300, 300, 64)      36928     
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 150, 150, 64)      0         
_________________________________________________________________
conv2_1 (Conv2D)             (None, 150, 150, 128)     73856     
_________________________________________________________________
conv2_2 (Conv2D)             (None, 150, 150, 128)     147584    
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 75, 75, 128)       0         
_________________________________________________________________
conv3_1 (Conv2D)             (None, 75, 75, 256)       295168    
_________________________________________________________________
conv3_2 (Conv2D)             (None, 75, 75, 256)       590080    
_________________________________________________________________
conv3_3 (Conv2D)             (None, 75, 75, 256)       590080    
_________________________________________________________________
pool3 (MaxPooling2D)         (None, 38, 38, 256)       0         
_________________________________________________________________
conv4_1 (Conv2D)             (None, 38, 38, 512)       1180160   
_________________________________________________________________
conv4_2 (Conv2D)             (None, 38, 38, 512)       2359808   
_________________________________________________________________
conv4_3 (Conv2D)             (None, 38, 38, 512)       2359808   
_________________________________________________________________
pool4 (MaxPooling2D)         (None, 19, 19, 512)       0         
_________________________________________________________________
conv5_1 (Conv2D)             (None, 19, 19, 512)       2359808   
_________________________________________________________________
conv5_2 (Conv2D)             (None, 19, 19, 512)       2359808   
_________________________________________________________________
conv5_3 (Conv2D)             (None, 19, 19, 512)       2359808   
_________________________________________________________________
pool5 (MaxPooling2D)         (None, 19, 19, 512)       0         
_________________________________________________________________
fc6 (Conv2D)                 (None, 19, 19, 1024)      4719616   
_________________________________________________________________
fc7 (Conv2D)                 (None, 19, 19, 1024)      1049600   
_________________________________________________________________
conv6_1 (Conv2D)             (None, 19, 19, 256)       262400    
_________________________________________________________________
conv6_2 (Conv2D)             (None, 10, 10, 512)       1180160   
_________________________________________________________________
conv7_1 (Conv2D)             (None, 10, 10, 128)       65664     
_________________________________________________________________
conv7_1z (ZeroPadding2D)     (None, 12, 12, 128)       0         
_________________________________________________________________
conv7_2 (Conv2D)             (None, 5, 5, 256)         295168    
_________________________________________________________________
conv8_1 (Conv2D)             (None, 5, 5, 128)         32896     
_________________________________________________________________
conv8_2 (Conv2D)             (None, 3, 3, 256)         295168    
_________________________________________________________________
pool6 (GlobalAveragePooling2 (None, 256)               0         
=================================================================
Total params: 22,615,360
Trainable params: 22,615,360
Non-trainable params: 0
_________________________________________________________________

In [ ]: