In [ ]:
# VOC object classes, grouped by category:
#   Person:  person
#   Animal:  bird, cat, cow, dog, horse, sheep
#   Vehicle: aeroplane, bicycle, boat, bus, car, motorbike, train
#   Indoor:  bottle, chair, dining table, potted plant, sofa, tv/monitor
In [1]:
# The 20 VOC object categories, listed in alphabetical order.
voc_classes = [
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat',
    'chair', 'cow', 'dining table', 'dog',
    'horse', 'motorbike', 'person', 'potted plant',
    'sheep', 'sofa', 'train', 'tv/monitor',
]

# Why +1? Presumably to reserve one extra slot for a "background"
# (no object) class in addition to the 20 real categories -- TODO confirm.
NUM_CLASSES = 1 + len(voc_classes)
In [28]:
"""SSDのKeras実装"""
from keras.layers import Input, Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D, Flatten
from keras.models import Model
def SSD300(input_shape, num_classes=21):
    """Build the SSD300 (low resolution model) network as a Keras ``Model``.

    Work in progress: the returned model currently ends at the global
    average pooling layer ``pool6``. The conv4_3 prediction head
    (localisation / confidence / prior boxes) is instantiated below but is
    not yet connected to the model output.

    ``Normalize`` and ``PriorBox`` are not imported in this cell; they must
    already be defined in the notebook namespace when this function is called.

    Args:
        input_shape: Shape handed to ``Input(shape=...)``. Elements 0 and 1
            are also used (swapped) as the prior-box image size. Presumably
            (height, width, channels), e.g. (300, 300, 3) -- TODO confirm.
        num_classes: Number of classes scored per prior box by the
            confidence head. When it differs from 21, the confidence layer
            name gets a ``_<num_classes>`` suffix.

    Returns:
        A Keras ``Model`` mapping the input layer to ``pool6``.
    """

    def relu_conv(filters, kernel, name, **options):
        # ReLU-activated Conv2D; padding is 'same' unless overridden.
        options.setdefault('padding', 'same')
        return Conv2D(filters, kernel, name=name, activation='relu', **options)

    image = Input(shape=input_shape)

    # Block1 .. Block5: stacks of 3x3 convolutions, each followed by a max pool.
    # (filters, number of convs, pool_size, pool_strides)
    backbone = [
        (64, 2, (2, 2), (2, 2)),
        (128, 2, (2, 2), (2, 2)),
        (256, 3, (2, 2), (2, 2)),
        (512, 3, (2, 2), (2, 2)),
        (512, 3, (3, 3), (1, 1)),  # pool5 keeps the spatial size (stride 1)
    ]
    tensors = {}  # conv layer name -> output tensor, so heads can tap into it
    net = image
    for block_no, (filters, depth, pool_size, pool_strides) in enumerate(backbone, start=1):
        for conv_no in range(1, depth + 1):
            layer_name = 'conv{}_{}'.format(block_no, conv_no)
            net = relu_conv(filters, (3, 3), layer_name)(net)
            tensors[layer_name] = net
        net = MaxPooling2D(name='pool{}'.format(block_no), pool_size=pool_size,
                           strides=pool_strides, padding='same')(net)

    # FC6 (dilated 3x3 convolution) and FC7 (1x1 convolution)
    net = relu_conv(1024, (3, 3), 'fc6', dilation_rate=(6, 6))(net)
    net = relu_conv(1024, (1, 1), 'fc7')(net)

    # Block6 (conv8?)
    net = relu_conv(256, (1, 1), 'conv6_1')(net)
    net = relu_conv(512, (3, 3), 'conv6_2', strides=(2, 2))(net)

    # Block7 (conv9?): explicit zero padding followed by a 'valid' strided conv
    net = relu_conv(128, (1, 1), 'conv7_1')(net)
    net = ZeroPadding2D(name='conv7_1z')(net)
    net = relu_conv(256, (3, 3), 'conv7_2', padding='valid', strides=(2, 2))(net)

    # Block8 (conv 10?)
    net = relu_conv(128, (1, 1), 'conv8_1')(net)
    net = relu_conv(256, (3, 3), 'conv8_2', strides=(2, 2))(net)

    # Last Pool (conv 11?)
    pooled = GlobalAveragePooling2D(name='pool6')(net)

    # Prediction from conv4_3
    num_priors = 3
    img_size = (input_shape[1], input_shape[0])
    if num_classes == 21:
        conf_layer_name = 'conv4_3_norm_mbox_conf'
    else:
        conf_layer_name = 'conv4_3_norm_mbox_conf' + '_{}'.format(num_classes)

    source = Normalize(20, name='conv4_3_norm')(tensors['conv4_3'])

    # 4 localisation values per prior box at every spatial position.
    loc_map = Conv2D(num_priors * 4, (3, 3), name='conv4_3_norm_mbox_loc',
                     padding='same')(source)
    loc_flat = Flatten(name='conv4_3_norm_mbox_loc_flat')(loc_map)

    # num_classes confidence values per prior box at every spatial position.
    conf_map = Conv2D(num_priors * num_classes, (3, 3), name=conf_layer_name,
                      padding='same')(source)
    conf_flat = Flatten(name='conv4_3_norm_mbox_conf_flat')(conf_map)

    priorbox = PriorBox(img_size, 30.0, name='conv4_3_norm_mbox_priorbox',
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2])(source)

    # NOTE: loc_flat, conf_flat and priorbox are not used yet; only the
    # backbone up to pool6 is exposed as the model output.
    return Model(inputs=image, outputs=pooled)
In [29]:
# 300x300 input with 3 channels (presumably height, width, channels).
input_shape = (300, 300, 3)
model = SSD300(input_shape=input_shape, num_classes=NUM_CLASSES)
In [27]:
# Print the layer-by-layer summary of the model built above.
model.summary()
In [ ]: