Initialization


In [1]:
!pip3 install imgaug
!pip3 install tqdm
!pip3 install pytube
!wget https://storage.googleapis.com/kent-test/ai_school/yolo/utils.py
!wget https://storage.googleapis.com/kent-test/ai_school/yolo/preprocessing.py
!wget https://storage.googleapis.com/kent-test/ai_school/weights_coco.h5


Collecting imgaug
  Downloading https://files.pythonhosted.org/packages/ad/2e/748dbb7bb52ec8667098bae9b585f448569ae520031932687761165419a2/imgaug-0.2.6.tar.gz (631kB)
    100% |████████████████████████████████| 634kB 5.4MB/s 
Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from imgaug) (0.19.1)
Requirement already satisfied: scikit-image>=0.11.0 in /usr/local/lib/python3.6/dist-packages (from imgaug) (0.13.1)
Requirement already satisfied: numpy>=1.7.0 in /usr/local/lib/python3.6/dist-packages (from imgaug) (1.14.5)
Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from imgaug) (1.11.0)
Requirement already satisfied: matplotlib>=1.3.1 in /usr/local/lib/python3.6/dist-packages (from scikit-image>=0.11.0->imgaug) (2.1.2)
Requirement already satisfied: PyWavelets>=0.4.0 in /usr/local/lib/python3.6/dist-packages (from scikit-image>=0.11.0->imgaug) (0.5.2)
Requirement already satisfied: networkx>=1.8 in /usr/local/lib/python3.6/dist-packages (from scikit-image>=0.11.0->imgaug) (2.1)
Requirement already satisfied: pillow>=2.1.0 in /usr/local/lib/python3.6/dist-packages (from scikit-image>=0.11.0->imgaug) (4.0.0)
Requirement already satisfied: pytz in /usr/local/lib/python3.6/dist-packages (from matplotlib>=1.3.1->scikit-image>=0.11.0->imgaug) (2018.5)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=1.3.1->scikit-image>=0.11.0->imgaug) (0.10.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=1.3.1->scikit-image>=0.11.0->imgaug) (2.2.0)
Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib>=1.3.1->scikit-image>=0.11.0->imgaug) (2.5.3)
Requirement already satisfied: decorator>=4.1.0 in /usr/local/lib/python3.6/dist-packages (from networkx>=1.8->scikit-image>=0.11.0->imgaug) (4.3.0)
Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from pillow>=2.1.0->scikit-image>=0.11.0->imgaug) (0.45.1)
Building wheels for collected packages: imgaug
  Running setup.py bdist_wheel for imgaug ... - \ done
  Stored in directory: /root/.cache/pip/wheels/97/ec/48/0d25896c417b715af6236dbcef8f0bed136a1a5e52972fc6d0
Successfully built imgaug
Installing collected packages: imgaug
Successfully installed imgaug-0.2.6
Collecting tqdm
  Downloading https://files.pythonhosted.org/packages/c7/e0/52b2faaef4fd87f86eb8a8f1afa2cd6eb11146822033e29c04ac48ada32c/tqdm-4.25.0-py2.py3-none-any.whl (43kB)
    100% |████████████████████████████████| 51kB 2.0MB/s 
Installing collected packages: tqdm
Successfully installed tqdm-4.25.0
Collecting pytube
  Downloading https://files.pythonhosted.org/packages/ff/4e/a499f2a002c6fa922979441104cca30ac4b5119bd5320292ea0edfeb12d9/pytube-9.2.2.tar.gz
Building wheels for collected packages: pytube
  Running setup.py bdist_wheel for pytube ... - \ done
  Stored in directory: /root/.cache/pip/wheels/27/c4/82/7360a8516386c9877c6a5fd4b17523da7163c53b692bb73ba2
Successfully built pytube
Installing collected packages: pytube
Successfully installed pytube-9.2.2
--2018-08-27 04:57:07--  https://storage.googleapis.com/kent-test/ai_school/yolo/utils.py
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.204.128, 2404:6800:4008:c04::80
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.204.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5000 (4.9K) [text/x-python]
Saving to: ‘utils.py’

utils.py            100%[===================>]   4.88K  --.-KB/s    in 0s      

2018-08-27 04:57:08 (32.1 MB/s) - ‘utils.py’ saved [5000/5000]

--2018-08-27 04:57:09--  https://storage.googleapis.com/kent-test/ai_school/yolo/preprocessing.py
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.204.128, 2404:6800:4008:c00::80
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.204.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13973 (14K) [text/x-python]
Saving to: ‘preprocessing.py’

preprocessing.py    100%[===================>]  13.65K  --.-KB/s    in 0s      

2018-08-27 04:57:10 (48.6 MB/s) - ‘preprocessing.py’ saved [13973/13973]

--2018-08-27 04:57:11--  https://storage.googleapis.com/kent-test/ai_school/weights_coco.h5
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.204.128, 2404:6800:4008:c04::80
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.204.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 611907352 (584M) [application/octet-stream]
Saving to: ‘weights_coco.h5’

weights_coco.h5     100%[===================>] 583.56M  51.2MB/s    in 12s     

2018-08-27 04:57:23 (47.8 MB/s) - ‘weights_coco.h5’ saved [611907352/611907352]


In [2]:
from keras.models import Sequential, Model
from keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from keras.layers.advanced_activations import LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.optimizers import SGD, Adam, RMSprop
from keras.layers.merge import concatenate
import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf
import imgaug as ia
from tqdm import tqdm
from imgaug import augmenters as iaa
import numpy as np
import pickle
import os, cv2
from preprocessing import parse_annotation, BatchGenerator
from utils import WeightReader, decode_netout, draw_boxes, normalize

# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = ""

%matplotlib inline


Using TensorFlow backend.

In [0]:


In [0]:
# Class names passed to draw_boxes as `labels`; list position is the class index.
LABELS = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
    'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
    'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
    'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
    'hair drier', 'toothbrush',
]

# Network input size and the spatial size of the prediction grid.
IMAGE_H = 416
IMAGE_W = 416
GRID_H = 13
GRID_W = 13

# Boxes predicted per grid cell, and number of classes (derived from LABELS).
BOX = 5
CLASS = len(LABELS)
CLASS_WEIGHTS = np.ones(CLASS, dtype='float32')

# Thresholds for decoding the network output.
OBJ_THRESHOLD = 0.3  # alternative value noted by the author: 0.5
NMS_THRESHOLD = 0.3  # alternative value noted by the author: 0.45

# Ten numbers, i.e. two per box for BOX = 5 — presumably (width, height) anchor
# priors in grid-cell units; confirm against decode_netout in utils.py.
ANCHORS = [
    0.57273, 0.677385,
    1.87446, 2.06253,
    3.33843, 5.47434,
    7.88282, 3.52778,
    9.77052, 9.16828,
]

# NOTE(review): the four *_SCALE values and WARM_UP_BATCHES are not read by any
# cell in this notebook; presumably training-loss settings — confirm before removing.
NO_OBJECT_SCALE = 1.0
OBJECT_SCALE = 5.0
COORD_SCALE = 1.0
CLASS_SCALE = 1.0

BATCH_SIZE = 16
WARM_UP_BATCHES = 0
# Length of the true_boxes model input (and of the dummy arrays fed at inference).
TRUE_BOX_BUFFER = 50

Construct the network


In [0]:
# Helper implementing the reorganization layer (thanks to github.com/allanzelener/YAD2K)
def space_to_depth_x2(x):
    """Apply ``tf.space_to_depth`` to ``x`` with a block size of 2.

    Wrapped in a Keras ``Lambda`` layer when the network is built; per the
    model summary it turns the (26, 26, 64) skip tensor into (13, 13, 256).
    """
    block = 2
    return tf.space_to_depth(x, block_size=block)

In [0]:
def _conv_bn_leaky(tensor, filters, kernel_size, index):
    """Append one bias-free Conv2D -> BatchNormalization -> LeakyReLU(0.1) stage.

    Args:
        tensor: Keras tensor to build on.
        filters: number of convolution output channels.
        kernel_size: convolution kernel size, ``(3, 3)`` or ``(1, 1)`` here.
        index: stage number; the layers are named ``conv_<index>`` and
            ``norm_<index>``.

    Returns:
        The output tensor of the LeakyReLU layer.
    """
    tensor = Conv2D(filters, kernel_size, strides=(1,1), padding='same',
                    name='conv_{}'.format(index), use_bias=False)(tensor)
    tensor = BatchNormalization(name='norm_{}'.format(index))(tensor)
    return LeakyReLU(alpha=0.1)(tensor)


# Model inputs: the image, plus a true_boxes tensor that is only passed through
# (see the Lambda near the end of this cell).
input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
true_boxes  = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER , 4))

# Stages are created in the same order, and with the same explicit layer names,
# as the previous hand-unrolled version of this cell.

# Layers 1-2, each followed by 2x2 max pooling
x = _conv_bn_leaky(input_image, 32, (3,3), 1)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = _conv_bn_leaky(x, 64, (3,3), 2)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Layers 3-5
x = _conv_bn_leaky(x, 128, (3,3), 3)
x = _conv_bn_leaky(x, 64, (1,1), 4)
x = _conv_bn_leaky(x, 128, (3,3), 5)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Layers 6-8
x = _conv_bn_leaky(x, 256, (3,3), 6)
x = _conv_bn_leaky(x, 128, (1,1), 7)
x = _conv_bn_leaky(x, 256, (3,3), 8)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Layers 9-13
x = _conv_bn_leaky(x, 512, (3,3), 9)
x = _conv_bn_leaky(x, 256, (1,1), 10)
x = _conv_bn_leaky(x, 512, (3,3), 11)
x = _conv_bn_leaky(x, 256, (1,1), 12)
x = _conv_bn_leaky(x, 512, (3,3), 13)

# Keep the pre-pooling feature map; it is merged back in after layer 20.
skip_connection = x

x = MaxPooling2D(pool_size=(2, 2))(x)

# Layers 14-20
x = _conv_bn_leaky(x, 1024, (3,3), 14)
x = _conv_bn_leaky(x, 512, (1,1), 15)
x = _conv_bn_leaky(x, 1024, (3,3), 16)
x = _conv_bn_leaky(x, 512, (1,1), 17)
x = _conv_bn_leaky(x, 1024, (3,3), 18)
x = _conv_bn_leaky(x, 1024, (3,3), 19)
x = _conv_bn_leaky(x, 1024, (3,3), 20)

# Layer 21: shrink the skip branch to 64 channels, then space-to-depth it so
# its spatial size matches the main branch before concatenation.
skip_connection = _conv_bn_leaky(skip_connection, 64, (1,1), 21)
skip_connection = Lambda(space_to_depth_x2)(skip_connection)

x = concatenate([skip_connection, x])

# Layer 22
x = _conv_bn_leaky(x, 1024, (3,3), 22)

# Layer 23: prediction head (this convolution keeps its bias and has no
# normalization/activation), reshaped to one (4 + 1 + CLASS) vector per box.
x = Conv2D(BOX * (4 + 1 + CLASS), (1,1), strides=(1,1), padding='same', name='conv_23')(x)
output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x)

# small hack to allow true_boxes to be registered when Keras builds the model
# for more information: https://github.com/fchollet/keras/issues/2790
output = Lambda(lambda args: args[0])([output, true_boxes])

model = Model([input_image, true_boxes], output)

In [6]:
# Print each layer's type, output shape, parameter count and inbound connections.
model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_1 (InputLayer)            (None, 416, 416, 3)  0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 416, 416, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
norm_1 (BatchNormalization)     (None, 416, 416, 32) 128         conv_1[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, 416, 416, 32) 0           norm_1[0][0]                     
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 208, 208, 32) 0           leaky_re_lu_1[0][0]              
__________________________________________________________________________________________________
conv_2 (Conv2D)                 (None, 208, 208, 64) 18432       max_pooling2d_1[0][0]            
__________________________________________________________________________________________________
norm_2 (BatchNormalization)     (None, 208, 208, 64) 256         conv_2[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_2 (LeakyReLU)       (None, 208, 208, 64) 0           norm_2[0][0]                     
__________________________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D)  (None, 104, 104, 64) 0           leaky_re_lu_2[0][0]              
__________________________________________________________________________________________________
conv_3 (Conv2D)                 (None, 104, 104, 128 73728       max_pooling2d_2[0][0]            
__________________________________________________________________________________________________
norm_3 (BatchNormalization)     (None, 104, 104, 128 512         conv_3[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_3 (LeakyReLU)       (None, 104, 104, 128 0           norm_3[0][0]                     
__________________________________________________________________________________________________
conv_4 (Conv2D)                 (None, 104, 104, 64) 8192        leaky_re_lu_3[0][0]              
__________________________________________________________________________________________________
norm_4 (BatchNormalization)     (None, 104, 104, 64) 256         conv_4[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_4 (LeakyReLU)       (None, 104, 104, 64) 0           norm_4[0][0]                     
__________________________________________________________________________________________________
conv_5 (Conv2D)                 (None, 104, 104, 128 73728       leaky_re_lu_4[0][0]              
__________________________________________________________________________________________________
norm_5 (BatchNormalization)     (None, 104, 104, 128 512         conv_5[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_5 (LeakyReLU)       (None, 104, 104, 128 0           norm_5[0][0]                     
__________________________________________________________________________________________________
max_pooling2d_3 (MaxPooling2D)  (None, 52, 52, 128)  0           leaky_re_lu_5[0][0]              
__________________________________________________________________________________________________
conv_6 (Conv2D)                 (None, 52, 52, 256)  294912      max_pooling2d_3[0][0]            
__________________________________________________________________________________________________
norm_6 (BatchNormalization)     (None, 52, 52, 256)  1024        conv_6[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_6 (LeakyReLU)       (None, 52, 52, 256)  0           norm_6[0][0]                     
__________________________________________________________________________________________________
conv_7 (Conv2D)                 (None, 52, 52, 128)  32768       leaky_re_lu_6[0][0]              
__________________________________________________________________________________________________
norm_7 (BatchNormalization)     (None, 52, 52, 128)  512         conv_7[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_7 (LeakyReLU)       (None, 52, 52, 128)  0           norm_7[0][0]                     
__________________________________________________________________________________________________
conv_8 (Conv2D)                 (None, 52, 52, 256)  294912      leaky_re_lu_7[0][0]              
__________________________________________________________________________________________________
norm_8 (BatchNormalization)     (None, 52, 52, 256)  1024        conv_8[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_8 (LeakyReLU)       (None, 52, 52, 256)  0           norm_8[0][0]                     
__________________________________________________________________________________________________
max_pooling2d_4 (MaxPooling2D)  (None, 26, 26, 256)  0           leaky_re_lu_8[0][0]              
__________________________________________________________________________________________________
conv_9 (Conv2D)                 (None, 26, 26, 512)  1179648     max_pooling2d_4[0][0]            
__________________________________________________________________________________________________
norm_9 (BatchNormalization)     (None, 26, 26, 512)  2048        conv_9[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_9 (LeakyReLU)       (None, 26, 26, 512)  0           norm_9[0][0]                     
__________________________________________________________________________________________________
conv_10 (Conv2D)                (None, 26, 26, 256)  131072      leaky_re_lu_9[0][0]              
__________________________________________________________________________________________________
norm_10 (BatchNormalization)    (None, 26, 26, 256)  1024        conv_10[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_10 (LeakyReLU)      (None, 26, 26, 256)  0           norm_10[0][0]                    
__________________________________________________________________________________________________
conv_11 (Conv2D)                (None, 26, 26, 512)  1179648     leaky_re_lu_10[0][0]             
__________________________________________________________________________________________________
norm_11 (BatchNormalization)    (None, 26, 26, 512)  2048        conv_11[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_11 (LeakyReLU)      (None, 26, 26, 512)  0           norm_11[0][0]                    
__________________________________________________________________________________________________
conv_12 (Conv2D)                (None, 26, 26, 256)  131072      leaky_re_lu_11[0][0]             
__________________________________________________________________________________________________
norm_12 (BatchNormalization)    (None, 26, 26, 256)  1024        conv_12[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_12 (LeakyReLU)      (None, 26, 26, 256)  0           norm_12[0][0]                    
__________________________________________________________________________________________________
conv_13 (Conv2D)                (None, 26, 26, 512)  1179648     leaky_re_lu_12[0][0]             
__________________________________________________________________________________________________
norm_13 (BatchNormalization)    (None, 26, 26, 512)  2048        conv_13[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_13 (LeakyReLU)      (None, 26, 26, 512)  0           norm_13[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_5 (MaxPooling2D)  (None, 13, 13, 512)  0           leaky_re_lu_13[0][0]             
__________________________________________________________________________________________________
conv_14 (Conv2D)                (None, 13, 13, 1024) 4718592     max_pooling2d_5[0][0]            
__________________________________________________________________________________________________
norm_14 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_14[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_14 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_14[0][0]                    
__________________________________________________________________________________________________
conv_15 (Conv2D)                (None, 13, 13, 512)  524288      leaky_re_lu_14[0][0]             
__________________________________________________________________________________________________
norm_15 (BatchNormalization)    (None, 13, 13, 512)  2048        conv_15[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_15 (LeakyReLU)      (None, 13, 13, 512)  0           norm_15[0][0]                    
__________________________________________________________________________________________________
conv_16 (Conv2D)                (None, 13, 13, 1024) 4718592     leaky_re_lu_15[0][0]             
__________________________________________________________________________________________________
norm_16 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_16[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_16 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_16[0][0]                    
__________________________________________________________________________________________________
conv_17 (Conv2D)                (None, 13, 13, 512)  524288      leaky_re_lu_16[0][0]             
__________________________________________________________________________________________________
norm_17 (BatchNormalization)    (None, 13, 13, 512)  2048        conv_17[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_17 (LeakyReLU)      (None, 13, 13, 512)  0           norm_17[0][0]                    
__________________________________________________________________________________________________
conv_18 (Conv2D)                (None, 13, 13, 1024) 4718592     leaky_re_lu_17[0][0]             
__________________________________________________________________________________________________
norm_18 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_18[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_18 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_18[0][0]                    
__________________________________________________________________________________________________
conv_19 (Conv2D)                (None, 13, 13, 1024) 9437184     leaky_re_lu_18[0][0]             
__________________________________________________________________________________________________
norm_19 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_19[0][0]                    
__________________________________________________________________________________________________
conv_21 (Conv2D)                (None, 26, 26, 64)   32768       leaky_re_lu_13[0][0]             
__________________________________________________________________________________________________
leaky_re_lu_19 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_19[0][0]                    
__________________________________________________________________________________________________
norm_21 (BatchNormalization)    (None, 26, 26, 64)   256         conv_21[0][0]                    
__________________________________________________________________________________________________
conv_20 (Conv2D)                (None, 13, 13, 1024) 9437184     leaky_re_lu_19[0][0]             
__________________________________________________________________________________________________
leaky_re_lu_21 (LeakyReLU)      (None, 26, 26, 64)   0           norm_21[0][0]                    
__________________________________________________________________________________________________
norm_20 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_20[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 13, 13, 256)  0           leaky_re_lu_21[0][0]             
__________________________________________________________________________________________________
leaky_re_lu_20 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_20[0][0]                    
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 13, 13, 1280) 0           lambda_1[0][0]                   
                                                                 leaky_re_lu_20[0][0]             
__________________________________________________________________________________________________
conv_22 (Conv2D)                (None, 13, 13, 1024) 11796480    concatenate_1[0][0]              
__________________________________________________________________________________________________
norm_22 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_22[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_22 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_22[0][0]                    
__________________________________________________________________________________________________
conv_23 (Conv2D)                (None, 13, 13, 425)  435625      leaky_re_lu_22[0][0]             
__________________________________________________________________________________________________
reshape_1 (Reshape)             (None, 13, 13, 5, 85 0           conv_23[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 1, 1, 1, 50,  0                                            
__________________________________________________________________________________________________
lambda_2 (Lambda)               (None, 13, 13, 5, 85 0           reshape_1[0][0]                  
                                                                 input_2[0][0]                    
==================================================================================================
Total params: 50,983,561
Trainable params: 50,962,889
Non-trainable params: 20,672
__________________________________________________________________________________________________

Load Model


In [0]:
# Load the weights file fetched by the setup cell into the model built above.
model.load_weights("weights_coco.h5")

Predict Image


In [0]:
from PIL import Image
from urllib.request import urlopen

# Sample photo used for the single-image prediction below.
IMAGE_URL = 'https://s2.yimg.com/lo/api/res/1.2/_EO7ZLTvO4MWCPkc8GuOfQ--/YXBwaWQ9eXR3ZnBhZ2U7dz02NDA7cT03NTtzbT0xO2lsPXBsYW5l/http://media.zenfs.com/zh-Hant_TW/News/yahoobeauty/20150611014734_easonhou_249949.jpg'

# The `with` block closes the HTTP response once the image is decoded.
# convert('RGB') forces the (otherwise lazy) decode while the stream is still
# open and guarantees a 3-channel image, which the model input requires, even
# if the source file is greyscale, palette-based or carries an alpha channel.
with urlopen(IMAGE_URL) as response:
    im = Image.open(response).convert('RGB')

In [9]:
import numpy as np

# Placeholder for the model's second input (true_boxes). The model only passes
# that input through a Lambda that returns the prediction tensor, so zeros are fine.
dummy_array = np.zeros((1,1,1,1,TRUE_BOX_BUFFER,4))

# PIL decodes to RGB, so unlike the OpenCV video frames (BGR) no channel
# reversal is applied here; the video cell reverses BGR -> RGB before predicting,
# and both paths must feed the same channel order to the network.
rgb_image = np.array(im.convert('RGB'))

# cv2.resize takes (width, height); scale pixels to [0, 1] and add a batch axis.
input_image = cv2.resize(rgb_image, (IMAGE_W, IMAGE_H))
input_image = input_image / 255.
input_image = np.expand_dims(input_image, 0)

netout = model.predict([input_image, dummy_array])

# Decode and draw the detections with the same calls the video cell uses.
boxes = decode_netout(netout[0],
                      obj_threshold=OBJ_THRESHOLD,
                      nms_threshold=NMS_THRESHOLD,
                      anchors=ANCHORS,
                      nb_class=CLASS)
# Draw on a copy so `rgb_image` stays untouched if draw_boxes works in place.
annotated = draw_boxes(rgb_image.copy(), boxes, labels=LABELS)

plt.figure(figsize=(10,10))
plt.imshow(annotated)
plt.axis('off')
plt.show()

netout.shape


Out[9]:
(1, 13, 13, 5, 85)
<matplotlib.figure.Figure at 0x7fb19c9e38d0>

Perform detection on video


In [6]:
# Download a sample clip.
# NOTE(review): no later cell in this notebook reads test1.mp4 (the video cell
# opens ./youtube.mp4) — confirm whether this download is still needed.
!wget https://storage.googleapis.com/kent-test/ai_school/yolo/test1.mp4


--2018-05-31 06:26:29--  https://storage.googleapis.com/kent-test/ai_school/yolo/test1.mp4
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.206.128, 2a00:1450:400c:c04::80
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.206.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 26564439 (25M) [video/mp4]
Saving to: ‘test1.mp4’

test1.mp4           100%[===================>]  25.33M  40.9MB/s    in 0.6s    

2018-05-31 06:26:30 (40.9 MB/s) - ‘test1.mp4’ saved [26564439/26564439]


In [0]:
import cv2
from pytube import YouTube

# Download the first stream pytube lists for this video, saved under the base
# name "youtube" in the working directory.
# NOTE(review): the video cell below opens ./youtube.mp4, but first() does not
# filter by container format — confirm the selected stream is an mp4.
youtube_video = YouTube('https://www.youtube.com/watch?v=KPxGUAUdRdo')
first_stream = youtube_video.streams.first()
first_stream.download(filename='youtube')

In [13]:
# List the working directory to confirm the downloaded files are present.
!ls


preprocessing.py  sample_data  weights_coco.h5
__pycache__	  utils.py     youtube.mp4

In [0]:
# Input clip and output file for the annotated video.
# The spelling 'capation.mp4' is kept on purpose: the download and Drive-upload
# cells further down reference this exact file name.
video_inp = './youtube.mp4'
video_out = 'capation.mp4'

video_reader = cv2.VideoCapture(video_inp)

# VideoCapture does not raise when the file is missing or unreadable; it would
# just report zero frames and a 0x0 frame size. Fail early with a clear message.
if not video_reader.isOpened():
    raise IOError('Cannot open input video: {}'.format(video_inp))

# Frame count and frame size, as reported by the container.
nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

In [0]:
# Writer for the annotated clip: same frame rate and frame size as the input.
video_writer = cv2.VideoWriter(filename=video_out,
                               fourcc=cv2.VideoWriter_fourcc(*'MP4V'),
                               fps=video_reader.get(cv2.CAP_PROP_FPS),
                               frameSize=(frame_w, frame_h))

# Placeholder for the model's second input (true_boxes), which is only passed through.
dummy_array = np.zeros((1,1,1,1,TRUE_BOX_BUFFER,4))

try:
    for frame_idx in tqdm(range(nb_frames)):
        ret, image = video_reader.read()

        # Check the read result BEFORE touching the frame: when no frame could
        # be grabbed, `image` is not a valid array and cv2.resize would raise.
        if not ret:
            break

        # Resize to the network input size, scale to [0, 1], reverse the channel
        # axis (OpenCV frames are BGR -> RGB) and add a batch axis.
        input_image = cv2.resize(image, (416, 416))
        input_image = input_image / 255.
        input_image = input_image[:,:,::-1]
        input_image = np.expand_dims(input_image, 0)

        netout = model.predict([input_image, dummy_array])

        # Use the configured threshold rather than a duplicated literal (same value, 0.3).
        boxes = decode_netout(netout[0],
                              obj_threshold=OBJ_THRESHOLD,
                              nms_threshold=NMS_THRESHOLD,
                              anchors=ANCHORS,
                              nb_class=CLASS)
        # Boxes are drawn on the original full-size frame, not the resized copy.
        image = draw_boxes(image, boxes, labels=LABELS)

        video_writer.write(np.uint8(image))
finally:
    # Release both handles even if the loop is interrupted or a frame fails,
    # so the output file is finalized and the input is closed.
    video_reader.release()
    video_writer.release()


 28%|██▊       | 1126/4065 [01:35<04:09, 11.80it/s]

In [39]:
# List the working directory to confirm the annotated video (capation.mp4) was written.
!ls


capation.mp4	     __pycache__	 weights_coco.h5
datalab		     test1.mp4		 weights_coco.h5.1
image_list.json      train.json		 yolo_coco_train2014.pkl
mytokenizer_coco.pk  utils.py		 youtube.mp4
preprocessing.py     vgg16_feature.hdf5

In [42]:
from google.colab import files

# Hand the annotated video to the browser as a file download (Colab only).
result_path = "./capation.mp4"
files.download(result_path)


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-42-1f6d00235d58> in <module>()
      1 from google.colab import files
      2 
----> 3 files.download("""./capation.mp4""")

/usr/local/lib/python3.6/dist-packages/google/colab/files.py in download(filename)
    174       'port': port,
    175       'path': os.path.abspath(filename),
--> 176       'name': os.path.basename(filename),
    177   })

/usr/local/lib/python3.6/dist-packages/google/colab/output/_js.py in eval_js(script, ignore_result)
     37   if ignore_result:
     38     return
---> 39   return _message.read_reply_from_input(request_id)
     40 
     41 

/usr/local/lib/python3.6/dist-packages/google/colab/_message.py in read_reply_from_input(message_id, timeout_sec)
     79     reply = _read_next_input_message()
     80     if reply == _NOT_READY or not isinstance(reply, dict):
---> 81       time.sleep(0.025)
     82       continue
     83     if (reply.get('type') == 'colab_reply' and

KeyboardInterrupt: 

上傳到 Google Drive 上 (Upload to Google Drive)


In [0]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once in a notebook.
# (-U upgrades an existing install, -q keeps pip's output quiet.)
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once in a notebook.
# Steps: authenticate the Colab user, hand the resulting application-default
# credentials to PyDrive, then build the `drive` client used by the upload cells.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
# Create a Drive file object titled 'capation.mp4'. Nothing is uploaded here;
# the content is attached and uploaded in the following cell.
uploaded = drive.CreateFile({'title': 'capation.mp4'})

In [45]:
# Attach the local annotated video as the file content, upload it to Drive,
# then print the ID of the uploaded file.
uploaded.SetContentFile("./capation.mp4")
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))


Uploaded file with ID 1PkJH2y28mbtjgttK0Rss7nEnIXXIAS_C