In [1]:
import keras
import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline


Using TensorFlow backend.

In [2]:
# Load the annotation table; judging by the printed head, each row holds one
# bounding box (xmin/xmax/ymin/ymax, label) for the image named in 'Frame'.
data = pd.read_csv('voc.csv')
# Drop the 'Unnamed: 0' column (presumably an index saved by an earlier to_csv).
# `axis` is passed by keyword because pandas >= 2.0 rejects it as a positional
# argument, which made the original `drop('Unnamed: 0', 1)` raise a TypeError.
data = data.drop('Unnamed: 0', axis=1)
# Prefix every frame name with the JPEG directory to get a loadable path.
data['File_Path'] = './VOCdevkit2007/VOC2007/JPEGImages/' + data['Frame']
#data = data[(data['label'] == 0)].reset_index()
print(data.head())


        Frame   xmin   xmax   ymin   ymax  label  type  \
0  003815.jpg  188.0  433.0  108.0  213.0    0.0  test   
1  003815.jpg  231.0  259.0    1.0   54.0    1.0  test   
2  003815.jpg  186.0  219.0    1.0   54.0    1.0  test   
3  003815.jpg  242.0  274.0   11.0   95.0    1.0  test   
4  003815.jpg  288.0  333.0  125.0  149.0    1.0  test   

                                       File_Path  
0  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
1  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
2  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
3  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
4  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  

In [3]:
def csv2bbox(bbox):
    """Convert annotation rows into an array of box corners.

    Parameters
    ----------
    bbox : pandas.DataFrame
        Rows with numeric ``xmin``, ``ymin``, ``xmax`` and ``ymax`` columns.
        Any other columns are ignored.

    Returns
    -------
    numpy.ndarray
        Float64 array of shape ``(len(bbox), 4)`` whose columns are
        ``[xmin, ymin, xmax, ymax]``, each value truncated toward zero.
        An empty frame yields an array of shape ``(0, 4)``.
    """
    # Read from the argument. The previous body ignored `bbox` and indexed the
    # notebook-global `bb_boxes`, so it failed (or used stale data) for any
    # other input.
    corners = bbox[['xmin', 'ymin', 'xmax', 'ymax']].values.astype(np.float64)
    # np.trunc reproduces the int() truncation of the original per-row loop
    # while keeping the float64 result dtype of the np.zeros buffer it filled.
    return np.trunc(corners)

In [4]:
# Pick a random annotation row, load its image and boxes, and outline the ground truth.
import batch_generate
i_line = np.random.randint(len(data))
name_str, img, bb_boxes = batch_generate.get_img_by_name(data, i_line, size = (960, 640))
gta = csv2bbox(bb_boxes)
plt.figure(figsize=(10,10))
plt.imshow(img)
currentAxis = plt.gca()
print(len(gta))
# Each gta row is [xmin, ymin, xmax, ymax]; Rectangle wants the corner plus width and height.
for x_min, y_min, x_max, y_max in gta:
    outline = plt.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                            fill=False, edgecolor='r', linewidth=1)
    currentAxis.add_patch(outline)


4

In [5]:
# Build the RPN-only Keras model on top of the shared base network, load
# pretrained weights into it, and compile it for training.
import netarch
import tensorflow as tf
import keras
from keras import backend as K
from keras.optimizers import Adam
from keras.layers import Input
from keras.models import Model
import losses
# Height and width are None so the input accepts images of any size; 3 channels.
input_shape_img = (None, None, 3)
img_input = Input(shape=input_shape_img)
# NOTE(review): roi_input is not used by any visible cell — presumably reserved
# for a later classifier head.
roi_input = Input(shape=(300, 4))
# Define the base network.
# NOTE(review): the original comment said "resnet here", but the weights loaded
# below come from a VGG16 file, so base_net is presumably VGG16 — confirm in netarch.
shared_layers = netarch.base_net(img_input, trainable=False)
# Define the RPN, built on the base layers.
num_anchors = 9
rpn = netarch.rpn(shared_layers, num_anchors)
# Only the first two RPN outputs become model outputs; the compile() call below
# pairs them with the classification and regression losses, in that order.
model_rpn = Model(img_input, rpn[:2])
# by_name=True loads weights only into layers whose names match those in the file.
model_rpn.load_weights('vgg16_weights_tf_dim_ordering_tf_kernels.h5', by_name=True)
optimizer = Adam(lr=1e-5)
# One loss per model output, in output order.
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_classification(num_anchors), losses.rpn_regression(num_anchors)])

In [6]:
# Build the two RPN training targets for this image from its ground-truth boxes;
# they are passed to train_on_batch as Y in a later cell.
y_rpn_cls,y_rpn_regr = batch_generate.label_generate(img, gta)

In [7]:
# Turn the image into a float32 batch of one with a per-channel mean removed,
# then run one forward pass through the RPN.
# NOTE(review): these means are applied to channels 0, 1, 2 in this order —
# confirm that matches the channel order of `img`.
img_channel_mean = [103.939, 116.779, 123.68]
img_scaling_factor = 1.0
x_img = img.astype(np.float32)
for channel_idx, channel_mean in enumerate(img_channel_mean):
    x_img[:, :, channel_idx] -= channel_mean
x_img /= img_scaling_factor
# Prepend the batch axis: (H, W, C) -> (1, H, W, C).
bat_img = x_img[np.newaxis, ...]
P_rpn = model_rpn.predict_on_batch(bat_img)

In [8]:
# One training step on the single-image batch, then re-predict with the
# updated weights so the next cell can visualise the new proposals.
X = bat_img
# Target order matches the model's two outputs (classification, regression).
Y = [y_rpn_cls,y_rpn_regr]
loss_rpn = model_rpn.train_on_batch(X, Y)
P_rpn = model_rpn.predict_on_batch(X)

In [9]:
# Turn the RPN outputs into proposal boxes (maximum = 50) and outline them on the image.
import utils
boxes, scores = utils.propose_cpu(P_rpn[1], P_rpn[0], maximum = 50)
plt.figure(figsize=(10,10))
plt.imshow(img)
currentAxis = plt.gca()
# The batch holds one image, so take its proposals.
box = boxes[0]
print(box.shape)
# Rows are used as [x1, y1, x2, y2]; Rectangle takes the corner plus width and height.
for left, top, right, bottom in box:
    outline = plt.Rectangle((left, top), right - left, bottom - top,
                            fill=False, edgecolor='r', linewidth=1)
    currentAxis.add_patch(outline)


(50, 4)

In [10]:
# A second training step on the same batch, followed by a fresh prediction
# with the updated weights.
loss_rpn = model_rpn.train_on_batch(X, Y)
P_rpn = model_rpn.predict_on_batch(X)

In [11]:
# Recompute the proposals (maximum = 50) from the latest prediction and draw them.
boxes, scores = utils.propose_cpu(P_rpn[1], P_rpn[0], maximum = 50)
plt.figure(figsize=(10,10))
plt.imshow(img)
currentAxis = plt.gca()
# The batch holds one image, so take its proposals.
box = boxes[0]
print(box.shape)
# Rows are used as [x1, y1, x2, y2]; Rectangle takes the corner plus width and height.
for left, top, right, bottom in box:
    outline = plt.Rectangle((left, top), right - left, bottom - top,
                            fill=False, edgecolor='r', linewidth=1)
    currentAxis.add_patch(outline)


(50, 4)

In [12]:
# Train on the same single-image batch for 100 more steps, logging the loss each step.
# With two compiled losses, train_on_batch returns [total, classification, regression].
for i in range(100):
    loss_rpn = model_rpn.train_on_batch(X, Y)
    print('iter {0}, loss {1}'.format(i, loss_rpn))
# Predict once, after training. The original ran predict_on_batch inside the
# loop and discarded every result but the last, costing 99 extra forward passes.
P_rpn = model_rpn.predict_on_batch(X)
boxes, scores = utils.propose_cpu(P_rpn[1], P_rpn[0], maximum = 10)

plt.figure(figsize=(10,10))
plt.imshow(img)
currentAxis = plt.gca()
# The batch holds one image, so take its proposals.
box = boxes[0]
print(box.shape)
# Rows are used as [x1, y1, x2, y2]; Rectangle takes the corner plus width and height.
for left, top, right, bottom in box:
    outline = plt.Rectangle((left, top), right - left, bottom - top,
                            fill=False, edgecolor='r', linewidth=1)
    currentAxis.add_patch(outline)


iter 0, loss [0.7930035, 0.6688112, 0.12419228]
iter 1, loss [0.76646155, 0.65543938, 0.11102219]
iter 2, loss [0.74093431, 0.64202869, 0.098905623]
iter 3, loss [0.71643472, 0.62858039, 0.087854303]
iter 4, loss [0.69295555, 0.61506307, 0.07789246]
iter 5, loss [0.67034507, 0.60136425, 0.068980798]
iter 6, loss [0.64856541, 0.58741742, 0.061147992]
iter 7, loss [0.62757778, 0.5731982, 0.054379579]
iter 8, loss [0.60718739, 0.5585736, 0.048613761]
iter 9, loss [0.58742744, 0.54355478, 0.043872669]
iter 10, loss [0.56836224, 0.52824682, 0.040115435]
iter 11, loss [0.55004305, 0.51266903, 0.037373994]
iter 12, loss [0.53240782, 0.49678394, 0.03562386]
iter 13, loss [0.51545459, 0.48067972, 0.03477487]
iter 14, loss [0.49929178, 0.46452326, 0.034768518]
iter 15, loss [0.48377475, 0.44834918, 0.03542557]
iter 16, loss [0.46883401, 0.43230259, 0.036531426]
iter 17, loss [0.45434791, 0.41647923, 0.03786869]
iter 18, loss [0.44018212, 0.40096614, 0.039215993]
iter 19, loss [0.42628002, 0.38592133, 0.040358685]
iter 20, loss [0.41253877, 0.37142134, 0.041117433]
iter 21, loss [0.39884496, 0.35749444, 0.041350529]
iter 22, loss [0.3853218, 0.34425077, 0.041071016]
iter 23, loss [0.37197554, 0.33165407, 0.040321466]
iter 24, loss [0.35880747, 0.31966352, 0.039143939]
iter 25, loss [0.34590849, 0.30829784, 0.037610643]
iter 26, loss [0.33333865, 0.29748777, 0.035850883]
iter 27, loss [0.3211816, 0.28717661, 0.034004986]
iter 28, loss [0.30961043, 0.27731606, 0.032294374]
iter 29, loss [0.29874796, 0.26794228, 0.03080567]
iter 30, loss [0.28844491, 0.25894216, 0.029502749]
iter 31, loss [0.27872223, 0.25028858, 0.028433653]
iter 32, loss [0.26952714, 0.2419851, 0.027542029]
iter 33, loss [0.26082036, 0.23403208, 0.026788281]
iter 34, loss [0.25260898, 0.22644468, 0.026164312]
iter 35, loss [0.24483666, 0.21920918, 0.025627486]
iter 36, loss [0.23749465, 0.21233457, 0.025160074]
iter 37, loss [0.23053022, 0.20580897, 0.024721254]
iter 38, loss [0.22397967, 0.19964896, 0.024330711]
iter 39, loss [0.21785715, 0.1938765, 0.023980645]
iter 40, loss [0.21222174, 0.18852958, 0.023692153]
iter 41, loss [0.20705956, 0.18357272, 0.02348684]
iter 42, loss [0.20229682, 0.17894803, 0.023348793]
iter 43, loss [0.1978818, 0.1746351, 0.023246707]
iter 44, loss [0.19372036, 0.17057943, 0.023140928]
iter 45, loss [0.18976082, 0.16677181, 0.022989009]
iter 46, loss [0.18597709, 0.16320448, 0.02277261]
iter 47, loss [0.18236941, 0.15983623, 0.022533173]
iter 48, loss [0.17895952, 0.15664375, 0.022315767]
iter 49, loss [0.17575885, 0.15360232, 0.022156538]
iter 50, loss [0.17274278, 0.15071318, 0.02202961]
iter 51, loss [0.16987324, 0.14800696, 0.021866273]
iter 52, loss [0.1671356, 0.14546052, 0.021675074]
iter 53, loss [0.16452262, 0.14305347, 0.021469152]
iter 54, loss [0.16200018, 0.14075579, 0.021244396]
iter 55, loss [0.15962316, 0.13859957, 0.021023586]
iter 56, loss [0.15735051, 0.13651544, 0.020835079]
iter 57, loss [0.15516919, 0.13449958, 0.020669617]
iter 58, loss [0.15302496, 0.13253942, 0.020485537]
iter 59, loss [0.15090796, 0.13062344, 0.020284515]
iter 60, loss [0.14887802, 0.12877713, 0.02010089]
iter 61, loss [0.14694329, 0.12698248, 0.019960808]
iter 62, loss [0.14506574, 0.12521507, 0.019850666]
iter 63, loss [0.14318264, 0.12347698, 0.019705648]
iter 64, loss [0.14129883, 0.12175814, 0.019540694]
iter 65, loss [0.13946418, 0.12006482, 0.019399365]
iter 66, loss [0.13770992, 0.11842936, 0.019280558]
iter 67, loss [0.13596937, 0.11683002, 0.019139346]
iter 68, loss [0.13425608, 0.11528559, 0.018970495]
iter 69, loss [0.13256112, 0.11374477, 0.018816337]
iter 70, loss [0.13089783, 0.11221021, 0.018687628]
iter 71, loss [0.12922314, 0.11068057, 0.018542578]
iter 72, loss [0.12753922, 0.10917205, 0.018367177]
iter 73, loss [0.12589739, 0.10768928, 0.018208103]
iter 74, loss [0.12428897, 0.10621422, 0.018074749]
iter 75, loss [0.12270307, 0.10475583, 0.017947232]
iter 76, loss [0.1211079, 0.10331487, 0.017793031]
iter 77, loss [0.11951388, 0.10186204, 0.017651841]
iter 78, loss [0.11791827, 0.10040064, 0.017517626]
iter 79, loss [0.11632953, 0.098957054, 0.017372474]
iter 80, loss [0.11474298, 0.097529359, 0.017213618]
iter 81, loss [0.11319254, 0.096125275, 0.017067263]
iter 82, loss [0.11166993, 0.094747312, 0.016922614]
iter 83, loss [0.11016384, 0.093391262, 0.016772574]
iter 84, loss [0.10867363, 0.092052937, 0.016620699]
iter 85, loss [0.10719229, 0.090728231, 0.016464064]
iter 86, loss [0.10572492, 0.089418001, 0.016306924]
iter 87, loss [0.10426831, 0.088112757, 0.01615556]
iter 88, loss [0.10280727, 0.086805612, 0.016001659]
iter 89, loss [0.10135267, 0.085504808, 0.015847862]
iter 90, loss [0.099895298, 0.084209554, 0.015685746]
iter 91, loss [0.098445773, 0.08292257, 0.015523202]
iter 92, loss [0.09699513, 0.081640273, 0.015354853]
iter 93, loss [0.095583349, 0.080377899, 0.01520545]
iter 94, loss [0.094193637, 0.079131208, 0.015062425]
iter 95, loss [0.092801563, 0.07790219, 0.014899376]
iter 96, loss [0.091413245, 0.076694272, 0.014718972]
iter 97, loss [0.090046406, 0.075493947, 0.014552461]
iter 98, loss [0.088671573, 0.074288748, 0.014382825]
iter 99, loss [0.087281398, 0.073081255, 0.014200145]
(10, 4)

In [13]:
# Load a pretrained model. (This model has only been trained for 1000 iterations on Pascal VOC2007;
# TODO confirm the count, since the weights file loaded below is named '10000rpn.h5'.)
# Later I will add a classification layer at the end of this network; this version is just a single
# region proposal network.

In [14]:
# Rebuild the same RPN graph as earlier in the notebook and load previously
# trained RPN weights into it. The model is not compiled here; the visible
# cells after this one only call predict_on_batch on it.
# Height and width are None so the input accepts images of any size; 3 channels.
input_shape_img = (None, None, 3)
img_input = Input(shape=input_shape_img)
# NOTE(review): roi_input is not used by any visible cell — presumably reserved
# for a later classifier head.
roi_input = Input(shape=(300, 4))
# Define the base network.
# NOTE(review): the original comment said "resnet here"; an earlier cell loads
# VGG16 weights into the same architecture, so it is presumably VGG16 — confirm in netarch.
shared_layers = netarch.base_net(img_input, trainable=False)
# Define the RPN, built on the base layers.
num_anchors = 9
rpn = netarch.rpn(shared_layers, num_anchors)
# Only the first two RPN outputs become model outputs.
model_rpn = Model(img_input, rpn[:2])
# by_name=True loads weights only into layers whose names match those in the file.
model_rpn.load_weights('10000rpn.h5', by_name=True)

In [15]:
# Forward pass of the reloaded model on the batch preprocessed earlier in the notebook.
P_rpn = model_rpn.predict_on_batch(bat_img)

In [16]:
# Turn the RPN outputs into proposal boxes and scores (maximum = 20).
boxes, scores = utils.propose_cpu(P_rpn[1], P_rpn[0], maximum = 20)

In [17]:
# Outline the reloaded model's proposals on the image.
plt.figure(figsize=(10,10))
plt.imshow(img)
currentAxis = plt.gca()
# The batch holds one image, so take its proposals.
box = boxes[0]
print(box.shape)
# Rows are used as [x1, y1, x2, y2]; Rectangle takes the corner plus width and height.
for left, top, right, bottom in box:
    outline = plt.Rectangle((left, top), right - left, bottom - top,
                            fill=False, edgecolor='r', linewidth=1)
    currentAxis.add_patch(outline)


(20, 4)

In [ ]: