In [19]:
import keras
import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline

In [20]:
# Load the bounding-box annotation table (one row per box, see the printed head).
data = pd.read_csv('voc.csv')
# 'Unnamed: 0' is presumably a row index that was saved with the CSV - drop it.
# axis is passed by keyword: the positional form drop(label, 1) is deprecated
# since pandas 1.3 and rejected by pandas 2.0.
data = data.drop('Unnamed: 0', axis=1)
# Path of the image each row belongs to, built from the file name in 'Frame'.
data['File_Path'] = './VOCdevkit2007/VOC2007/JPEGImages/' + data['Frame']
#data = data[(data['label'] == 0)].reset_index()
print(data.head())


        Frame   xmin   xmax   ymin   ymax  label  type  \
0  003815.jpg  188.0  433.0  108.0  213.0    0.0  test   
1  003815.jpg  231.0  259.0    1.0   54.0    1.0  test   
2  003815.jpg  186.0  219.0    1.0   54.0    1.0  test   
3  003815.jpg  242.0  274.0   11.0   95.0    1.0  test   
4  003815.jpg  288.0  333.0  125.0  149.0    1.0  test   

                                       File_Path  
0  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
1  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
2  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
3  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
4  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  

In [21]:
def csv2bbox(bbox):
    """Convert a table of box annotations into an (N, 4) coordinate array.

    Args:
        bbox: table supporting ``len()`` and ``.iloc[row][column]`` access
            (e.g. a pandas DataFrame) with the columns 'xmin', 'ymin',
            'xmax' and 'ymax'.

    Returns:
        Float numpy array of shape ``(len(bbox), 4)`` whose columns are
        xmin, ymin, xmax, ymax. Each value is truncated with ``int()``
        before being stored. This is the ground truth before any bbox
        encoding is applied.
    """
    # Read from the argument itself; the previous body ignored `bbox` and
    # silently depended on a global named `bb_boxes`.
    columns = ('xmin', 'ymin', 'xmax', 'ymax')
    gta = np.zeros((len(bbox), 4))
    for row in range(len(bbox)):
        record = bbox.iloc[row]
        for col, key in enumerate(columns):
            gta[row, col] = int(record[key])
    return gta

In [57]:
# Load one image (selected by a random row index into `data`) together with
# its boxes, then draw the ground-truth boxes on top of it.
import batch_generate
i_line = np.random.randint(len(data))
name_str, img, bb_boxes = batch_generate.get_img_by_name(data, i_line, size=(960, 640))
gta = csv2bbox(bb_boxes)

plt.figure(figsize=(10, 10))
plt.imshow(img)
currentAxis = plt.gca()
print(len(gta))
# One red outline per row of gta; rows are (xmin, ymin, xmax, ymax).
for i, (x1, y1, x2, y2) in enumerate(gta):
    outline = plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                            fill=False, edgecolor='r', linewidth=1)
    currentAxis.add_patch(outline)


5

In [58]:
import netarch
import tensorflow as tf
import keras
from keras import backend as K
from keras.optimizers import Adam
from keras.layers import Input
from keras.models import Model
# Image input with unspecified height/width and 3 channels.
input_shape_img = (None, None, 3)
img_input = Input(shape=input_shape_img)
# NOTE(review): roi_input is not used by any visible cell - presumably meant
# for a detector head that is not built here; confirm or remove.
roi_input = Input(shape=(300, 4))
# define the base network (resnet here, can be VGG, Inception, etc)
# NOTE(review): a later cell loads a VGG16 weight file into the same
# architecture, so "resnet" above may be stale - confirm against netarch.
shared_layers = netarch.base_net(img_input, trainable=False)
# define the RPN, built on the base layers
num_anchors = 9
rpn = netarch.rpn(shared_layers, num_anchors)
# Only the first two elements returned by netarch.rpn become model outputs.
model_rpn = Model(img_input, rpn[:2])
# Load saved weights from 'rpn.h5', matched to layers by name.
model_rpn.load_weights('rpn.h5', by_name=True)

In [59]:
# Per-channel offsets subtracted from the image before it is fed to the RPN.
# NOTE(review): these look like the Caffe-style ImageNet means in B, G, R
# order - confirm they match the channel order of `img`.
img_channel_mean = [103.939, 116.779, 123.68]
img_scaling_factor = 1.0
x_img = img.astype(np.float32)
for channel, channel_mean in enumerate(img_channel_mean):
    x_img[:, :, channel] -= channel_mean
x_img /= img_scaling_factor
# Add a leading batch axis so the model receives a batch holding this one image.
bat_img = x_img[np.newaxis, ...]
P_rpn = model_rpn.predict_on_batch(bat_img)

In [60]:
# Shape of the first RPN output. In the recorded run the last axis is 9,
# which equals num_anchors - presumably one score per anchor; confirm in netarch.
#print(P_rpn[0])
print(P_rpn[0].shape)


(1, 40, 60, 9)

In [61]:
# Shape of the second RPN output. In the recorded run the last axis is 36,
# i.e. 4 * num_anchors - presumably four box offsets per anchor; confirm in netarch.
#print(P_rpn[1])
print(P_rpn[1].shape)


(1, 40, 60, 36)

In [62]:
import utils
# Turn the raw RPN outputs into box proposals. Note the argument order:
# the second model output (P_rpn[1]) is passed first, then P_rpn[0].
# NOTE(review): `maximum` presumably caps the number of returned boxes
# (the recorded run yields 50 rows) - confirm in utils.propose_cpu.
boxes, scores = utils.propose_cpu(P_rpn[1], P_rpn[0], maximum = 50)


> 0.7  scores 13

In [63]:
# Dump the raw proposal array; each innermost row holds four integers that
# the plotting cells read as (x1, y1, x2, y2).
print(boxes)


[[[318 183 818 555]
  [321  92 827 460]
  [189 248 849 677]
  [313 277 810 649]
  [119 195 828 629]
  [179 131 907 548]
  [ 74 133 799 574]
  [ 84  45 785 472]
  [ 23 219 763 677]
  [256  83 943 493]
  [ -4 109 727 526]
  [222 178 973 635]
  [197  43 866 441]
  [158 111 519 523]
  [476  76 835 489]
  [477 176 838 590]
  [228 192 443 380]
  [547 160 764 349]
  [227 240 441 428]
  [422 123 785 540]
  [109 152 472 575]
  [549 254 762 442]
  [548 208 762 396]
  [227 144 444 330]
  [229 288 444 476]
  [230  49 446 235]
  [541 398 761 585]
  [225 381 443 568]
  [104  54 470 476]
  [546 111 764 298]
  [546  64 766 250]
  [225  95 445 282]
  [228 430 446 618]
  [552 302 766 489]
  [546 349 764 537]
  [229 333 443 521]
  [553 445 771 632]
  [392 193 759 614]
  [141 120 197 190]
  [325 169 702 589]
  [201 141 591 564]
  [460 135 518 206]
  [206  52 592 475]
  [254 161 642 599]
  [458  96 515 163]
  [157 122 214 192]
  [369  71 752 496]
  [268  77 650 491]
  [129 124 188 191]
  [477  94 534 162]]]

In [64]:
# Overlay the proposals of the first (only) batch entry on the image.
plt.figure(figsize=(10, 10))
plt.imshow(img)
currentAxis = plt.gca()
box = boxes[0]
print(box.shape)
# Rows of box are read as (x1, y1, x2, y2); draw each as a red outline.
for i, (x1, y1, x2, y2) in enumerate(box):
    outline = plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                            fill=False, edgecolor='r', linewidth=1)
    currentAxis.add_patch(outline)


(50, 4)

In [65]:
import losses
# Build the RPN a second time and rebind model_rpn to the new model, so the
# experiment below starts from the weight file loaded in this cell rather
# than from 'rpn.h5'.
# NOTE(review): this repeats the construction code of the earlier cell
# verbatim - a shared helper would avoid the duplication.
input_shape_img = (None, None, 3)
img_input = Input(shape=input_shape_img)
# NOTE(review): roi_input is unused in the visible cells; confirm or remove.
roi_input = Input(shape=(300, 4))
# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = netarch.base_net(img_input, trainable=False)
# define the RPN, built on the base layers
num_anchors = 9
rpn = netarch.rpn(shared_layers, num_anchors)
model_rpn = Model(img_input, rpn[:2])
#Single training On single images
# Weights are matched by layer name, so only layers whose names appear in the
# VGG16 file are initialised from it.
model_rpn.load_weights('vgg16_weights_tf_dim_ordering_tf_kernels.h5', by_name=True)
optimizer = Adam(lr=1e-5)
# One loss per model output, in output order: classification, then regression.
model_rpn.compile(optimizer=optimizer, loss=[losses.rpn_classification(num_anchors), losses.rpn_regression(num_anchors)])

In [66]:
# Build the two RPN training targets for this image from its ground-truth
# boxes; the call prints the number of positive and negative samples.
y_rpn_cls,y_rpn_regr = batch_generate.label_generate(img, gta)


number of postive samples 17
number of negative samples 239

In [67]:
# Training pair: the preprocessed single-image batch and its two RPN targets
# (same order as the model outputs / compiled losses).
X = bat_img
Y = [y_rpn_cls,y_rpn_regr]
# One gradient step on this single image, then re-predict on the same image.
loss_rpn = model_rpn.train_on_batch(X, Y)
P_rpn = model_rpn.predict_on_batch(X)

In [68]:
# Proposals after the first training step, drawn over the image.
boxes, scores = utils.propose_cpu(P_rpn[1], P_rpn[0], maximum=50)
plt.figure(figsize=(10, 10))
plt.imshow(img)
currentAxis = plt.gca()
box = boxes[0]
print(box.shape)
# Rows of box are read as (x1, y1, x2, y2); draw each as a red outline.
for i, (x1, y1, x2, y2) in enumerate(box):
    outline = plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                            fill=False, edgecolor='r', linewidth=1)
    currentAxis.add_patch(outline)


> 0.7  scores 0
(50, 4)

In [69]:
# A second gradient step on the same single-image batch, followed by a fresh
# prediction on that image.
loss_rpn = model_rpn.train_on_batch(X, Y)
P_rpn = model_rpn.predict_on_batch(X)

In [70]:
# Proposals after the second training step, drawn over the image.
boxes, scores = utils.propose_cpu(P_rpn[1], P_rpn[0], maximum=50)
plt.figure(figsize=(10, 10))
plt.imshow(img)
currentAxis = plt.gca()
box = boxes[0]
print(box.shape)
# Rows of box are read as (x1, y1, x2, y2); draw each as a red outline.
for i, (x1, y1, x2, y2) in enumerate(box):
    outline = plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                            fill=False, edgecolor='r', linewidth=1)
    currentAxis.add_patch(outline)


> 0.7  scores 0
(50, 4)

In [71]:
#100 training
# Repeat the single-image training step 100 times. train_on_batch returns
# [total loss, loss of output 1, loss of output 2], printed every iteration.
for i in range(100):
    loss_rpn = model_rpn.train_on_batch(X, Y)
    print('iter {0}, loss {1}'.format(i, loss_rpn))
# Predict once, after the final step. Only the last prediction is consumed
# below, so running a forward pass inside every iteration was wasted work.
P_rpn = model_rpn.predict_on_batch(X)
boxes, scores = utils.propose_cpu(P_rpn[1], P_rpn[0], maximum = 80)

# Draw the proposals of the first (only) batch entry over the image.
plt.figure(figsize=(10,10))
plt.imshow(img)
currentAxis = plt.gca()
box = boxes[0]
print(box.shape)
for i in range(len(box)):
    currentAxis.add_patch(plt.Rectangle((box[i,0], box[i,1]), box[i,2]-box[i,0], box[i,3]-box[i,1], fill=False, edgecolor= 'r', linewidth=1))


iter 0, loss [1.2623751, 0.66573954, 0.59663552]
iter 1, loss [1.2523576, 0.66303283, 0.58932477]
iter 2, loss [1.242063, 0.66025352, 0.58180946]
iter 3, loss [1.2316096, 0.65743399, 0.57417554]
iter 4, loss [1.2210639, 0.65459239, 0.56647152]
iter 5, loss [1.2104659, 0.65173978, 0.55872619]
iter 6, loss [1.1998429, 0.64888376, 0.55095917]
iter 7, loss [1.1892126, 0.64602906, 0.54318345]
iter 8, loss [1.1785898, 0.64318019, 0.53540963]
iter 9, loss [1.167985, 0.64033955, 0.52764535]
iter 10, loss [1.1574061, 0.63750958, 0.51989657]
iter 11, loss [1.1468588, 0.63469195, 0.51216686]
iter 12, loss [1.1363481, 0.63188839, 0.50445974]
iter 13, loss [1.1258775, 0.62909997, 0.49677756]
iter 14, loss [1.1154497, 0.62632757, 0.48912209]
iter 15, loss [1.1050673, 0.62357229, 0.48149502]
iter 16, loss [1.0947341, 0.62083471, 0.47389936]
iter 17, loss [1.0844539, 0.61811548, 0.46633843]
iter 18, loss [1.0742297, 0.6154151, 0.45881465]
iter 19, loss [1.0640633, 0.6127342, 0.45132914]
iter 20, loss [1.0539545, 0.61007291, 0.44388157]
iter 21, loss [1.0439043, 0.60743165, 0.43647268]
iter 22, loss [1.033913, 0.60481083, 0.42910221]
iter 23, loss [1.0239807, 0.60221046, 0.42177027]
iter 24, loss [1.0141077, 0.59963107, 0.41447657]
iter 25, loss [1.0042965, 0.5970726, 0.40722391]
iter 26, loss [0.99454916, 0.59453493, 0.40001425]
iter 27, loss [0.98486578, 0.59201825, 0.39284754]
iter 28, loss [0.97524607, 0.58952254, 0.38572356]
iter 29, loss [0.9656918, 0.58704764, 0.37864417]
iter 30, loss [0.95620465, 0.58459353, 0.37161112]
iter 31, loss [0.94678468, 0.58215994, 0.36462474]
iter 32, loss [0.93743336, 0.57974678, 0.35768658]
iter 33, loss [0.92815244, 0.57735389, 0.35079852]
iter 34, loss [0.91895008, 0.57498121, 0.34396884]
iter 35, loss [0.90983057, 0.57262802, 0.33720258]
iter 36, loss [0.90079427, 0.57029414, 0.33050013]
iter 37, loss [0.89184159, 0.56797945, 0.32386214]
iter 38, loss [0.88297355, 0.56568354, 0.31728998]
iter 39, loss [0.87419069, 0.56340611, 0.31078455]
iter 40, loss [0.86549926, 0.5611468, 0.30435243]
iter 41, loss [0.85690337, 0.55890501, 0.29799837]
iter 42, loss [0.84840631, 0.55668032, 0.29172596]
iter 43, loss [0.84001148, 0.55447185, 0.2855396]
iter 44, loss [0.83172065, 0.55227929, 0.27944136]
iter 45, loss [0.8235358, 0.550102, 0.2734338]
iter 46, loss [0.81546128, 0.5479393, 0.26752195]
iter 47, loss [0.80749935, 0.54579067, 0.26170868]
iter 48, loss [0.79965186, 0.54365551, 0.25599635]
iter 49, loss [0.79191899, 0.54153335, 0.25038564]
iter 50, loss [0.78430009, 0.53942364, 0.24487647]
iter 51, loss [0.77679491, 0.53732592, 0.23946898]
iter 52, loss [0.76940382, 0.53523993, 0.23416388]
iter 53, loss [0.76212573, 0.53316528, 0.22896044]
iter 54, loss [0.75495958, 0.53110176, 0.22385785]
iter 55, loss [0.74790508, 0.52904898, 0.21885608]
iter 56, loss [0.740964, 0.52700692, 0.21395706]
iter 57, loss [0.73413503, 0.524975, 0.20916]
iter 58, loss [0.72741789, 0.52295303, 0.20446484]
iter 59, loss [0.72081155, 0.52094084, 0.19987069]
iter 60, loss [0.71431482, 0.51893836, 0.19537646]
iter 61, loss [0.70792687, 0.51694518, 0.1909817]
iter 62, loss [0.70164698, 0.5149613, 0.18668568]
iter 63, loss [0.69547617, 0.5129866, 0.18248956]
iter 64, loss [0.68941259, 0.51102072, 0.1783919]
iter 65, loss [0.68345487, 0.50906348, 0.1743914]
iter 66, loss [0.67760134, 0.50711489, 0.17048642]
iter 67, loss [0.67185009, 0.5051747, 0.16667537]
iter 68, loss [0.66619968, 0.50324291, 0.16295674]
iter 69, loss [0.66064888, 0.50131959, 0.1593293]
iter 70, loss [0.65519726, 0.49940434, 0.15579294]
iter 71, loss [0.64984369, 0.49749732, 0.15234636]
iter 72, loss [0.64458656, 0.49559835, 0.14898819]
iter 73, loss [0.63942564, 0.4937073, 0.14571837]
iter 74, loss [0.63435936, 0.49182409, 0.14253527]
iter 75, loss [0.62938607, 0.48994875, 0.13943729]
iter 76, loss [0.62450451, 0.48808113, 0.13642338]
iter 77, loss [0.61971533, 0.48622105, 0.13349432]
iter 78, loss [0.61501712, 0.4843685, 0.13064861]
iter 79, loss [0.61040908, 0.48252335, 0.12788571]
iter 80, loss [0.6058898, 0.48068556, 0.12520425]
iter 81, loss [0.6014573, 0.47885484, 0.12260248]
iter 82, loss [0.59711003, 0.47703141, 0.12007863]
iter 83, loss [0.59284592, 0.47521514, 0.11763079]
iter 84, loss [0.5886631, 0.47340611, 0.11525699]
iter 85, loss [0.58455968, 0.47160438, 0.11295528]
iter 86, loss [0.58053386, 0.46981007, 0.11072379]
iter 87, loss [0.57658404, 0.46802324, 0.10856078]
iter 88, loss [0.57270819, 0.46624401, 0.10646417]
iter 89, loss [0.56890494, 0.46447253, 0.10443239]
iter 90, loss [0.56517267, 0.46270892, 0.10246372]
iter 91, loss [0.56150961, 0.46095327, 0.10055633]
iter 92, loss [0.55791432, 0.4592059, 0.098708399]
iter 93, loss [0.55438495, 0.45746684, 0.096918121]
iter 94, loss [0.55091989, 0.45573613, 0.09518373]
iter 95, loss [0.5475176, 0.45401415, 0.09350343]
iter 96, loss [0.5441764, 0.45230088, 0.091875523]
iter 97, loss [0.54089481, 0.45059645, 0.090298362]
iter 98, loss [0.53767145, 0.44890115, 0.088770285]
iter 99, loss [0.53450477, 0.44721502, 0.087289743]
> 0.7  scores 0
(80, 4)

In [ ]: