In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the annotation table: one row per bounding box, with the image file
# name, box centre/size (x_center, y_center, w, h), class label and split type.
data = pd.read_csv('voc_xywh.csv')
# Drop the stray 'Unnamed: 0' column (presumably an index saved by an earlier
# to_csv call). The `columns=` keyword is used because passing the axis
# positionally was deprecated in pandas 1.3 and removed in pandas 2.0.
data = data.drop(columns='Unnamed: 0')
#data['File_Path'] = './VOCdevkit2007/VOC2007/JPEGImages/' + data['Frame']
#data = data[(data['label'] == 0)].reset_index()
print(data.head())


        Frame  x_center  y_center      w      h  label  type  \
0  003815.jpg     596.0     308.0  470.0  202.0    0.0  test   
1  003815.jpg     470.0      53.0   54.0  102.0    1.0  test   
2  003815.jpg     389.0      53.0   63.0  102.0    1.0  test   
3  003815.jpg     495.0     102.0   61.0  161.0    1.0  test   
4  003815.jpg     596.0     263.0   86.0   46.0    1.0  test   

                                        FileName  
0  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
1  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
2  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
3  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  
4  ./VOCdevkit2007/VOC2007/JPEGImages/003815.jpg  

In [3]:
# This notebook version only uses the conv5-3 features; multi-scale features
# from different convolutional layers are not used here.
import batch_generate
# Pick a random annotation row; the helper returns the image it belongs to
# together with that image's bounding boxes.
i_line = np.random.randint(len(data))
name_str, img, bb_boxes = batch_generate.get_img_by_name(data, i_line, size = (960, 640))
# Keep an independent copy for display later on. A plain `copy_img = img`
# would only alias the array, so any in-place change to `img` (for example
# mean subtraction before inference) would also corrupt the displayed image.
copy_img = img.copy()
print(bb_boxes)
# Convert the boxes into the per-row layout indexed below as
# (x_min, y_min, x_max, y_max).
gta = batch_generate.bbox_transform(bb_boxes)
print(gta)
plt.figure(figsize=(10,10))
plt.imshow(img)
currentAxis = plt.gca()
# Draw every ground-truth box as a red outline on top of the image.
for i in range(len(gta)):
    currentAxis.add_patch(plt.Rectangle((gta[i,0], gta[i,1]), gta[i,2]-gta[i,0], gta[i,3]-gta[i,1], fill=False, edgecolor= 'r', linewidth=1))


    index       Frame  x_center  y_center      w      h  label      type  \
0   10630  001464.jpg      87.0     317.0   56.0   92.0    8.0  trainval   
1   10631  001464.jpg     142.0     320.0   50.0   92.0    8.0  trainval   
2   10632  001464.jpg     202.0     320.0   61.0   92.0    8.0  trainval   
3   10633  001464.jpg     182.0     180.0   77.0  193.0    8.0  trainval   
4   10634  001464.jpg     257.0     244.0   46.0   54.0    8.0  trainval   
5   10635  001464.jpg     259.0     317.0   58.0   92.0    8.0  trainval   
6   10636  001464.jpg     315.0     317.0   54.0   92.0    8.0  trainval   
7   10637  001464.jpg     371.0     289.0   58.0   60.0    8.0  trainval   
8   10638  001464.jpg     421.0     289.0   52.0   71.0    8.0  trainval   
9   10639  001464.jpg     473.0     284.0   56.0   74.0    8.0  trainval   
10  10640  001464.jpg     524.0     256.0   54.0  101.0    8.0  trainval   
11  10641  001464.jpg     584.0     241.0   61.0  137.0    8.0  trainval   
12  10642  001464.jpg     636.0     229.0   61.0  107.0    8.0  trainval   
13  10643  001464.jpg     689.0     204.0   65.0  110.0    8.0  trainval   
14  10644  001464.jpg     732.0     185.0   50.0  113.0    8.0  trainval   
15  10645  001464.jpg     774.0     162.0   46.0  104.0    8.0  trainval   
16  10646  001464.jpg     812.0     125.0   42.0  137.0    8.0  trainval   
17  10647  001464.jpg     665.0     363.0   44.0   42.0    8.0  trainval   
18  10648  001464.jpg     698.0     418.0   48.0   86.0    8.0  trainval   
19  10649  001464.jpg     624.0     362.0   58.0   80.0    8.0  trainval   
20  10650  001464.jpg     579.0     385.0   56.0   80.0    8.0  trainval   
21  10651  001464.jpg     539.0     323.0   67.0   68.0    8.0  trainval   
22  10652  001464.jpg     463.0     362.0   65.0  110.0    8.0  trainval   
23  10653  001464.jpg     375.0     356.0  102.0  128.0    8.0  trainval   
24  10654  001464.jpg     286.0     399.0   84.0   83.0    8.0  trainval   
25  10655  001464.jpg     187.0     415.0  106.0  122.0    8.0  trainval   
26  10656  001464.jpg     655.0     424.0   54.0   80.0    8.0  trainval   
27  10657  001464.jpg     650.0     500.0   60.0  107.0    8.0  trainval   
28  10658  001464.jpg     590.0     518.0   60.0   95.0    8.0  trainval   
29  10659  001464.jpg     541.0     473.0   50.0  113.0    8.0  trainval   
30  10660  001464.jpg     473.0     481.0   75.0  134.0    8.0  trainval   
31  10661  001464.jpg     397.0     436.0   42.0   39.0    8.0  trainval   
32  10662  001464.jpg     353.0     436.0   42.0   51.0    8.0  trainval   
33  10663  001464.jpg     422.0     487.0   35.0   74.0    8.0  trainval   
34  10664  001464.jpg     374.0     506.0   73.0  107.0    8.0  trainval   
35  10665  001464.jpg     308.0     499.0   60.0  122.0    8.0  trainval   
36  10666  001464.jpg     210.0     545.0  152.0  179.0    8.0  trainval   

                                         FileName  
0   ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
1   ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
2   ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
3   ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
4   ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
5   ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
6   ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
7   ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
8   ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
9   ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
10  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
11  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
12  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
13  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
14  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
15  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
16  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
17  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
18  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
19  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
20  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
21  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
22  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
23  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
24  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
25  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
26  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
27  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
28  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
29  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
30  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
31  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
32  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
33  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
34  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
35  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
36  ./VOCdevkit2007/VOC2007/JPEGImages/001464.jpg  
[[  59.   271.   115.   363. ]
 [ 117.   274.   167.   366. ]
 [ 171.5  274.   232.5  366. ]
 [ 143.5   83.5  220.5  276.5]
 [ 234.   217.   280.   271. ]
 [ 230.   271.   288.   363. ]
 [ 288.   271.   342.   363. ]
 [ 342.   259.   400.   319. ]
 [ 395.   253.5  447.   324.5]
 [ 445.   247.   501.   321. ]
 [ 497.   205.5  551.   306.5]
 [ 553.5  172.5  614.5  309.5]
 [ 605.5  175.5  666.5  282.5]
 [ 656.5  149.   721.5  259. ]
 [ 707.   128.5  757.   241.5]
 [ 751.   110.   797.   214. ]
 [ 791.    56.5  833.   193.5]
 [ 643.   342.   687.   384. ]
 [ 674.   375.   722.   461. ]
 [ 595.   322.   653.   402. ]
 [ 551.   345.   607.   425. ]
 [ 505.5  289.   572.5  357. ]
 [ 430.5  307.   495.5  417. ]
 [ 324.   292.   426.   420. ]
 [ 244.   357.5  328.   440.5]
 [ 134.   354.   240.   476. ]
 [ 628.   384.   682.   464. ]
 [ 620.   446.5  680.   553.5]
 [ 560.   470.5  620.   565.5]
 [ 516.   416.5  566.   529.5]
 [ 435.5  414.   510.5  548. ]
 [ 376.   416.5  418.   455.5]
 [ 332.   410.5  374.   461.5]
 [ 404.5  450.   439.5  524. ]
 [ 337.5  452.5  410.5  559.5]
 [ 278.   438.   338.   560. ]
 [ 134.   455.5  286.   634.5]]

In [4]:
# Inference: rebuild the network, restore the trained checkpoint and run it
# on the image sampled above.
import config
# NOTE(review): only ResNet50 from this star import is used in this cell; an
# explicit `from netarch import ResNet50` would be clearer if nothing else in
# the notebook relies on the other names.
from netarch import *
# Per-channel means subtracted from the image before it is fed to the network.
# NOTE(review): these look like the usual ImageNet means in BGR order - confirm
# that the loaded image uses the same channel order.
img_channel_mean = [103.939, 116.779, 123.68]
with tf.Graph().as_default():
    mc = config.model_parameters()
    # Weights come from the checkpoint restored below, not from a pretrained model.
    mc.LOAD_PRETRAINED_MODEL = False
    model = ResNet50(mc, '0')
    saver = tf.train.Saver(model.model_params)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        saver.restore(sess, './tf_detection/model.ckpt-9000')
        # Build the network input from the untouched `copy_img` instead of
        # from `img` itself. `img = img.astype(...)` followed by in-place
        # subtraction removed the means again on every re-run of this cell;
        # starting from `copy_img` gives the same result on the first run and
        # keeps the cell idempotent.
        img = copy_img.astype(np.float32)
        for channel, channel_mean in enumerate(img_channel_mean):
            img[:, :, channel] -= channel_mean
        #img_per_batch = np.expand_dims(img, axis = 0)
        # Wrapping the image in a list adds the batch dimension (batch size 1);
        # keep_prob is fed as 1.0 for inference.
        det_probs, det_boxes = sess.run([model.det_probs, model.det_boxes],feed_dict={model.image_input:[img], model.keep_prob: 1.0})


Input tensor shape to rpn: (1, 40, 60, 1024)
Input tensor shape to rpn_cls_score: (1, 40, 60, 512)
Input tensor shape to rpn_bbox_pred: (1, 40, 60, 512)
/home/walter/.local/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py:91: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "

In [5]:
# Flatten the network outputs of the single image in the batch into one row
# per anchor.
print(det_probs.shape, det_boxes.shape)
# Scores are grouped in pairs per anchor; column 1 of each pair is kept
# (presumably the object/foreground probability - confirm against the model).
box_probs = np.reshape(det_probs[0], [-1, 2])[:, 1]
# Four regression values per anchor. `-1` lets numpy infer the anchor count
# (40 * 60 * 9 = 21600 for the shapes printed here) instead of hard-coding it,
# so the cell keeps working for other input sizes or anchor configurations.
box_delta = np.reshape(det_boxes[0], [-1, 4])
print(box_probs.shape, box_delta.shape)


(1, 40, 60, 18) (1, 40, 60, 36)
(21600,) (21600, 4)

In [6]:
import utils
# Anchor boxes are taken from the model configuration object.
anchor_box = mc.ANCHOR_BOX
# Combine the anchors with the predicted deltas to obtain the predicted boxes
# (presumably in corner x1, y1, x2, y2 form, going by the variable name -
# confirm against utils.bbox_delta_convert_inv).
pred_box_xyxy = utils.bbox_delta_convert_inv(anchor_box, box_delta)
# Non-maximum suppression over the scored boxes with an overlap threshold of
# 0.5. NOTE(review): the positional 30 is presumably the maximum number of
# boxes to keep - confirm against utils.non_max_suppression_fast.
box_nms, probs_nms = utils.non_max_suppression_fast(
    pred_box_xyxy,
    box_probs,
    30,
    overlap_thresh=0.5,
)

In [7]:
# Show the boxes that survived non-maximum suppression on top of the
# original image.
box = box_nms
plt.figure(figsize=(10,10))
plt.imshow(copy_img)
currentAxis = plt.gca()
for row in box:
    # Each row holds two corner points; Rectangle wants the first corner plus
    # a width and a height.
    x0, y0, x1, y1 = row[0], row[1], row[2], row[3]
    outline = plt.Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False, edgecolor='r', linewidth=1)
    currentAxis.add_patch(outline)



In [ ]: