In [1]:
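# Run everything on the CPU by hiding all GPUs from TensorFlow.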
import os
os.environ['CUDA_VISIBLE_DEVICES'] = ''
In [2]:
import tensorflow as tf
import numpy as np
import cv2
import rcnn.simple_nn as nn
from rcnn.video_parser import get_data
from rcnn.data_generators import get_anchor, video_streamer
import pickle
from matplotlib import pyplot as plt
In [3]:
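# Seed Python's and NumPy's RNGs so the run is reproducible.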
from random import seed
seed(1234)
np.random.seed(1234)
In [4]:
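# Load frame/annotation metadata for the videos; the two extra return values are unused here.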
data, _, _ = get_data('./videos/', './annotations/', form='jpg')
In [5]:
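# Load the pickled config object produced earlier.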
with open('config.pickle', 'rb') as f:
    C = pickle.load(f)
In [6]:
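# Override anchor settings: 5 scales x 3 aspect ratios = 15 anchors per feature-map position.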
C.anchor_box_scales = [0.2, 0.5, 0.8, 1., 1.4]
C.anchor_box_ratios = [[1,1], [1,2], [2,1]]
C.im_size = 32
C.rpn_stride = 2
In [7]:
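# Build the test-mode generator; the lambda maps an input (height, width) to the
# feature-map size implied by the RPN stride.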
data_gen = video_streamer(data, 2, C,
                          lambda x, y: [x // C.rpn_stride, y // C.rpn_stride],
                          'tf', mode='test', frame_batchsize=4)
In [8]:
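# Draw one batch: frames come first and the per-frame annotation dicts last (see the indexing below).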
x = next(data_gen)
In [9]:
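# Sanity check: paint the ground-truth boxes of frame t solid green on the normalized input frame.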
t = 1
im = x[0][0, t].copy()
im -= im.min()
im /= im.max()
bbs = x[-1][0][t]['bboxes']
for bb in bbs:
    a, b, c, d = bb['x1'], bb['y1'], bb['x2'], bb['y2']
    #cv2.rectangle(im, (a, b), (c, d), color=-1)
    im[b:d, a:c] = np.array([0, 255, 0], dtype='float32') / 255
plt.imshow(im)
plt.show()
In [10]:
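# Total number of anchors per feature-map position.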
num_anchors = len(C.anchor_box_ratios) * len(C.anchor_box_scales)
In [11]:
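# Visualize the two halves of the RPN classification target for frame 1, summed over
# anchor channels (presumably the positive-overlap labels and the anchor-validity mask).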
plt.imshow((x[1][0][0,1][...,num_anchors:]).sum(axis=-1))
plt.show()
plt.imshow((x[1][0][0,1][...,:num_anchors]).sum(axis=-1))
plt.show()
In [12]:
sess = tf.Session()
In [13]:
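# TF1 placeholders: a 5-D video tensor (batch, time, height, width, RGB), ROI boxes,
# and a feature map fed straight to the second stage.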
video_input = tf.placeholder(tf.float32, [None, None, None, None, 3], name='video_input')
roi_input = tf.placeholder(tf.int64, [None, None, 4], name='roi_input')
feature_map_input = tf.placeholder(tf.float32, [1, None, None, nn.shared_dim], name='feature_map_input')
In [14]:
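# Number of ROIs evaluated per classifier pass.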
C.num_rois = 32
In [15]:
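# Build the graph: shared backbone, RPN heads on top of it, and a classifier that
# runs on a precomputed feature map.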
base = nn.build_shared(video_input)
rpn = nn.build_rpn(base, num_anchors)
classifier = nn.classifier(roi_input, C.num_rois, nb_classes=len(C.class_mapping), trainable=False)(feature_map_input)
In [16]:
saver = tf.train.Saver()
In [17]:
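# Invert the class mapping to index -> name and assign each class a random display color.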
class_mapping = C.class_mapping
class_mapping = {v: k for k, v in class_mapping.items()}
print(class_mapping)
class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
In [18]:
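# Turn the raw RPN/classifier logits into probabilities and wrap the two inference
# stages as plain functions.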
rpn[0] = tf.nn.sigmoid(rpn[0])
classifier[0] = tf.nn.softmax(classifier[0])
#ROIs = []
def predict_rpn_and_featuremap(X):
    return sess.run(rpn + [base], {video_input: X})

def predict_classifier(features, ROI):
    #global ROIs
    #ROIs.append(ROI)
    return sess.run(classifier, {feature_map_input: features, roi_input: ROI})
In [19]:
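# Restore the trained weights.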
saver.restore(sess, './experiment_save/with_det')
In [20]:
from rcnn.prediction_to_bb import extract_for_det, extract_for_rpn
In [21]:
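# Quick check that extract_for_rpn runs on this batch.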
extract_for_rpn(x[0][0], C)
print('1')
In [22]:
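# Single forward pass over the whole clip: RPN outputs plus the shared feature map.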
preds_n_base = predict_rpn_and_featuremap(x[0])
In [23]:
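# Y1: anchor objectness (after the sigmoid), Y2: box regressions, base_: shared feature map.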
Y1, Y2, base_ = preds_n_base
In [24]:
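# Compare the predicted objectness (max over anchors) with the raw frame at t = 7.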
t = 7
#plt.imshow((x[1][0][0,t][...,:num_anchors]).sum(axis=-1))
#plt.show()
plt.imshow(Y1[0,t].max(axis=-1))
plt.show()
plt.imshow(x[0][0,t])
plt.show()
In [25]:
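# Full two-stage detection for frame t.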
bbs = extract_for_det(Y1[:, t], Y2[:, t], base_[:, t], predict_classifier, C)
In [26]:
bbs
Out[26]:
In [27]:
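# Average ratio of detections to ground-truth boxes, over all frames in the clip.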
ly = len(Y1[0])
l = 0
for t in range(ly):
    bbs = extract_for_det(Y1[:, t], Y2[:, t], base_[:, t], predict_classifier, C)
    l += len(bbs) / len(x[-1][0][t]['bboxes'])
    #for bb in bbs:
    #    a, b, c, d = map(lambda x: x // C.rpn_stride, bb)
print(l / ly)
In [28]:
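# Keep only the first RPN output (objectness) for the visualizations below.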
preds = predict_rpn_and_featuremap(x[0])[0]
In [29]:
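# Overlay the detected boxes (scaled down by the RPN stride) on the objectness heatmap.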
im = preds[0, t].max(axis=-1)
im = np.stack([im] * 3, axis=-1)
for bb in bbs:
    a, b, c, d = map(lambda x: x // C.rpn_stride, bb)
    #cv2.rectangle(im, (a, b), (c, d), color=-1)
    im[b:d, a:c] = np.array([0, 255, 0], dtype='float32') / 255
plt.imshow(im, vmin=0, vmax=1)
plt.show()
#plt.imshow((x[1][0][0,t][...,:num_anchors]).sum(axis=-1))
#plt.show()
In [30]:
a, b, c, d
Out[30]:
In [31]:
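# Objectness statistics for frame 19, ignoring a 4-pixel border.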
preds[0, 19][4:-4, 4:-4].max(), preds[0, 19].min(), preds[0, 19].mean()
Out[31]:
In [32]:
# Disabled debug helper: draw the proposals recorded in ROIs on the feature map
# (requires uncommenting the ROIs bookkeeping in predict_classifier above).
#for bb in ROIs[0][0]:
#    im = preds[0,16].max(axis=-1)
#    a, b, c, d = bb
#    # boxes appear to be stored as (x, y, w, h); convert to corner coordinates
#    c += a
#    d += b
#    cv2.rectangle(im, (a, b), (c, d), 1000)
#    plt.imshow(im, vmin=0, vmax=1)
#    plt.show()