In [2]:
from __future__ import print_function, division
import numpy as np
import os
import pandas as pd
import cv2
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import mxnet as mx
In [3]:
def parse_voc_rec(filename):
"""
parse pascal voc record into a dictionary
:param filename: xml file path
:return: list of dict
"""
import xml.etree.ElementTree as ET
tree = ET.parse(filename)
objects = []
for obj in tree.findall('object'):
obj_dict = dict()
obj_dict['name'] = obj.find('name').text
obj_dict['difficult'] = int(obj.find('difficult').text)
bbox = obj.find('bndbox')
obj_dict['bbox'] = [int(bbox.find('xmin').text),
int(bbox.find('ymin').text),
int(bbox.find('xmax').text),
int(bbox.find('ymax').text)]
objects.append(obj_dict)
return objects
def iou(x, ys):
"""
Calculate intersection-over-union overlap
Params:
----------
x : numpy.array
single box [xmin, ymin ,xmax, ymax]
ys : numpy.array
multiple box [[xmin, ymin, xmax, ymax], [...], ]
Returns:
-----------
numpy.array
[iou1, iou2, ...], size == ys.shape[0]
"""
ixmin = np.maximum(ys[:, 0], x[0])
iymin = np.maximum(ys[:, 1], x[1])
ixmax = np.minimum(ys[:, 2], x[2])
iymax = np.minimum(ys[:, 3], x[3])
iw = np.maximum(ixmax - ixmin, 0.)
ih = np.maximum(iymax - iymin, 0.)
inters = iw * ih
uni = (x[2] - x[0]) * (x[3] - x[1]) + (ys[:, 2] - ys[:, 0]) * \
(ys[:, 3] - ys[:, 1]) - inters
ious = inters / uni
ious[uni < 1e-12] = 0 # in case bad boxes
return ious
def read_index_file(fname):
with open(fname) as f:
content = f.readlines()
return [x.strip() for x in content]
def image_processing(img, data_shape=(300, 300)):
img = cv2.resize(img, data_shape, interpolation=cv2.INTER_LINEAR)
img_arr = np.asarray(img)
img_arr = img_arr.copy()
# img_arr[:, :, (0, 1, 2)] = img_arr[:, :, (2, 1, 0)]
img_arr = img_arr.astype(float)
pixel_means = [123, 117, 104]
img_arr -= pixel_means
channel_swap = (2, 0, 1)
im_tensor = img_arr.transpose(channel_swap)
im_tensor = im_tensor[np.newaxis, :]
return im_tensor
def image_processing_sp(img, data_shape=(300, 300), pixel_means=[123, 117, 104]):
img = mx.img.imresize(img, 300, 300, cv2.INTER_LINEAR)
img = mx.nd.transpose(img, (2,0,1))
img = img.astype('float32')
img -= mx.nd.array(pixel_means).reshape((len(pixel_means),1,1))
img = mx.nd.expand_dims(img, axis=0)
print (img.shape)
return img
In [184]:
# get pascal image and annotation
img_dir = '../mxnet-ssd/data/pascal/VOC2007/JPEGImages/'
annotations = '../mxnet-ssd/data/pascal/VOC2007/Annotations/'
img_dir = '../mxnet-ssd/data/kaist/images/'
annotations = '../mxnet-ssd/data/kaist/annotations/'
test_set = read_index_file('../mxnet-ssd/data/kaist/image_sets/test.txt')
# rgb ssd
prefix = '../mxnet-ssd/model/resnet/deployed/deploy_ssd_300'
predictor_rgb = mx.model.FeedForward.load(prefix, 0, ctx=mx.gpu(0), numpy_batch_size=1)
In [193]:
FPPI_final = []
FNR_final = []
det_confidence = np.linspace(0.0, 1.0, num=20)
for score_th in det_confidence:
FPPI = []
FNR = []
# for img in ['000200', '000282', '000288', '000477']:
for img in test_set:
# params
gt = 0
tp = 0
fp = 0
fn = 0
gt_bboxs = []
pr_bboxs = []
img_data = np.load(img_dir+img+'.npy')
img_data = img_data[:,:,:3]
# img_data = cv2.imread(img_dir+img+'.jpg')
height, width = img_data.shape[0], img_data.shape[1]
bboxs = parse_voc_rec(annotations+img+'.xml')
img_draw = img_data.copy()
for bbox in bboxs:
if bbox['name'] == 'person':
x = bbox['bbox'][0]
y = bbox['bbox'][1]
xmax = bbox['bbox'][2]
ymax =bbox['bbox'][3]
cv2.rectangle(img_draw,(x,y),(xmax,ymax),(0,255,0), 2)
gt+=1
gt_bboxs.append([x,y,xmax,ymax])
detections = predictor_rgb.predict(image_processing(img_data))[0, :, :]
result = detections[np.where(detections[:, 0] >= 0)[0]]
# get only person class
person_bbox = result[result[:, 0] == 14]
# th predicted bboxes by confidence
for i in range(person_bbox.shape[0]):
score = person_bbox[i, 1]
if score > score_th:
xmin = int(person_bbox[i, 2] * width)
ymin = int(person_bbox[i, 3] * height)
xmax = int(person_bbox[i, 4] * width)
ymax = int(person_bbox[i, 5] * height)
cv2.rectangle(img_draw,(xmin, ymin),(xmax,ymax),(255,0,0), thickness=2)
pr_bboxs.append([xmin,ymin,xmax, ymax, score])
# matching strategy
for pr_box in sorted(pr_bboxs, key=lambda x: x[4], reverse=True):
# get all iou scores
if len(gt_bboxs) == 0:
fp+=1
continue
iou_scores = iou(np.array(pr_box), np.array(gt_bboxs))
# take only >0.5
ind_05 = [x for x in range(len(iou_scores)) if iou_scores[x] >= 0.5]
# get only one perfect match
if len(ind_05) == 1:
tp +=1
# delete gt_bbox
gt_bboxs.remove(gt_bboxs[ind_05[0]])
# if several bboxes with ov>0.5 then take one with maximum overlap
elif len(ind_05) > 1:
f_ind = 0
max_overlap = 0
for i in ind_05:
current_overlap = iou_scores[i]
if current_overlap > max_overlap:
max_overlap = current_overlap
f_ind = i
tp +=1
# delete gt_bbox
gt_bboxs.remove(gt_bboxs[f_ind])
# unmatched pr_box count as false positive
elif len(ind_05) == 0:
fp+=1
# img_draw = cv2.cvtColor(img_draw, cv2.COLOR_BGR2RGB)
# plt.imshow(img_draw)
# plt.axis('off')
# plt.show()
# print info
# print('info')
# print('gt', gt)
# print('fn', gt-tp)
# print('tp', tp)
# print('fp', fp)
# print('predicted bboxes', len(pr_bboxs))
FPPI.append(fp+0.000001)
# FNR = FN/ (FN + TP)
FNR.append((gt-tp)/(gt+0.01))
gt = 0
fn = 0
fp = 0
tp = 0
pr_bboxs = []
gt_bboxs = []
FPPI_final.append(np.array(FPPI).mean())
FNR_final.append(np.array(FNR).mean())
In [192]:
plt.figure()
plt.plot(np.log(FPPI_final), FNR_final)
axes = plt.gca()
axes.set_xlim([0,1])
plt.show()
In [ ]:
# get kaist image and annotation
img_dir = '../mxnet-ssd/data/pascal/VOC2007/JPEGImages/'
annotations = '../mxnet-ssd/data/pascal/VOC2007/Annotations/'
img_dir = '../mxnet-ssd/data/kaist/images/'
annotations = '../mxnet-ssd/data/kaist/annotations/'
test_set = read_index_file('../mxnet-ssd/data/kaist/image_sets/test.txt')
# rgb ssd
prefix = '../mxnet-ssd/model/resnet/deployed/deploy_ssd_300'
predictor_rgb = mx.model.FeedForward.load(prefix, 0, ctx=mx.gpu(0), numpy_batch_size=1)
In [9]:
prefix = '../mxnet-ssd/model/spectral/training_epochs/ssd_300'
predictor_spectral = mx.model.FeedForward.load(prefix, 18, ctx=mx.gpu(0), numpy_batch_size=1)
In [ ]:
predictor_spectral.predict()
In [12]:
# _, args, auxs = mx.model.load_checkpoint('../mxnet-ssd/model/spectral/training_epochs/ssd_300', 18)
# mod = mx.mod.Module(symbol, data_names=['rgb', 'tir'], label_names=None, context=mx.gpu(0))
# mod.bind(data_shapes=[('rgb', (1, 3, 300, 300)), ('tir', (1, 1, 300, 300))])
# mod.set_params(args, auxs)
In [ ]:
FPPI_final = []
FNR_final = []
det_confidence = np.linspace(0.0, 1.0, num=20)
for score_th in det_confidence:
FPPI = []
FNR = []
for img in test_set:
# params
gt = 0
tp = 0
fp = 0
fn = 0
gt_bboxs = []
pr_bboxs = []
img_data = np.load(img_dir+img+'.npy')
img_data = img_data[:,:,:3]
# img_data = cv2.imread(img_dir+img+'.jpg')
height, width = img_data.shape[0], img_data.shape[1]
bboxs = parse_voc_rec(annotations+img+'.xml')
img_draw = img_data.copy()
for bbox in bboxs:
if bbox['name'] == 'person':
x = bbox['bbox'][0]
y = bbox['bbox'][1]
xmax = bbox['bbox'][2]
ymax =bbox['bbox'][3]
cv2.rectangle(img_draw,(x,y),(xmax,ymax),(0,255,0), 2)
gt+=1
gt_bboxs.append([x,y,xmax,ymax])
detections = predictor_rgb.predict(image_processing(img_data))[0, :, :]
result = detections[np.where(detections[:, 0] >= 0)[0]]
# get only person class
person_bbox = result[result[:, 0] == 14]
# th predicted bboxes by confidence
for i in range(person_bbox.shape[0]):
score = person_bbox[i, 1]
if score > score_th:
xmin = int(person_bbox[i, 2] * width)
ymin = int(person_bbox[i, 3] * height)
xmax = int(person_bbox[i, 4] * width)
ymax = int(person_bbox[i, 5] * height)
cv2.rectangle(img_draw,(xmin, ymin),(xmax,ymax),(255,0,0), thickness=2)
pr_bboxs.append([xmin,ymin,xmax, ymax, score])
# matching strategy
for pr_box in sorted(pr_bboxs, key=lambda x: x[4], reverse=True):
# get all iou scores
if len(gt_bboxs) == 0:
fp+=1
continue
iou_scores = iou(np.array(pr_box), np.array(gt_bboxs))
# take only >0.5
ind_05 = [x for x in range(len(iou_scores)) if iou_scores[x] >= 0.5]
# get only one perfect match
if len(ind_05) == 1:
tp +=1
# delete gt_bbox
gt_bboxs.remove(gt_bboxs[ind_05[0]])
# if several bboxes with ov>0.5 then take one with maximum overlap
elif len(ind_05) > 1:
f_ind = 0
max_overlap = 0
for i in ind_05:
current_overlap = iou_scores[i]
if current_overlap > max_overlap:
max_overlap = current_overlap
f_ind = i
tp +=1
# delete gt_bbox
gt_bboxs.remove(gt_bboxs[f_ind])
# unmatched pr_box count as false positive
elif len(ind_05) == 0:
fp+=1
# img_draw = cv2.cvtColor(img_draw, cv2.COLOR_BGR2RGB)
# plt.imshow(img_draw)
# plt.axis('off')
# plt.show()
# print info
# print('info')
# print('gt', gt)
# print('fn', gt-tp)
# print('tp', tp)
# print('fp', fp)
# print('predicted bboxes', len(pr_bboxs))
FPPI.append(fp+0.000001)
# FNR = FN/ (FN + TP)
FNR.append((gt-tp)/(gt+0.01))
gt = 0
fn = 0
fp = 0
tp = 0
pr_bboxs = []
gt_bboxs = []
FPPI_final.append(np.array(FPPI).mean())
FNR_final.append(np.array(FNR).mean())