In [1]:
import _init_paths

import argparse
import os
import sys
import logging
import pprint
import cv2
from config.config import config, update_config
from utils.image import resize, transform
import numpy as np
# Process-level switches; they are applied before `import mxnet` runs below.
os.environ.update({
    'PYTHONUNBUFFERED': '1',
    'MXNET_CUDNN_AUTOTUNE_DEFAULT': '0',
    'MXNET_ENABLE_GPU_P2P': '0',
})

# Apply the experiment's YAML file to the shared `config` object.
update_config('./road_train_all.yaml')

# Prepend the MXNet directory selected by the config to the import search
# path, ahead of the `import mxnet` that follows.
sys.path.insert(
    0,
    os.path.join('../external/mxnet', config.MXNET_VERSION),
)
import mxnet as mx
from core.tester import im_detect, Predictor
from symbols import *
from utils.load_model import load_param
from utils.show_boxes import show_boxes
from utils.tictoc import tic, toc
from nms.nms import py_nms_wrapper, cpu_nms_wrapper, gpu_nms_wrapper

In [2]:
# def parse_args():
#     parser = argparse.ArgumentParser(description='Show Deformable ConvNets demo')
#     # general
#     parser.add_argument('--rfcn_only', help='whether use R-FCN only (w/o Deformable ConvNets)', default=False, action='store_true')

#     args = parser.parse_args()
#     return args

# args = parse_args()

def main():
    """Run the R-FCN road-object detector on a fixed list of test images.

    Steps:
      1. Build the inference symbol named by ``config.symbol``.
      2. Read each test image, resize and transform it, and wrap the result
         in MXNet NDArrays.
      3. Create a ``Predictor`` on GPU 0 from the saved checkpoint.
      4. For every image: run ``im_detect``, apply per-class NMS, keep
         detections whose score exceeds the threshold, print timing and the
         detections, and display them with ``show_boxes``.

    Takes no arguments and returns ``None``; results are printed and shown.

    Raises:
        AssertionError: if one of the listed test images is not on disk.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    # eval() looks up "<name>.<name>" among the names brought in by
    # `from symbols import *`. The string is the constant assigned on the
    # line above, so no external input reaches eval().
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    # checkpoint to load and the minimum score a detection must exceed
    model_prefix = './output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road'
    model_epoch = 19
    score_threshold = 0.7

    # load demo data
    test_image_path = './data/RoadImages/test/'
    image_names = ['71777.jpg', '70522.jpg', '72056.jpg', '71531.jpg', '70925.jpg', '70372.jpg', '70211.jpg']
    # The same flags are used for detection and for visualisation so that an
    # EXIF orientation tag cannot rotate one copy of the image but not the other.
    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION

    samples = []
    for im_name in image_names:
        im_path = os.path.join(test_image_path, im_name)
        assert os.path.exists(im_path), '{} does not exist'.format(im_path)
        im = cv2.imread(im_path, read_flags)
        # NOTE(review): both sizes read SCALES[0][1]; confirm whether
        # target_size was meant to be SCALES[0][0]. Behaviour is left as-is.
        target_size = config.SCALES[0][1]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        # im_info row: (im_tensor.shape[2], im_tensor.shape[3], resize scale)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        samples.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(sample[name]) for name in data_names] for sample in samples]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, arrays)] for arrays in data]
    provide_label = [None] * len(data)
    arg_params, aux_params = load_param(model_prefix, model_epoch, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # NOTE(review): no warm-up pass is run before timing, so the first
    # reported time may include one-time initialisation cost - confirm.

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[provide_data[idx]],
                                     provide_label=[None])
        # The scale is the third entry of the first im_info row of each batch element.
        scales = [arrays[1].asnumpy()[0, 2] for arrays in data_batch.data]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')

        dets_nms = []
        # Score column 0 is skipped (presumably the background class - confirm).
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            # Class-agnostic models use box columns 4:8 for every class.
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            # Last column holds the score; keep only confident detections.
            cls_dets = cls_dets[cls_dets[:, -1] > score_threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        print('DEBUG: dets_nms: {}'.format(dets_nms))

        # visualize
        im = cv2.imread(os.path.join(test_image_path, im_name), read_flags)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

    print('done')

In [3]:
# Run the detection demo when this cell/script is executed directly
# (i.e. when __name__ is '__main__').
if __name__ == '__main__':
    main()


{'CLASS_AGNOSTIC': True,
 'MXNET_VERSION': 'mxnet',
 'SCALES': [(360, 640)],
 'TEST': {'BATCH_IMAGES': 1,
          'CXX_PROPOSAL': False,
          'HAS_RPN': True,
          'NMS': 0.3,
          'PROPOSAL_MIN_SIZE': 0,
          'PROPOSAL_NMS_THRESH': 0.7,
          'PROPOSAL_POST_NMS_TOP_N': 2000,
          'PROPOSAL_PRE_NMS_TOP_N': 20000,
          'RPN_MIN_SIZE': 0,
          'RPN_NMS_THRESH': 0.7,
          'RPN_POST_NMS_TOP_N': 300,
          'RPN_PRE_NMS_TOP_N': 6000,
          'max_per_image': 100,
          'test_epoch': 8},
 'TRAIN': {'ALTERNATE': {'RCNN_BATCH_IMAGES': 0,
                         'RPN_BATCH_IMAGES': 0,
                         'rfcn1_epoch': 0,
                         'rfcn1_lr': 0,
                         'rfcn1_lr_step': '',
                         'rfcn2_epoch': 0,
                         'rfcn2_lr': 0,
                         'rfcn2_lr_step': '',
                         'rpn1_epoch': 0,
                         'rpn1_lr': 0,
                         'rpn1_lr_step': '',
                         'rpn2_epoch': 0,
                         'rpn2_lr': 0,
                         'rpn2_lr_step': '',
                         'rpn3_epoch': 0,
                         'rpn3_lr': 0,
                         'rpn3_lr_step': ''},
           'ASPECT_GROUPING': True,
           'BATCH_IMAGES': 1,
           'BATCH_ROIS': -1,
           'BATCH_ROIS_OHEM': 128,
           'BBOX_MEANS': [0.0, 0.0, 0.0, 0.0],
           'BBOX_NORMALIZATION_PRECOMPUTED': True,
           'BBOX_REGRESSION_THRESH': 0.5,
           'BBOX_STDS': [0.1, 0.1, 0.2, 0.2],
           'BBOX_WEIGHTS': array([ 1.,  1.,  1.,  1.]),
           'BG_THRESH_HI': 0.5,
           'BG_THRESH_LO': 0.0,
           'CXX_PROPOSAL': False,
           'ENABLE_OHEM': True,
           'END2END': True,
           'FG_FRACTION': 0.25,
           'FG_THRESH': 0.5,
           'FLIP': False,
           'RESUME': True,
           'RPN_BATCH_SIZE': 256,
           'RPN_BBOX_WEIGHTS': [1.0, 1.0, 1.0, 1.0],
           'RPN_CLOBBER_POSITIVES': False,
           'RPN_FG_FRACTION': 0.5,
           'RPN_MIN_SIZE': 0,
           'RPN_NEGATIVE_OVERLAP': 0.3,
           'RPN_NMS_THRESH': 0.7,
           'RPN_POSITIVE_OVERLAP': 0.7,
           'RPN_POSITIVE_WEIGHT': -1.0,
           'RPN_POST_NMS_TOP_N': 300,
           'RPN_PRE_NMS_TOP_N': 6000,
           'SHUFFLE': True,
           'begin_epoch': 17,
           'end_epoch': 1000,
           'lr': 0.0005,
           'lr_factor': 0.1,
           'lr_step': '17.0053',
           'model_prefix': 'rfcn_road',
           'momentum': 0.9,
           'warmup': True,
           'warmup_lr': 5e-05,
           'warmup_step': 1000,
           'wd': 0.0005},
 'dataset': {'NUM_CLASSES': 5,
             'dataset': 'RoadImages',
             'dataset_path': 'RoadImages',
             'image_set': 'all',
             'proposal': 'rpn',
             'root_path': './data',
             'test_image_set': 'test'},
 'default': {'frequent': 100, 'kvstore': 'device'},
 'gpus': '0',
 'network': {'ANCHOR_RATIOS': [0.5, 1, 2],
             'ANCHOR_SCALES': [4, 8, 16, 32],
             'FIXED_PARAMS': ['conv1',
                              'bn_conv1',
                              'res2',
                              'bn2',
                              'gamma',
                              'beta'],
             'FIXED_PARAMS_SHARED': ['conv1',
                                     'bn_conv1',
                                     'res2',
                                     'bn2',
                                     'res3',
                                     'bn3',
                                     'res4',
                                     'bn4',
                                     'gamma',
                                     'beta'],
             'IMAGE_STRIDE': 0,
             'NUM_ANCHORS': 12,
             'PIXEL_MEANS': array([ 103.06,  115.9 ,  123.15]),
             'RCNN_FEAT_STRIDE': 16,
             'RPN_FEAT_STRIDE': 16,
             'pretrained': './model/pretrained_model/resnet_v1_101',
             'pretrained_epoch': 0},
 'output_path': './output/rfcn/road_obj',
 'symbol': 'resnet_v1_101_rfcn'}
testing 71777.jpg 0.1412s
DEBUG: dets_nms: [array([[ 281.83398438,  168.65238953,  317.3901062 ,  207.3127594 ,
           0.99994469]], dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32)]
testing 70522.jpg 0.0914s
DEBUG: dets_nms: [array([[  81.17965698,  186.70477295,  276.17254639,  339.71173096,
           0.99999905],
       [ 271.20870972,  173.84107971,  483.92779541,  312.38522339,
           0.99999797],
       [ 446.19244385,  159.71748352,  639.        ,  271.1078186 ,
           0.99998796],
       [  89.90106964,  191.04525757,  169.34194946,  265.09207153,
           0.92970723]], dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32)]
testing 72056.jpg 0.0620s
DEBUG: dets_nms: [array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32)]
testing 71531.jpg 0.1021s
DEBUG: dets_nms: [array([[   0.        ,  158.05575562,  111.98545837,  282.73934937,
           0.99999726],
       [ 290.92980957,  191.05250549,  395.4899292 ,  266.41885376,
           0.99999654],
       [ 191.10498047,  194.95770264,  271.54794312,  245.90885925,
           0.99998713],
       [ 527.80004883,  198.63490295,  639.        ,  256.92706299,
           0.99998093],
       [ 359.24868774,  198.37361145,  414.8661499 ,  232.88482666,
           0.99988794],
       [ 113.0018692 ,  198.05128479,  184.54141235,  224.04212952,
           0.99981207],
       [ 490.34570312,  204.52850342,  535.34136963,  234.68080139,
           0.999668  ],
       [ 417.72393799,  171.9443512 ,  514.36914062,  230.42294312,
           0.99951851],
       [ 430.45346069,  204.96437073,  510.665802  ,  258.93774414,
           0.99933344],
       [ 243.76255798,  194.67900085,  302.06652832,  232.88847351,
           0.99865073],
       [ 581.58190918,  201.6008606 ,  635.02734375,  225.40930176,
           0.8197155 ]], dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32)]
testing 70925.jpg 0.1008s
DEBUG: dets_nms: [array([[ 110.91069794,  181.87139893,  208.00515747,  276.84307861,
           0.9999783 ]], dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32)]
testing 70372.jpg 0.0930s
DEBUG: dets_nms: [array([[ 139.98719788,  118.38962555,  254.24563599,  218.66708374,
           0.99999905],
       [  35.92064667,  140.54841614,   90.66625977,  180.4853363 ,
           0.99995446],
       [   2.90834665,  140.66998291,   70.05532074,  198.54454041,
           0.99995029],
       [  81.76087189,  140.2898407 ,  116.22303772,  170.49441528,
           0.98396218]], dtype=float32), array([[ 379.62957764,  102.51194763,  403.01693726,  160.37957764,
           0.99966991],
       [ 415.01397705,  101.45318604,  437.20275879,  158.97413635,
           0.99858803],
       [ 428.39154053,  103.19496155,  447.71435547,  156.66136169,
           0.98719621]], dtype=float32), array([[ 300.4012146 ,  110.93136597,  326.87158203,  189.81625366,
           0.99666804],
       [ 237.89335632,  128.73202515,  263.72412109,  172.2590332 ,
           0.9938212 ]], dtype=float32), array([], shape=(0, 5), dtype=float32)]
testing 70211.jpg 0.1109s
DEBUG: dets_nms: [array([[  19.4648819 ,  186.85566711,  182.61355591,  272.36849976,
           0.99999893],
       [ 315.61181641,  159.51402283,  441.77929688,  290.5105896 ,
           0.99999332],
       [ 570.37176514,  196.38482666,  639.        ,  302.10678101,
           0.9999789 ],
       [ 229.53302002,  205.12896729,  285.92407227,  229.5020752 ,
           0.99987721],
       [ 144.85998535,  198.54257202,  191.8677063 ,  232.31155396,
           0.9998154 ]], dtype=float32), array([], shape=(0, 5), dtype=float32), array([], shape=(0, 5), dtype=float32), array([[ 118.79203796,   78.05752563,  168.66941833,  101.10171509,
           0.99835157],
       [ 445.56454468,  140.05184937,  478.06488037,  155.3894043 ,
           0.73146671]], dtype=float32)]
done

In [ ]: