In [1]:
import sys
import os
sys.path.append('../src/')
sys.path.append('/home/ipl/installs/caffe-rc/python/')
import matplotlib
%matplotlib inline
from matplotlib import pylab as plt
import numpy as np
import scipy.misc
import scipy.stats
import caffe
import cv2
import re

from sklearn import svm
from sklearn import metrics
import utils
from datetime import datetime as dt

from dataset import CUB_200_2011
from storage import datastore
from deep_extractor import CNN_Features_CAFFE_REFERENCE
from datetime import datetime as dt
import settings
from parts import *
from cub_utils import *
import skimage

In [2]:
cub = CUB_200_2011(settings.CUB_ROOT)
cub_parts = cub.get_parts()
IDtrain, IDtest = cub.get_train_test_id()
all_image_infos = cub.get_all_image_infos()
all_segmentaion_infos = cub.get_all_segmentation_infos()
cub_parts = cub.get_parts()

In [3]:
fig = plt.figure(figsize=(15, 10))
ax_i = fig.add_subplot(121)
ax_s = fig.add_subplot(122)

img_id = IDtrain[3263]
print all_image_infos[img_id]
seg = thresh_segment_mean(caffe.io.load_image(all_segmentaion_infos[img_id]))
img = caffe.io.load_image(all_image_infos[img_id])
parts = cub_parts.for_image(img_id)
parts_head = parts.filter_by_name(Parts.HEAD_PART_NAMES)

parts_head.draw_part(ax_i)
img_d = parts_head.draw_rect(img)

head_positive = gen_part_points(parts_head.get_rect_info(img), seg, parts_head)
head_positive.draw_part(ax_s, 'red')

head_negative = gen_bg_points(parts_head.get_rect_info(img), seg, parts_head)
head_negative.draw_part(ax_s, 'blue')

ax_i.imshow(img_d)
ax_s.imshow(seg)


/home/ipl/datasets/CUB-200-2011/CUB_200_2011/CUB_200_2011/images/109.American_Redstart/American_Redstart_0066_102774.jpg
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-3-2fd14dcafca9> in <module>()
     13 img_d = parts_head.draw_rect(img)
     14 
---> 15 head_positive = gen_part_points(parts_head.get_rect_info(img), seg, parts_head)
     16 head_positive.draw_part(ax_s, 'red')
     17 

/home/ipl/repo/omgh/src/parts.pyc in get_rect_info(self, img_shape, alpha, add_noise, noise_std_c, noise_std_d)
    137             h = 10
    138         xmin = int(max(0, (c_y - h * alpha)))
--> 139         xmax = int(min(img_shape[0] - 1, (c_y + h * alpha)))
    140         ymin = int(max(0, (c_x - w * alpha)))
    141         ymax = int(min(img_shape[1] - 1, (c_x + w * alpha)))

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [4]:
deep_helper = DeepHelper()

In [11]:
deep_helper.init_with_image(img)
fig = plt.figure(figsize=(20, 5))
I = 1
layer = 'conv4'
feat = deep_helper.feats[layer][:, :, I]

ax = fig.add_subplot(132)
ax.matshow(deep_helper.net.blobs[layer].data[0, I, :, :])
ax = fig.add_subplot(133)
ax.matshow(feat)
ax = fig.add_subplot(131)
ax.imshow(deep_helper.net.deprocess('data', deep_helper.net.blobs['data'].data[0]))


Out[11]:
<matplotlib.image.AxesImage at 0x8dfd650>

In [12]:
tic = dt.now()
head_positive.norm_for_size(img.shape[1], img.shape[0], 227)
head_negative.norm_for_size(img.shape[1], img.shape[0], 227)
feats_positive = deep_helper.features(head_positive)
feats_negative = deep_helper.features(head_negative)
print dt.now() - tic

positive_y = np.ones((len(head_positive))) * 1


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-12-7cb791866c3f> in <module>()
      1 tic = dt.now()
----> 2 head_positive.norm_for_size(img.shape[1], img.shape[0], 227)
      3 head_negative.norm_for_size(img.shape[1], img.shape[0], 227)
      4 feats_positive = deep_helper.features(head_positive)
      5 feats_negative = deep_helper.features(head_negative)

NameError: name 'head_positive' is not defined

The REAL experiments


In [3]:
dh = DeepHelper()

Make Xtrain features


In [8]:
Xtrain, ytrain = dh.part_features_for_rf(all_image_infos, all_segmentaion_infos
                                         , cub_parts, IDtrain, Parts.HEAD_PART_NAMES)


Out[8]:
((627941, 1376), (627941,))

Make Xtest features


In [8]:
Xtest, ytest = dh.part_features_for_rf(all_image_infos, all_segmentaion_infos
                                       , cub_parts, IDtest, Parts.HEAD_PART_NAMES)

some vars


In [14]:
Xtrain_i_p = '/home/ipl/Desktop/test/Xtrain.mat'
ytrain_i_p = '/home/ipl/Desktop/test/ytrain.mat'

Xtest_i_p = '/home/ipl/Desktop/test/Xtest.mat'
ytest_i_p = '/home/ipl/Desktop/test/ytest.mat'

from storage import datastore
dt = datastore('')

save Xtrain and ytrain


In [ ]:
dt.save_large_instance(Xtrain_i_p, Xtrain, 113)
dt.save_instance(ytrain_i_p, ytrain)

save Xtest and ytest


In [12]:
dt.save_large_instance(Xtest_i_p, Xtest, 43)
dt.save_instance(ytest_i_p, ytest)

load Xtest and Xtrain


In [15]:
Xtrain = dt.load_large_instance(Xtrain_i_p, 113)
Xtest = dt.load_large_instance(Xtest_i_p, 43)
ytrain = dt.load_instance(ytrain_i_p)
ytest = dt.load_instance(ytest_i_p)

ytrain = ytrain[0, :]
ytest = ytest[0, :]

the experiments


In [16]:
import sklearn.ensemble
import sklearn.svm
import sklearn.metrics

RF model


In [17]:
model_rf = sklearn.ensemble.RandomForestClassifier(n_estimators=10, bootstrap=False
                                                   , max_depth=10, n_jobs=3, random_state=None, verbose=0)

In [18]:
model_rf.fit(Xtrain, ytrain)


Out[18]:
RandomForestClassifier(bootstrap=False, compute_importances=None,
            criterion='gini', max_depth=10, max_features='auto',
            max_leaf_nodes=None, min_density=None, min_samples_leaf=1,
            min_samples_split=2, n_estimators=10, n_jobs=3,
            oob_score=False, random_state=None, verbose=0)

In [19]:
preds_rf = model_rf.predict(Xtest)

In [20]:
sklearn.metrics.accuracy_score(ytest, preds_rf)


Out[20]:
0.96716352109177472

In [21]:
print sklearn.metrics.classification_report(ytest, preds_rf)


             precision    recall  f1-score   support

          0       0.97      0.99      0.98    560888
          1       0.87      0.67      0.76     47003

avg / total       0.97      0.97      0.97    607891

SVM model


In [16]:
model_svm = sklearn.svm.LinearSVC(C=0.001)
model_svm.fit(Xtrain, ytrain)


Out[16]:
LinearSVC(C=0.001, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='l2', multi_class='ovr', penalty='l2',
     random_state=None, tol=0.0001, verbose=0)

In [18]:
preds_svm = model_svm.predict(Xtest)
print sklearn.metrics.classification_report(ytest, preds_svm)


             precision    recall  f1-score   support

          0       0.98      0.99      0.99    560888
          1       0.85      0.79      0.82     47003

avg / total       0.97      0.97      0.97    607891


In [19]:
sklearn.metrics.accuracy_score(ytest, preds_svm)


Out[19]:
0.97278459460659883

Dense prediction


In [22]:
dense_points = Parts()

for i in range(227):
    for j in range(227):
        dense_points.append(Part(-1, '?', -1, i, j, 1))

In [23]:
img_id = IDtest[6]
img_id = 3030

def vis_head(img_id):
    img = caffe.io.load_image(all_image_infos[img_id])
    seg = thresh_segment_mean(caffe.io.load_image(all_segmentaion_infos[img_id]))
    parts = cub_parts.for_image(img_id)
    parts_head = parts.filter_by_name(Parts.HEAD_PART_NAMES)

    dh.init_with_image(img)
    X = dh.features(dense_points)
    preds_prob = model_rf.predict_proba(X)
    max_prob = np.max(preds_prob[:, 1])
    preds_prob = preds_prob[:, 1].reshape((227, 227))

    img_r = dh.net.deprocess('data', dh.net.blobs['data'].data[0])

    preds = model_rf.predict(X)
    preds = preds.reshape((227, 227))
    preds = preds_prob >= (max_prob/2)

    fig = plt.figure(figsize=(10, 10))

    ax = fig.add_subplot(221)
    ax.imshow(img_r)
    ax.set_title(img_id)

    ax = fig.add_subplot(224)
    cax = ax.matshow(preds_prob.T)
    fig.colorbar(cax)

    ax = fig.add_subplot(223)
    ax.matshow(preds.T)

    imga = np.zeros((img_r.shape[0], img_r.shape[1], 4))
    imga[:, :, :3] = img_r
    imga[:, :, 3] = preds.T

    ax = fig.add_subplot(222)
    ax.imshow(imga)

for i in [970, 1765, 2490, 2498, 2596, 3642, 3761, 4128, 4144, 4353, 6097, 6122, 6125, 7348, 7919, 8020, 8036, 9446, 9460, 10988]:
    vis_head(i)
    break



In [24]:
vis_head(2707)



In [25]:
def vis_head_process(img_id):
    # img = caffe.io.load_image('/home/ipl/Desktop/pres/Chicken_-_melbourne_show_2005.jpg')
    img = caffe.io.load_image(all_image_infos[img_id])

    dh.init_with_image(img)
    X = dh.features(dense_points)
    preds_prob = model_rf.predict_proba(X)
    max_prob = np.max(preds_prob[:, 1])
    preds_prob = preds_prob[:, 1].reshape((227, 227)).T
    img_r = dh.net.deprocess('data', dh.net.blobs['data'].data[0])
    preds = preds_prob >= (max_prob/2)

    fig = plt.figure(figsize=(10, 10))

    ax = fig.add_subplot(223)
    ax.set_title('RF output')
    ax.matshow(preds)


    preds = skimage.morphology.closing(preds, skimage.morphology.square(10))
    preds = skimage.morphology.remove_small_objects(preds, min_size=10, connectivity=1)
    L, N = skimage.measure.label(preds, return_num=True, background=0)
    L_no_bg = L[L!=-1].flatten()
    vals, counts = scipy.stats.mode(L_no_bg)
    part_label = int(vals[0])
    
    indices = np.where(L==part_label)
    xmin = indices[0].min()
    xmax = indices[0].max()
    ymin = indices[1].min()
    ymax = indices[1].max()
    
    rect_info = xmin, xmax, ymin, ymax
    
    pmin = Part(-1, '?', -1, xmin, ymin, 1)
    pmax = Part(-1, '?', -1, xmax, ymax, 1)
    
    rect_parts = Parts(parts=[pmin, pmax])
    rect_parts.denorm_for_size(img.shape[0], img.shape[1], size=227)
    
    rect_info = rect_parts[0].x, rect_parts[1].x, rect_parts[0].y, rect_parts[1].y
    
    
    
    xmin, xmax, ymin, ymax = rect_info
#     img_or = cv2.resize(img, (227, 227))
    cv2.rectangle(img, (ymin, xmin), (ymax, xmax), 100, 5)
    
    ax = fig.add_subplot(221)
    ax.imshow(img)

    ax = fig.add_subplot(224)
    ax.set_title('Processed')
    cax = ax.matshow(L)

    imga = np.zeros((img_r.shape[0], img_r.shape[1], 4))
    imga[:, :, :3] = img_r
    imga[:, :, 3] = preds_prob

    ax = fig.add_subplot(222)
    ax.imshow(imga)
    ax.set_title(img_id)

for i in range(1, 2):
    vis_head_process(i)



In [26]:
vis_head_process(2706)


Some new Experiments


In [20]:
instance_split = 10
rf_safe = datastore(settings.storage('rf'))
rf_safe.super_name = 'features'
rf_safe.sub_name = 'head-points'
rf_safe.other_sub_name = 'head-final-features'

Xtrain_rf_ip = rf_safe.get_instance_path(rf_safe.super_name, rf_safe.sub_name, 'Xtrain_rf')
Xtest_rf_ip = rf_safe.get_instance_path(rf_safe.super_name, rf_safe.sub_name, 'Xtest_rf')
ytrain_rf_ip = rf_safe.get_instance_path(rf_safe.super_name, rf_safe.sub_name, 'ytrain_rf.mat')
ytest_rf_ip = rf_safe.get_instance_path(rf_safe.super_name, rf_safe.sub_name, 'ytest_rf.mat')
Xtrain_ip = rf_safe.get_instance_path(rf_safe.super_name, rf_safe.other_sub_name, 'Xtrain')
Xtest_ip = rf_safe.get_instance_path(rf_safe.super_name, rf_safe.other_sub_name, 'Xtest')

In [21]:
Xtrain_rf = rf_safe.load_large_instance(Xtrain_rf_ip, instance_split)
ytrain_rf = rf_safe.load_instance(ytrain_rf_ip)
ytrain_rf = ytrain_rf[0, :]

In [22]:
features_storage_r = datastore(settings.storage('ccrft'))
feature_extractor_r = CNN_Features_CAFFE_REFERENCE(features_storage_r, make_net=False)

features_storage_c = datastore(settings.storage('cccft'))
feature_extractor_c = CNN_Features_CAFFE_REFERENCE(features_storage_c, make_net=False)

features_storage_p_h = datastore(settings.storage('ccpheadft-100000'))
feature_extractor_p_h = CNN_Features_CAFFE_REFERENCE(features_storage_p_h, make_net=False)

Xtrain_r, ytrain_r, Xtest_r, ytest_r = cub.get_train_test(feature_extractor_r.extract_one)
Xtrain_c, ytrain_c, Xtest_c, ytest_c = cub.get_train_test(feature_extractor_c.extract_one)
Xtrain_p_h, ytrain_p_h, Xtest_p_h, ytest_p_h = cub.get_train_test(feature_extractor_p_h.extract_one)

In [23]:
new_Xtest_p_h = rf_safe.load_large_instance(Xtest_ip, instance_split)
new_Xtrain_p_h = rf_safe.load_large_instance(Xtrain_ip, instance_split)

In [24]:
C = 0.0001

Xtrain = new_Xtrain_p_h
Xtest = new_Xtest_p_h
ytrain = ytrain_r
ytest = ytest_r

model = svm.LinearSVC(C=C)
model.fit(Xtrain, ytrain)
predictions = model.predict(Xtest)

print 'accuracy', metrics.accuracy_score(ytest, predictions), 'mean accuracy', utils.mean_accuracy(ytest, predictions)


accuracy 0.577321366931 mean accuracy 0.581135

Bird HEAD tracking! (or detection)


In [117]:
def vis_head_video(frame):
    img = frame.copy()
    img = img.astype(np.float32)
    img = img/255.
    
    img_or = img.copy()
    img_or = img_or * 255
    
    dh.init_with_image(img)
    X = dh.features(dense_points)
    preds_prob = model_rf.predict_proba(X)
    max_prob = np.max(preds_prob[:, 1])
    preds_prob = preds_prob[:, 1].reshape((227, 227)).T
    img_r = dh.net.deprocess('data', dh.net.blobs['data'].data[0])
    
    preds = preds_prob >= max(0.3, (max_prob/2))
    
    preds = skimage.morphology.closing(preds, skimage.morphology.square(10))
    preds = skimage.morphology.remove_small_objects(preds, min_size=10, connectivity=1)
    L, N = skimage.measure.label(preds, return_num=True, background=0)
    if N > 0:
        L_no_bg = L[L!=-1].flatten()
        vals, counts = scipy.stats.mode(L_no_bg)
        part_label = int(vals[0])
    
        indices = np.where(L==part_label)
        xmin = indices[0].min()
        xmax = indices[0].max()
        ymin = indices[1].min()
        ymax = indices[1].max()
    
        rect_info = xmin, xmax, ymin, ymax
    
        pmin = Part(-1, '?', -1, xmin, ymin, 1)
        pmax = Part(-1, '?', -1, xmax, ymax, 1)
    
        rect_parts = Parts(parts=[pmin, pmax])
        rect_parts.denorm_for_size(img.shape[0], img.shape[1], size=227)
    
        rect_info = rect_parts[0].x, rect_parts[1].x, rect_parts[0].y, rect_parts[1].y
    
        xmin, xmax, ymin, ymax = rect_info
    
        cv2.rectangle(img_or, (ymin, xmin), (ymax, xmax), 100, 5)
    
    return img_or

In [140]:
vid_address = '/home/ipl/Desktop/bird_videos/crazy-bird.mp4'
cap = cv2.VideoCapture(vid_address)

In [141]:
_, frame = cap.read()

In [142]:
i = 1
while True:
    _, frame = cap.read()
    if frame is None:
        break
    else:
        if i % 25 == 0:
            print i
    frame = frame[:, :, [2, 1, 0]]
    res = vis_head_video(frame)
    res = res[:, :, [2, 1, 0]]
    cv2.imwrite('/home/ipl/Desktop/bird_head_tracking/temp/%d.jpg' % i, res)
    i += 1


25
50
75
100
125
150
175
200
225
250
275
300
325
350
375
400
425
450
475
500
525
550
575
600
625
650
675
700
725
750
775
800
825
850
875
900
925
950
975
1000
1025
1050

In [ ]: