Form Feature Selection


In [72]:
import sys
sys.path.insert(0, '..')

import numpy as np
import matplotlib.pyplot as plt

from lib.segmentation import FormFeatureSelection

%matplotlib inline

In [73]:
from sklearn.decomposition import PCA

def plot_corrcoef(features):
    """Show the pairwise Pearson correlation of the feature columns as a heatmap.

    `features` is passed to `np.corrcoef` with `rowvar=False`, so every column
    is treated as one variable and every row as one observation. Both axes are
    labelled with every fifth feature index.
    """
    correlation = np.corrcoef(features, rowvar=False)
    plt.pcolor(correlation)
    plt.colorbar()

    # Heatmap cell i spans [i, i + 1]; shifting by 0.5 centres each label on it.
    tick_labels = np.arange(0, features.shape[1], 5)
    tick_positions = tick_labels + 0.5
    plt.xticks(tick_positions, tick_labels)
    plt.yticks(tick_positions, tick_labels)

    plt.show()
    
def plot_pca(features):
    """Plot the cumulative explained variance ratio of a PCA fitted on `features`.

    A `PCA` with default settings is fitted on `features` and the running sum
    of its `explained_variance_ratio_` is drawn against the number of
    components kept, so the value at x = k is the fraction of variance
    explained by the first k components.
    """
    pca = PCA()
    pca.fit(features)
    cumulative_variance = np.cumsum(pca.explained_variance_ratio_)

    # The first cumulative value already belongs to ONE component, so the x
    # axis has to start at 1; plotting against the bare array index would
    # shift the whole curve by one against the "Number of components" label.
    num_components = np.arange(1, len(cumulative_variance) + 1)

    plt.plot(num_components, cumulative_variance)
    plt.xlabel('Number of components')
    plt.ylabel('Cumulative explained variance')
    plt.show()

MNIST


In [74]:
from lib.datasets import MNIST
 
# Bind `dataset` to the `test` split of MNIST loaded from ../data/mnist; the
# selector cells that follow read this variable.
dataset = MNIST('../data/mnist').test


Extracting ../data/mnist/train-images-idx3-ubyte.gz
Extracting ../data/mnist/train-labels-idx1-ubyte.gz
Extracting ../data/mnist/t10k-images-idx3-ubyte.gz
Extracting ../data/mnist/t10k-labels-idx1-ubyte.gz

SLIC


In [75]:
from lib.segmentation import slic_fixed

# SLIC segmentation settings used for the current `dataset`.
slic = slic_fixed(
    num_segments=100,
    compactness=5,
    max_iterations=10,
    sigma=0,
)

# Build the selector for this dataset/segmentation pair.
# num_examples presumably limits how many images are segmented — confirm in
# FormFeatureSelection.
selector = FormFeatureSelection(dataset, slic, num_examples=1000)

# Rows of `selector.features` are reported as segments.
print('Number of segments collected:', selector.features.shape[0])
print('Number of features collected:', selector.num_features)


>> Collecting features 100.00%
Number of segments collected: 65536
Number of features collected: 38

Plot Pearson correlation and PCA


In [76]:
# Baseline: correlation heatmap of the collected features before any selection step.
plot_corrcoef(selector.features)



In [77]:
# Baseline: cumulative explained variance of the collected features before any selection step.
plot_pca(selector.features)


Univariate feature selection


In [78]:
# First selection stage. The argument 12 is presumably the number of features
# to keep — confirm in FormFeatureSelection.select_univariate.
selector.select_univariate(12)

In [79]:
# Correlation heatmap again, this time after the univariate selection step.
plot_corrcoef(selector.features)


Recursive feature elimination (RFE)


In [80]:
# Second selection stage (recursive feature elimination). The argument 9 is
# presumably the number of features to keep — confirm in
# FormFeatureSelection.eliminate_recursive.
selector.eliminate_recursive(9)

In [81]:
# Correlation heatmap after recursive feature elimination.
plot_corrcoef(selector.features)



In [82]:
# Cumulative explained variance after both selection stages.
plot_pca(selector.features)



In [83]:
# Report the selection result: indices first, then the matching feature names,
# each on its own line.
print(
    selector.selected_feature_indices,
    selector.selected_features,
    sep='\n',
)


[ 4  5  6  7  8 18 20 21 22]
['centroid_x', 'centroid_y', 'eccentricity', 'equivalent_diameter', 'extent', 'inertia_tensor_20', 'inertia_tensor_eigvals_2', 'major_axis_length', 'minor_axis_length']

Quickshift


In [84]:
from lib.segmentation import quickshift_fixed

# Quickshift segmentation settings used for the current `dataset`.
quickshift = quickshift_fixed(
    ratio=1,
    kernel_size=2,
    max_dist=2,
    sigma=0,
)

# Rebinding `selector` discards the previous selector object.
# num_examples presumably limits how many images are segmented — confirm in
# FormFeatureSelection.
selector = FormFeatureSelection(dataset, quickshift, num_examples=1000)

# Rows of `selector.features` are reported as segments.
print('Number of segments collected:', selector.features.shape[0])
print('Number of features collected:', selector.num_features)


>> Collecting features 100.00%
Number of segments collected: 81081
Number of features collected: 38

In [85]:
# Two-stage selection: univariate selection first, then recursive feature
# elimination on the current `selector`.
selector.select_univariate(12)
selector.eliminate_recursive(9)

# Indices first, then the matching feature names, each on its own line.
print(
    selector.selected_feature_indices,
    selector.selected_features,
    sep='\n',
)


[ 4  6  7  8 24 28 29 31 37]
['centroid_x', 'eccentricity', 'equivalent_diameter', 'extent', 'mu_03', 'mu_21', 'mu_30', 'nu_03', 'orientation']

CIFAR-10


In [86]:
from lib.datasets import Cifar10

# Rebind `dataset` to the `test` split of Cifar10 loaded from ../data/cifar_10;
# the selector cells that follow read this variable.
dataset = Cifar10('../data/cifar_10').test

SLIC


In [87]:
from lib.segmentation import slic_fixed

# SLIC segmentation settings used for the current `dataset`.
slic = slic_fixed(
    num_segments=200,
    compactness=5,
    max_iterations=10,
    sigma=0,
)

# Rebinding `selector` discards the previous selector object.
# num_examples presumably limits how many images are segmented — confirm in
# FormFeatureSelection.
selector = FormFeatureSelection(dataset, slic, num_examples=1000)

# Rows of `selector.features` are reported as segments.
print('Number of segments collected:', selector.features.shape[0])
print('Number of features collected:', selector.num_features)


>> Collecting features 100.00%
Number of segments collected: 232190
Number of features collected: 38

In [88]:
# Two-stage selection: univariate selection first, then recursive feature
# elimination on the current `selector`.
selector.select_univariate(12)
selector.eliminate_recursive(9)

# Indices first, then the matching feature names, each on its own line.
print(
    selector.selected_feature_indices,
    selector.selected_features,
    sep='\n',
)


[ 0  2  5  7  8  9 19 21 22]
['area', 'bbox_height', 'centroid_y', 'equivalent_diameter', 'extent', 'hu_1', 'inertia_tensor_eigvals_1', 'major_axis_length', 'minor_axis_length']

Quickshift


In [89]:
from lib.segmentation import quickshift_fixed

# Quickshift segmentation settings used for the current `dataset`.
quickshift = quickshift_fixed(
    ratio=1,
    kernel_size=1,
    max_dist=5,
    sigma=0,
)

# Rebinding `selector` discards the previous selector object.
# num_examples presumably limits how many images are segmented — confirm in
# FormFeatureSelection.
selector = FormFeatureSelection(dataset, quickshift, num_examples=1000)

# Rows of `selector.features` are reported as segments.
print('Number of segments collected:', selector.features.shape[0])
print('Number of features collected:', selector.num_features)


>> Collecting features 100.00%
Number of segments collected: 183030
Number of features collected: 38

In [90]:
# Two-stage selection: univariate selection first, then recursive feature
# elimination on the current `selector`.
selector.select_univariate(12)
selector.eliminate_recursive(9)

# Indices first, then the matching feature names, each on its own line.
print(
    selector.selected_feature_indices,
    selector.selected_features,
    sep='\n',
)


[ 2  3  4  5  7  8 19 21 22]
['bbox_height', 'bbox_width', 'centroid_x', 'centroid_y', 'equivalent_diameter', 'extent', 'inertia_tensor_eigvals_1', 'major_axis_length', 'minor_axis_length']

PascalVOC


In [91]:
from lib.datasets import PascalVOC

# Number of examples; also passed as `val_size` on the next line.
NUM_EXAMPLES = 200
# NOTE(review): val_size is set to NUM_EXAMPLES, yet the `.test` split is the
# one bound to `dataset` — confirm that `.test` (not a validation split) is the
# intended source here.
dataset = PascalVOC('../data/pascal_voc', val_size=NUM_EXAMPLES).test

SLIC


In [92]:
from lib.segmentation import slic_fixed

# SLIC segmentation settings used for the current `dataset`.
slic = slic_fixed(
    num_segments=1600,
    compactness=30,
    max_iterations=10,
    sigma=0,
)

# Rebinding `selector` discards the previous selector object.
# num_examples presumably limits how many images are segmented — confirm in
# FormFeatureSelection.
selector = FormFeatureSelection(dataset, slic, num_examples=NUM_EXAMPLES)

# Rows of `selector.features` are reported as segments.
print('Number of segments collected:', selector.features.shape[0])
print('Number of features collected:', selector.num_features)


>> Collecting features 100.00%
Number of segments collected: 308268
Number of features collected: 38

In [93]:
# Two-stage selection: univariate selection first, then recursive feature
# elimination on the current `selector`.
selector.select_univariate(12)
selector.eliminate_recursive(9)

# Indices first, then the matching feature names, each on its own line.
print(
    selector.selected_feature_indices,
    selector.selected_features,
    sep='\n',
)


[ 1  2  3  4  5  6  8  9 21]
['bbox_area', 'bbox_height', 'bbox_width', 'centroid_x', 'centroid_y', 'eccentricity', 'extent', 'hu_1', 'major_axis_length']

Quickshift


In [94]:
from lib.segmentation import quickshift_fixed

# Quickshift segmentation settings used for the current `dataset`.
quickshift = quickshift_fixed(
    ratio=0.75,
    kernel_size=2,
    max_dist=8,
    sigma=0,
)

# Rebinding `selector` discards the previous selector object.
# num_examples presumably limits how many images are segmented — confirm in
# FormFeatureSelection.
selector = FormFeatureSelection(dataset, quickshift, num_examples=NUM_EXAMPLES)

# Rows of `selector.features` are reported as segments.
print('Number of segments collected:', selector.features.shape[0])
print('Number of features collected:', selector.num_features)


>> Collecting features 100.00%
Number of segments collected: 408261
Number of features collected: 38

In [95]:
# Two-stage selection: univariate selection first, then recursive feature
# elimination on the current `selector`.
selector.select_univariate(12)
selector.eliminate_recursive(9)

# Indices first, then the matching feature names, each on its own line.
print(
    selector.selected_feature_indices,
    selector.selected_features,
    sep='\n',
)


[ 0  2  3  4  7 19 20 21 22]
['area', 'bbox_height', 'bbox_width', 'centroid_x', 'equivalent_diameter', 'inertia_tensor_eigvals_1', 'inertia_tensor_eigvals_2', 'major_axis_length', 'minor_axis_length']