Isolation Forests


In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest

In [2]:
x = np.random.randint(1, 10, size=(10, 10))
x[-1, -1] = 100  # anomaly
x[-1, -2] = 200

In [3]:
x


Out[3]:
array([[  2,   4,   4,   5,   1,   9,   7,   2,   6,   4],
       [  5,   9,   5,   9,   6,   4,   5,   6,   5,   5],
       [  6,   7,   2,   5,   6,   5,   3,   8,   7,   4],
       [  1,   6,   7,   4,   4,   8,   7,   5,   6,   1],
       [  2,   6,   4,   1,   8,   2,   7,   4,   8,   5],
       [  1,   8,   2,   9,   3,   6,   2,   2,   2,   3],
       [  2,   3,   4,   3,   4,   6,   3,   4,   3,   8],
       [  9,   6,   1,   7,   2,   4,   1,   5,   9,   5],
       [  3,   4,   6,   1,   3,   5,   6,   9,   3,   7],
       [  7,   7,   6,   4,   1,   8,   5,   7, 200, 100]])

In [4]:
iforest = IsolationForest(contamination=0.1)
iforest.fit(x)


Out[4]:
IsolationForest(bootstrap=False, contamination=0.1, max_features=1.0,
        max_samples='auto', n_estimators=100, n_jobs=1, random_state=None,
        verbose=0)

In [5]:
iforest.predict(x)


Out[5]:
array([ 1,  1,  1,  1,  1,  1,  1,  1,  1, -1])

In [6]:
est = iforest.estimators_[0]
est.tree_.decision_path(x[-1:, :].astype(np.float32)).todense()


Out[6]:
matrix([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

In [8]:
est.tree_.children_right


Out[8]:
array([10,  7,  4, -1,  6, -1, -1,  9, -1, -1, -1])

In [ ]: