Isolation Forests



In [1]:

    
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest



In [2]:

    
x = np.random.randint(1, 10, size=(10, 10))
x[-1, -1] = 100  # anomaly
x[-1, -2] = 200



In [3]:

    
x









    Out[3]:





array([[  2,   4,   4,   5,   1,   9,   7,   2,   6,   4],
       [  5,   9,   5,   9,   6,   4,   5,   6,   5,   5],
       [  6,   7,   2,   5,   6,   5,   3,   8,   7,   4],
       [  1,   6,   7,   4,   4,   8,   7,   5,   6,   1],
       [  2,   6,   4,   1,   8,   2,   7,   4,   8,   5],
       [  1,   8,   2,   9,   3,   6,   2,   2,   2,   3],
       [  2,   3,   4,   3,   4,   6,   3,   4,   3,   8],
       [  9,   6,   1,   7,   2,   4,   1,   5,   9,   5],
       [  3,   4,   6,   1,   3,   5,   6,   9,   3,   7],
       [  7,   7,   6,   4,   1,   8,   5,   7, 200, 100]])



In [4]:

    
iforest = IsolationForest(contamination=0.1)
iforest.fit(x)









    Out[4]:





IsolationForest(bootstrap=False, contamination=0.1, max_features=1.0,
        max_samples='auto', n_estimators=100, n_jobs=1, random_state=None,
        verbose=0)



In [5]:

    
iforest.predict(x)









    Out[5]:





array([ 1,  1,  1,  1,  1,  1,  1,  1,  1, -1])



In [6]:

    
est = iforest.estimators_[0]
est.tree_.decision_path(x[-1:, :].astype(np.float32)).todense()









    Out[6]:





matrix([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])



In [8]:

    
est.tree_.children_right









    Out[8]:





array([10,  7,  4, -1,  6, -1, -1,  9, -1, -1, -1])



In [ ]: