In [38]:
%pdb


Automatic pdb calling has been turned ON

In [4]:
import neukrill_net.stacked

In [17]:
import sklearn
import sklearn.cross_validation
import sklearn.datasets
import sklearn.ensemble
import sklearn.metrics

In [13]:
import numpy as np

In [20]:
import copy

In [26]:
import time

In [8]:
# Load scikit-learn's bundled iris dataset; its `.data` and `.target`
# attributes are read in the following cells.
iris = sklearn.datasets.load_iris()

In [10]:
# Feature matrix used as classifier input.
X = iris.data

In [11]:
# Integer class labels (three distinct values, see the next cell).
Y = iris.target

In [14]:
# Number of distinct class labels in the target vector.
np.unique(Y).size


Out[14]:
3

In [15]:
# Class hierarchy handed to the hierarchical classifier.
# Inner nodes are dicts keyed by an arbitrary group name; leaves are the
# integer class labels found in `Y`. Group 'a' holds labels 0 and 1,
# while 'b' is the single label 2.
hierarchy_dict = {
    'a': {
        'a1': 0,
        'a2': 1,
    },
    'b': 2,
}

In [102]:
# Template random forest. The flat baseline below uses a deep copy of it,
# and the hierarchical classifier receives this object directly.
# The fixed random_state keeps runs repeatable.
base_clf = sklearn.ensemble.RandomForestClassifier(
    n_estimators=1000,
    max_depth=20,
    min_samples_leaf=5,
    random_state=42,
)

In [103]:
# Independent copy for the flat (non-hierarchical) baseline, so fitting it
# does not modify `base_clf`.
clf = copy.deepcopy(base_clf)

In [24]:
# Hold out half of the samples for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    X,
    Y,
    test_size=0.5,
    random_state=42,
)

In [104]:
# --- Flat baseline: fit, then report wall-clock fit time and test metrics ---
t0 = time.time()
clf.fit(X_train, y_train)
t1 = time.time()

# Fit duration in seconds.
total = t1 - t0
print("Time={}".format(total))

# Mean accuracy on the held-out half.
print("Accuracy={}".format(clf.score(X_test, y_test)))

# Log loss computed from the predicted class probabilities.
print("Logloss={}".format(
    sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))
))


Time=0.573318004608
Accuracy=0.986666666667
Logloss=0.105246571647

In [46]:
%pdb


Automatic pdb calling has been turned OFF

In [87]:
# Re-import the module so edits made to stacked.py on disk are picked up
# without restarting the kernel. Objects created before the reload keep
# their old class and must be re-created (see the next cell).
reload(neukrill_net.stacked)


Out[87]:
<module 'neukrill_net.stacked' from '/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-net-tools/neukrill_net/stacked.py'>

In [105]:
# Build the hierarchical classifier from the class hierarchy and the template
# forest.
# NOTE(review): `base_clf` is passed directly, not a copy; presumably the
# classifier clones it per node -- confirm in neukrill_net.stacked.
hier_clf = neukrill_net.stacked.HierarchyClassifier(hierarchy_dict, base_clf)

In [108]:
# --- Hierarchical classifier: fit, predict, and report timings and metrics ---

# Time the fit on its own.
t0 = time.time()
hier_clf.fit(X_train, y_train)
t1 = time.time()
total = t1 - t0
print("Time={}".format(total))

# Time the prediction on its own. The clock is read immediately after
# predict_proba so that the log-loss computation and the print below are not
# counted as prediction time.
t0 = time.time()
p = hier_clf.predict_proba(X_test)
predict_time = time.time() - t0

print("Logloss={}".format(sklearn.metrics.log_loss(y_test, p)))
print("Time={}".format(predict_time))

# Predicted label = index of the most probable column.
# NOTE(review): assumes the columns of `p` are ordered by class label 0..2 --
# confirm against HierarchyClassifier.predict_proba.
print("Accuracy={}".format(sklearn.metrics.accuracy_score(y_test, np.argmax(p, axis=1))))


Time=1.12970805168
Logloss=0.108236745356
Time=0.0763559341431
Accuracy=0.986666666667

In [97]:
# Hard label predictions: index of the largest probability in each row of `p`.
np.argmax(p, axis=1)


Out[97]:
array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0,
       2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0, 1,
       1, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1, 2, 0,
       1, 2, 0, 2, 2, 1])

In [98]:
# Accuracy of the hierarchical classifier, recomputed from the cached
# probabilities `p` (row-wise argmax gives the predicted label).
print("Accuracy={}".format(
    sklearn.metrics.accuracy_score(y_test, np.argmax(p, axis=1))
))


Accuracy=0.986666666667

In [62]:
# IPython introspection: show the docstring and signature of `hier_clf`.
hier_clf?

In [109]:
# Inspect the fitted tree. As displayed below, each node is a 3-tuple:
# (classifier, {index: child name}, {index: child node}); a leaf is
# (None, None, class_label).
hier_clf.clf_hierarchy


Out[109]:
(RandomForestClassifier(bootstrap=True, compute_importances=None,
             criterion='gini', max_depth=20, max_features='auto',
             max_leaf_nodes=None, min_density=None, min_samples_leaf=5,
             min_samples_split=2, n_estimators=1000, n_jobs=1,
             oob_score=False, random_state=42, verbose=0),
 {0: 'a', 1: 'b'},
 {0: (RandomForestClassifier(bootstrap=True, compute_importances=None,
               criterion='gini', max_depth=20, max_features='auto',
               max_leaf_nodes=None, min_density=None, min_samples_leaf=5,
               min_samples_split=2, n_estimators=1000, n_jobs=1,
               oob_score=False, random_state=42, verbose=0),
   {0: 'a1', 1: 'a2'},
   {0: (None, None, 0), 1: (None, None, 1)}),
  1: (None, None, 2)})

In [72]:
# Third element of the root node: the mapping from child index to child node.
hier_clf.clf_hierarchy[2]


Out[72]:
{0: (RandomForestClassifier(bootstrap=True, compute_importances=None,
              criterion='gini', max_depth=20, max_features='auto',
              max_leaf_nodes=None, min_density=None, min_samples_leaf=5,
              min_samples_split=2, n_estimators=1000, n_jobs=1,
              oob_score=False, random_state=None, verbose=0),
  {0: 'a1', 1: 'a2'},
  {0: (None, None, 0), 1: (None, None, 1)}),
 1: (None, None, 2)}

In [77]:
# Sanity check: None is not a dict, so an isinstance test can tell leaf
# entries (None) apart from dict-valued entries.
isinstance(None,dict)


Out[77]:
False

In [78]:
# Deliberately raises AttributeError: shows that calling dict methods on a
# None entry fails, which is why the isinstance guard above is needed.
None.iteritems


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-78-d5039475b010> in <module>()
----> 1 None.iteritems

AttributeError: 'NoneType' object has no attribute 'iteritems'