In [38]:
# Turn on IPython's automatic post-mortem debugger for uncaught exceptions.
# NOTE(review): %pdb is a toggle — it is invoked again further down, which
# switches post-mortem debugging back off.
%pdb
In [4]:
import neukrill_net.stacked
In [17]:
import importlib

import sklearn
import sklearn.datasets
import sklearn.ensemble
import sklearn.metrics
import sklearn.model_selection
In [13]:
import numpy as np
In [20]:
import copy
In [26]:
import time
In [8]:
# Load the 150-sample, 3-class iris dataset as a small stand-in dataset.
iris = sklearn.datasets.load_iris()
In [10]:
X = iris.data
In [11]:
Y = iris.target
In [14]:
# Sanity check: number of distinct class labels (3 for iris).
len(np.unique(Y))
Out[14]:
In [15]:
# Toy two-level label hierarchy: superclass 'a' splits into leaves mapped to
# class labels 0 and 1; superclass 'b' maps straight to class label 2.
hierarchy_dict = {'a':{'a1':0,'a2':1},'b':2}
In [102]:
base_clf = sklearn.ensemble.RandomForestClassifier(n_estimators=1000, max_depth=20, min_samples_leaf=5, random_state=42)
In [103]:
clf = copy.deepcopy(base_clf)
In [24]:
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(X, Y, test_size=0.5, random_state=42)
In [104]:
t0 = time.time()
clf.fit(X_train, y_train)
t1 = time.time()
total = t1-t0
print("Time={}".format(total))
print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))
In [46]:
# NOTE(review): %pdb is a toggle — this second invocation switches the
# post-mortem debugger back OFF (it was enabled at the top of the notebook).
%pdb
In [87]:
reload(neukrill_net.stacked)
Out[87]:
In [105]:
hier_clf = neukrill_net.stacked.HierarchyClassifier(hierarchy_dict, base_clf)
In [108]:
t0 = time.time()
hier_clf.fit(X_train, y_train)
t1 = time.time()
total = t1-t0
print("Time={}".format(total))
t0 = time.time()
p = hier_clf.predict_proba(X_test)
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, p)))
print("Time={}".format(time.time()-t0))
print("Accuracy={}".format(sklearn.metrics.accuracy_score(y_test,np.argmax(p,1))))
In [97]:
# Predicted class per test row: index of the max-probability column.
np.argmax(p,1)
Out[97]:
In [98]:
# Duplicate of the accuracy line in the evaluation cell above — scratch re-run.
print("Accuracy={}".format(sklearn.metrics.accuracy_score(y_test,np.argmax(p,1))))
In [62]:
# NOTE(review): leftover interactive help query — remove before sharing.
hier_clf?
In [109]:
# Inspect the per-node classifier structure built during fit().
hier_clf.clf_hierarchy
Out[109]:
In [72]:
# Drill into a single entry of the fitted hierarchy (key 2 — presumably the
# leaf for class label 2; confirm against HierarchyClassifier internals).
hier_clf.clf_hierarchy[2]
Out[72]:
In [77]:
# Scratch check (evaluates to False) — presumably probing how the hierarchy
# traversal distinguishes subtree dicts from leaf markers.
isinstance(None,dict)
Out[77]:
In [78]:
None.iteritems