In [1]:
    
%pylab
%matplotlib inline
    
    
In [2]:
    
cd ..
    
    
In [3]:
    
import sys
import numpy as np
import skimage
import cv2
import sklearn
# scikit-learn submodules used throughout this notebook need to be imported explicitly
import sklearn.cross_validation
import sklearn.preprocessing
import sklearn.metrics
import imp
    
In [4]:
    
from holoviews import *
    
    
In [5]:
    
import neukrill_net.utils
import neukrill_net.highlevelfeatures
    
In [6]:
    
import time
    
In [7]:
    
settings = neukrill_net.utils.Settings('settings.json')
    
In [8]:
    
X,y = settings.flattened_train_paths(settings.classes)
    
In [9]:
    
hlf = neukrill_net.highlevelfeatures.ContourMoments()
    
In [10]:
    
t0 = time.time()
XF = hlf.transform(X)
print("Computing features took {}".format(time.time()-t0))
    
In [24]:
    
XF.shape
    
    Out[24]:
In [26]:
    
import sklearn.externals.joblib
# cache the feature extractor, the feature matrix and the labels to disk
sklearn.externals.joblib.dump((hlf,XF,y),'cache/contourmoments.pkl')
    
    Out[26]:
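
The cached features can be reloaded in a later session instead of being recomputed; a minimal sketch, assuming the dump above completed:

hlf, XF, y = sklearn.externals.joblib.load('cache/contourmoments.pkl')
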
In [11]:
    
import sklearn.naive_bayes
    
In [12]:
    
clf = sklearn.naive_bayes.GaussianNB()
    
In [13]:
    
t0 = time.time()
# standardise the features, then hold out half the data for testing
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)
t1 = time.time()
total = t1-t0
print("Time={}".format(total))
print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))
    
    
In [14]:
    
import sklearn.linear_model
clf = sklearn.linear_model.LogisticRegression(random_state=42)
    
In [15]:
    
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)
t1 = time.time()
total = t1-t0
print("Time={}".format(total))
print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))
    
    
In [18]:
    
import sklearn.ensemble
    
In [19]:
    
clf = sklearn.ensemble.RandomForestClassifier(n_estimators=1000, max_depth=20, min_samples_leaf=5)
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)
t1 = time.time()
total = t1-t0
print("Time={}".format(total))
print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))
    
    
In [23]:
    
import sklearn.svm
clf = sklearn.svm.SVC(kernel='linear', probability=True, random_state=42)
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)
t1 = time.time()
total = t1-t0
print("Time={}".format(total))
print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))
    
    
One-vs-one SVC with the default RBF kernel:
In [22]:
    
clf = sklearn.svm.SVC(probability=True, random_state=42)
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)
t1 = time.time()
total = t1-t0
print("Time={}".format(total))
print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))
    
    
In [16]:
    
hlf2 = neukrill_net.highlevelfeatures.Haralick()
    
In [17]:
    
hlf_ = hlf+hlf2
    
In [18]:
    
XF_ = hlf_.transform(X)
    
In [19]:
    
XF_.shape
    
    Out[19]:
In [20]:
    
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF_.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)
t1 = time.time()
total = t1-t0
print("Time={}".format(total))
print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))
    
    
In [ ]: