In [415]:
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.naive_bayes import MultinomialNB
import numpy as np

In [544]:
fileNameTrainFeatures = "bowpos.npy"
fileNameTestFeatures = "testbowpos.npy"

In [536]:
def loadFeaturesFile(fileName):
    # Load the labelled training matrix (class label in column 0) from a .npy file.
    featureLabels = np.load(fileName)
    return np.asarray(featureLabels)

In [537]:
def loadFeaturesTest(fileName):
    # Load the unlabelled test feature matrix from a .npy file.
    featureTest = np.load(fileName)
    return featureTest

In [538]:
def getClassSize(featureLabels):
    # Count samples per class; labels sit in column 0 and take values 1..5.
    size = np.zeros(5)
    for i in range(len(featureLabels)):
        k = int(featureLabels[i][0] - 1)
        size[k] += 1
    return size
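
The crossValidation helper called in the next cell is never defined in this notebook section. A minimal sketch of what it has to return, assuming the class label sits in column 0 of featureLabels (as getClassSize reads it) and that the second argument is a random seed for the split; the original helper may select a fold instead, so treat the signature details as assumptions:


In [ ]:
def crossValidation(featureLabels, seed, valFraction=0.2):
    # Assumed layout: column 0 is the class label, remaining columns are features.
    X = featureLabels[:, 1:]
    y = featureLabels[:, 0]
    # Hold out valFraction of the rows for validation. `seed` and `valFraction`
    # are assumptions, not the original implementation.
    Xt, Xv, yt, yv = train_test_split(X, y, test_size=valFraction, random_state=seed)
    return Xt, yt, Xv, yv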

In [539]:
featureLabels = loadFeaturesFile(fileNameTrainFeatures)
print(getClassSize(featureLabels))
testAvailable = True

# The train/validation split is needed either way; only load the
# separate test features when they are available.
Xt, yt, Xv, yv = crossValidation(featureLabels, 1)
if testAvailable:
    featureTest = loadFeaturesTest(fileNameTestFeatures)
    Xtest = featureTest

SVC


In [540]:
svmClassifier = svm.SVC(C=1e3, kernel='rbf', tol=1e-5, decision_function_shape='ovr')
svmscores = cross_val_score(svmClassifier, Xt, yt, cv=5)

In [542]:
svmscores


Out[542]:
array([ 0.68378378,  0.67432432,  0.67702703,  0.63378378,  0.65675676])

Logistic Regression


In [432]:
LRclassifier = LogisticRegression(multi_class='multinomial', solver='lbfgs', C=1e3)
LRscores = cross_val_score(LRclassifier, Xt, yt, cv=5)

In [436]:
LRscores


Out[436]:
array([ 0.65675676,  0.62567568,  0.65945946,  0.61756757,  0.65540541])

Random Forest


In [430]:
RFclassifier = RandomForestClassifier(n_estimators=1500, criterion='entropy')
RFscores = cross_val_score(RFclassifier, Xt, yt, cv=5)

In [437]:
RFscores


Out[437]:
array([ 0.69054054,  0.67837838,  0.71621622,  0.7       ,  0.69324324])

Naive Bayes


In [428]:
mnb = MultinomialNB()
mnbScores = cross_val_score(mnb, Xt, yt, cv=5)

In [438]:
mnbScores


Out[438]:
array([ 0.67162162,  0.67297297,  0.67567568,  0.65810811,  0.69189189])

Decision Tree


In [433]:
DTclassifier = DecisionTreeClassifier(criterion='entropy')
DTscores = cross_val_score(DTclassifier, Xt, yt, cv=5)

In [439]:
DTscores


Out[439]:
array([ 0.63108108,  0.58513514,  0.61081081,  0.61891892,  0.57972973])

Neural Network


In [541]:
NNClassifier = MLPClassifier(hidden_layer_sizes=(400, 200), activation='relu', alpha=1e-4, max_iter=1500, learning_rate_init=0.001, early_stopping=True)
NNscores = cross_val_score(NNClassifier, Xt, yt, cv=5)

In [543]:
NNscores


Out[543]:
array([ 0.67972973,  0.65945946,  0.65810811,  0.61891892,  0.66081081])
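
Random Forest posts the strongest five-fold results above. A quick side-by-side of the mean cross-validation accuracies, computed from the score arrays already in memory:


In [ ]:
for name, scores in [('SVM', svmscores), ('LogisticRegression', LRscores),
                     ('RandomForest', RFscores), ('MultinomialNB', mnbScores),
                     ('DecisionTree', DTscores), ('MLP', NNscores)]:
    # Each entry is the 5-element array returned by cross_val_score above.
    print('%-18s mean=%.4f  std=%.4f' % (name, scores.mean(), scores.std()))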

TRAIN AND TEST

SVM


In [513]:
svmClassifier = svm.SVC(C=1e3, kernel='rbf', tol=1e-5, decision_function_shape='ovr')
svmClassifier = svmClassifier.fit(Xt, yt)
labelSVM = svmClassifier.predict(Xtest)

In [515]:
labelSVM


Out[515]:
array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 1,
       1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])

In [520]:
NNClassifier = MLPClassifier(hidden_layer_sizes=(400, 200), activation='relu', alpha=1e-4, max_iter=1500, learning_rate_init=0.001, early_stopping=True)
NNClassifier = NNClassifier.fit(Xt, yt)
labelNN = NNClassifier.predict(Xtest)

In [522]:
labelNN


Out[522]:
array([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 3, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 4,
       4, 4, 3, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4,
       2, 4, 4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 4, 4,
       3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])
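
Both prediction vectors are dominated by class 4, which likely mirrors the class balance of the training data. A quick count of the predicted labels makes the skew explicit:


In [ ]:
for name, labels in [('SVM', labelSVM), ('MLP', labelNN)]:
    # Tally how often each class appears among the test predictions.
    values, counts = np.unique(labels, return_counts=True)
    print(name, dict(zip(values.tolist(), counts.tolist())))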