In [184]:
from sklearn import svm
from sklearn.linear_model import LogisticRegression;
from sklearn.ensemble import RandomForestClassifier;
from sklearn.neural_network import MLPClassifier;
from sklearn.tree import DecisionTreeClassifier;
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
import math
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.decomposition import PCA;

In [185]:
# Input files: bag-of-words + POS feature matrices saved with np.save.
# The train file's first column is the 1-based class label; the test file
# has features only (no label column) — see defineTrain / the predict cells.
fileNameTrainFeatures = "bowpos.npy"
fileNameTestFeatures = "testbowpos.npy"

In [186]:
def loadFeaturesFile(fileName):
    """Load the labelled training matrix (labels in column 0) from a .npy file.

    np.load already returns an ndarray, so the former
    ``np.asarray(np.load(...))`` round-trip was redundant and is removed —
    behavior is unchanged.
    """
    return np.load(fileName)

In [187]:
def loadFeaturesTest(fileName):
    """Read the held-out test feature matrix stored at ``fileName`` (.npy)."""
    return np.load(fileName)

In [188]:
def getClassSize(featureLabels, numClasses=5):
    """Count how many samples fall into each class.

    Parameters
    ----------
    featureLabels : 2-D array whose first column holds 1-based class labels.
    numClasses : int, optional
        Number of classes (default 5, matching the original hard-coded
        value, so existing callers are unaffected).

    Returns
    -------
    numpy array of length ``numClasses`` with per-class counts
    (float dtype, as produced by np.zeros).
    """
    size = np.zeros(numClasses)
    for row in featureLabels:
        # Labels are 1-based; shift to a 0-based bin index.
        size[int(row[0] - 1)] += 1
    return size

In [189]:
def defineTrain(featureLabels):
    """Shuffle the labelled matrix and split it into features and labels.

    NOTE: np.random.shuffle mutates the caller's array in place.

    Returns
    -------
    (trainFeat, trainLabel) where trainLabel is a list of int class
    labels (column 0) and trainFeat is a list of per-row feature
    vectors (columns 1:).
    """
    np.random.shuffle(featureLabels)
    trainLabel = [int(row[0]) for row in featureLabels]
    trainFeat = [row[1:] for row in featureLabels]
    return trainFeat, trainLabel

In [190]:
# Load the labelled training matrix and inspect the class balance.
featureLabels = loadFeaturesFile(fileNameTrainFeatures)
# NOTE(review): in a script context this return value is discarded; in the
# notebook it was presumably viewed as cell output.
getClassSize(featureLabels)
# Toggle: when True, also load the unlabelled test features for prediction.
testAvailable = True

if testAvailable:
    featureTest = loadFeaturesTest(fileNameTestFeatures)
    Xt, yt = defineTrain(featureLabels)
    Xtest = featureTest
else:
    Xt, yt = defineTrain(featureLabels)

PCA (optional dimensionality reduction — see the caveat in the cell below)


In [61]:
# Reduce the features to 300 principal components.
# NOTE(review): copy=False lets PCA overwrite Xt's buffer in place, and
# Xtest is never transformed with the same fitted PCA — if this cell runs,
# the later predict(Xtest) cells would compare incompatible feature spaces.
# The stale execution count (In[61] vs In[190] above) suggests this cell
# was NOT run in the final pass — confirm before re-running top to bottom.
pca = PCA(n_components = 300, copy = False);
Xt = pca.fit_transform(Xt);

SVM (SVC, RBF kernel)


In [122]:
# RBF-kernel SVM, one-vs-rest decision function, scored with 5-fold CV.
# C=1e3 is a weak-regularisation setting; tol tightened from the default.
svmClassifier = svm.SVC(C=1e3, kernel='rbf', tol=1e-5, decision_function_shape='ovr')
svmscores = cross_val_score(svmClassifier, Xt, yt, cv=5)

In [127]:
svmscores


Out[127]:
array([ 0.70675676,  0.69864865,  0.70810811,  0.6972973 ,  0.73108108])

Logistic Regression


In [432]:
# Multinomial (softmax) logistic regression with lbfgs; C=1e3 means
# weak regularisation. Scored with 5-fold CV.
LRclassifier = LogisticRegression(multi_class = 'multinomial',solver = 'lbfgs', C = 1e3);
LRscores = cross_val_score(LRclassifier, Xt, yt, cv=5)

In [436]:
LRscores


Out[436]:
array([ 0.65675676,  0.62567568,  0.65945946,  0.61756757,  0.65540541])

Random Forest


In [94]:
# Random forest, 150 trees, entropy split criterion; 5-fold CV.
RFclassifier = RandomForestClassifier(n_estimators = 150, criterion = 'entropy');
RFscores = cross_val_score(RFclassifier, Xt, yt, cv=5)

In [97]:
RFscores


Out[97]:
array([ 0.68513514,  0.68108108,  0.66081081,  0.65945946,  0.70135135])

Naive Bayes


In [428]:
# NOTE(review): the variable is named `gnb` but this is MultinomialNB, not
# GaussianNB (both are imported above) — the name is misleading.
# MultinomialNB also requires non-negative feature values, so it only works
# on the raw count features, not on PCA-transformed data — confirm which
# model was intended.
gnb = MultinomialNB()
gnbScores = cross_val_score(gnb, Xt, yt, cv = 5)

In [438]:
gnbScores


Out[438]:
array([ 0.67162162,  0.67297297,  0.67567568,  0.65810811,  0.69189189])

Decision Tree


In [433]:
# Single decision tree with entropy criterion; 5-fold CV baseline.
DTclassifier = DecisionTreeClassifier(criterion = 'entropy');
DTscores = cross_val_score(DTclassifier, Xt, yt, cv=5)

In [439]:
DTscores


Out[439]:
array([ 0.63108108,  0.58513514,  0.61081081,  0.61891892,  0.57972973])

Neural Network


In [193]:
# Single hidden layer of 200 units with early stopping; 5-fold CV.
# Fix: hidden_layer_sizes should be a tuple — `(200)` is just the int 200
# (sklearn happens to wrap a scalar, so it worked by accident); `(200,)`
# states the intent and is robust across versions. Behavior is unchanged.
NNClassifier = MLPClassifier(hidden_layer_sizes = (200,), activation = 'relu', alpha = 1e-4, max_iter = 15000, learning_rate_init = 0.001, early_stopping = True);
NNscores = cross_val_score(NNClassifier, Xt, yt, cv=5)

In [194]:
NNscores


Out[194]:
array([ 0.71081081,  0.7027027 ,  0.69594595,  0.7027027 ,  0.71621622])

TRAIN AND TEST

SVM


In [198]:
# Refit the SVM on the full training set and predict the held-out test set.
# NOTE(review): if the PCA cell above was run, Xt is PCA-transformed but
# Xtest is not — the two must be in the same feature space. The execution
# counts suggest the PCA cell was skipped in the final pass; confirm.
svmClassifier = svm.SVC(C=1e3, kernel='rbf', tol=1e-5, decision_function_shape='ovr')
svmClassifier = svmClassifier.fit(Xt, yt)
labelSVM = svmClassifier.predict(Xtest)

In [199]:
labelSVM


Out[199]:
array([5, 4, 5, 4, 4, 4, 4, 2, 2, 4, 5, 2, 1, 3, 1, 1, 1, 4, 1, 1, 2, 4, 4,
       1, 2, 4, 2, 3, 2, 1, 1, 4, 4, 4, 1, 4, 1, 4, 3, 2, 2, 1, 2, 3, 1, 5,
       5, 3, 2, 2, 2, 2, 5, 2, 3, 2, 2, 1, 5, 1, 1, 2, 2, 2, 5, 1, 1, 2, 4,
       5, 4, 4, 2, 5, 5, 1, 5, 5, 4, 5, 1, 5, 1, 1, 4, 1, 4, 5, 3, 4, 4, 2,
       4, 5, 5, 2, 1, 5, 2, 5])

In [218]:
# Train an ensemble of 10 identically-configured MLPs. Each fit starts
# from a different random initialisation (construction draws no random
# numbers, only fit does), so the members' predictions differ and can be
# combined by majority vote in the next cell.
NNClassifier = []
labelNN = []
for _ in range(10):
    member = MLPClassifier(hidden_layer_sizes = (200), activation = 'relu', alpha = 1e-4, max_iter = 15000, learning_rate_init = 0.001, early_stopping = True)
    NNClassifier.append(member.fit(Xt, yt))
    labelNN.append(member.predict(Xtest))

# Stack to a (10, n_test) int array: row i = classifier i's predictions.
labelNN = np.asarray(labelNN)

In [219]:
# Majority vote across the 10 networks for each test sample.
# labelNN has shape (n_classifiers, n_samples); column j holds the 10
# votes for sample j. The former inner Python loop that rebuilt each
# column element-by-element is replaced by a direct numpy column slice —
# same values, less code.
results = []
for j in range(labelNN.shape[1]):
    votes = labelNN[:, j]
    # bincount().argmax() picks the most frequent label (ties resolve to
    # the smallest label, as in the original).
    results.append(np.bincount(votes).argmax())

results


Out[219]:
[5,
 4,
 5,
 4,
 3,
 4,
 4,
 5,
 2,
 4,
 5,
 2,
 2,
 3,
 1,
 1,
 1,
 4,
 2,
 1,
 2,
 4,
 4,
 5,
 2,
 4,
 2,
 3,
 2,
 1,
 1,
 4,
 4,
 4,
 1,
 4,
 1,
 4,
 3,
 2,
 2,
 1,
 2,
 3,
 2,
 1,
 3,
 3,
 2,
 5,
 2,
 2,
 5,
 2,
 3,
 2,
 2,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 5,
 1,
 1,
 2,
 4,
 5,
 4,
 4,
 2,
 5,
 5,
 1,
 1,
 5,
 4,
 5,
 1,
 5,
 1,
 1,
 4,
 1,
 4,
 5,
 3,
 5,
 4,
 2,
 4,
 5,
 5,
 2,
 1,
 5,
 2,
 5]

In [200]:
def saveSolution(fileName, labels):
    """Write one predicted label per line to ``fileName``.

    Fixes: uses a ``with`` context manager so the handle is closed even if
    a write raises, and opens in mode "w" instead of "w+" since the file
    is only written, never read back. Output bytes are identical.
    """
    with open(fileName, "w") as f:
        for label in labels:
            f.write(str(label))
            f.write("\n")

In [220]:
saveSolution("assignment3-13962.txt",results)

In [212]:



Out[212]:
1