In [184]:
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
import math
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.decomposition import PCA
In [185]:
fileNameTrainFeatures = "bowpos.npy"
fileNameTestFeatures = "testbowpos.npy"
In [186]:
def loadFeaturesFile(fileName):
    # Load the precomputed training feature/label matrix from a .npy file.
    featureLabels = np.load(fileName)
    featureLabels = np.asarray(featureLabels)
    return featureLabels
In [187]:
def loadFeaturesTest(fileName):
    # Load the test feature matrix from a .npy file.
    featureTest = np.load(fileName)
    return featureTest
In [188]:
def getClassSize(featureLabels):
    # Count how many examples fall into each of the 5 classes (labels 1-5 in column 0).
    size = np.zeros(5)
    for i in range(0, len(featureLabels)):
        k = int(featureLabels[i][0] - 1)
        size[k] += 1
    return size
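The same per-class counts can be obtained in a single call; a small sketch (not in the original notebook), assuming the labels in column 0 are the integers 1-5:
np.bincount(featureLabels[:, 0].astype(int) - 1, minlength=5)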
In [189]:
def defineTrain(featureLabels):
    # Shuffle the rows, then split each row into its label (column 0) and feature vector (remaining columns).
    np.random.shuffle(featureLabels)
    trainFeat = []
    trainLabel = []
    n, d = featureLabels.shape
    for i in range(0, n):
        trainLabel.append(int(featureLabels[i][0]))
        trainFeat.append(featureLabels[i][1:])
    return trainFeat, trainLabel
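As a hedged alternative, the loop inside defineTrain could be replaced by NumPy slicing; a minimal sketch (the names labels/features are illustrative, assuming the label sits in column 0):
labels = featureLabels[:, 0].astype(int)
features = featureLabels[:, 1:]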
In [190]:
featureLabels = loadFeaturesFile(fileNameTrainFeatures)
getClassSize(featureLabels)
testAvailable = True
if testAvailable:
    featureTest = loadFeaturesTest(fileNameTestFeatures)
    Xt, yt = defineTrain(featureLabels)
    Xtest = featureTest
else:
    Xt, yt = defineTrain(featureLabels)
In [61]:
# Reduce the feature vectors to 300 principal components.
pca = PCA(n_components=300, copy=False)
Xt = pca.fit_transform(Xt)
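If this PCA reduction is used, the same fitted transform would normally be applied to the test features before prediction; a minimal sketch (this step is not in the original cells):
Xtest = pca.transform(Xtest)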
In [122]:
svmClassifier = svm.SVC(C=1e3, kernel='rbf', tol=1e-5, decision_function_shape='ovr')
svmscores = cross_val_score(svmClassifier, Xt, yt, cv=5)
In [127]:
svmscores
Out[127]:
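Each cross_val_score call returns one accuracy value per fold. A small sketch (not in the original notebook) for summarizing any of these score arrays:
print("mean CV accuracy: %.3f (+/- %.3f)" % (svmscores.mean(), svmscores.std()))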
In [432]:
LRclassifier = LogisticRegression(multi_class='multinomial', solver='lbfgs', C=1e3)
LRscores = cross_val_score(LRclassifier, Xt, yt, cv=5)
In [436]:
LRscores
Out[436]:
In [94]:
RFclassifier = RandomForestClassifier(n_estimators=150, criterion='entropy')
RFscores = cross_val_score(RFclassifier, Xt, yt, cv=5)
In [97]:
RFscores
Out[97]:
In [428]:
# Note: this is the multinomial naive Bayes variant, not GaussianNB.
nbClassifier = MultinomialNB()
nbScores = cross_val_score(nbClassifier, Xt, yt, cv=5)
In [438]:
nbScores
Out[438]:
In [433]:
DTclassifier = DecisionTreeClassifier(criterion='entropy')
DTscores = cross_val_score(DTclassifier, Xt, yt, cv=5)
In [439]:
DTscores
Out[439]:
In [193]:
NNClassifier = MLPClassifier(hidden_layer_sizes=(200,), activation='relu', alpha=1e-4, max_iter=15000, learning_rate_init=0.001, early_stopping=True)
NNscores = cross_val_score(NNClassifier, Xt, yt, cv=5)
In [194]:
NNscores
Out[194]:
In [198]:
svmClassifier = svm.SVC(C=1e3, kernel='rbf', tol=1e-5, decision_function_shape='ovr')
svmClassifier = svmClassifier.fit(Xt, yt)
labelSVM = svmClassifier.predict(Xtest)
In [199]:
labelSVM
Out[199]:
In [218]:
NNClassifier = []
labelNN = []
# Train 10 independently initialized MLPs and collect each one's test predictions.
for i in range(0, 10):
    NNClassifier.append(MLPClassifier(hidden_layer_sizes=(200,), activation='relu', alpha=1e-4, max_iter=15000, learning_rate_init=0.001, early_stopping=True))
for i in range(0, 10):
    NNClassifier[i] = NNClassifier[i].fit(Xt, yt)
    labelNN.append(NNClassifier[i].predict(Xtest))
labelNN = np.asarray(labelNN)
In [219]:
# Majority vote: for each test example, take the most common label across the 10 MLPs.
results = []
for j in range(0, labelNN.shape[1]):
    z = []
    for i in range(0, labelNN.shape[0]):
        z.append(labelNN[i][j])
    z = np.asarray(z)
    r = np.bincount(z).argmax()
    results.append(r)
results
Out[219]:
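The per-example majority vote can also be written in one pass; a hedged sketch using np.apply_along_axis and np.bincount (it assumes the predicted labels are non-negative integers, which holds here):
results = np.apply_along_axis(lambda col: np.bincount(col).argmax(), axis=0, arr=labelNN)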
In [200]:
def saveSolution(fileName, labels):
    # Write one predicted label per line to a text file.
    f = open(fileName, "w+")
    for i in labels:
        f.write(str(i))
        f.write("\n")
    f.close()
In [220]:
saveSolution("assignment3-13962.txt",results)