In [1]:
import numpy as np
import time
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
import matplotlib.colors as clrs
In [2]:
def readFile(fileName):
    """Parse a comma-separated text file of labelled 2-D points.

    Every line that splits into exactly three comma-separated fields is read
    as ``x1, x2, label``. Lines with any other number of fields (blank lines,
    headers, malformed rows) are skipped silently.

    Parameters
    ----------
    fileName : str
        Path of the text file to read.

    Returns
    -------
    X : numpy.ndarray
        The two coordinate fields of every accepted line, as floats,
        one row per line.
    y : numpy.ndarray
        The third field of every accepted line, converted with
        ``int(float(...))`` so that labels written as ``"1.0"`` are accepted.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened for reading.
    ValueError
        If a three-field line contains a field that is not numeric.
    """
    X = []
    y = []
    # The file is only read, so open it read-only (no write permission is
    # required) and let `with` close the handle even if parsing raises.
    with open(fileName, "r") as f:
        for line in f:
            m = line.strip().split(",")
            if len(m) == 3:
                X.append([float(m[0]), float(m[1])])
                y.append(int(float(m[2])))
    return np.asarray(X), np.asarray(y)
In [3]:
def plotPoints(X, y):
    """Scatter-plot 2-D points, one colour per class label 0..3.

    Parameters
    ----------
    X : array-like
        Points to draw; column 0 is used as the horizontal coordinate and
        column 1 as the vertical coordinate.
    y : array-like
        Integer label for each row of ``X``. Only labels 0, 1, 2 and 3 are
        drawn; rows with any other label are ignored.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    y = np.asarray(y)
    # Only the first len(y) rows are considered, matching an index loop over y.
    X = np.asarray(X)[:len(y)]
    colors = ['r', 'b', 'k', 'y']
    names = ('Class-0', 'Class-1', 'Class-2', 'Class-3')

    handles = []
    for label, color in enumerate(colors):
        pts = X[y == label]
        if len(pts) == 0:
            # A class with no points must not be column-indexed: still add an
            # empty scatter so the legend keeps all four entries.
            xs, ys = [], []
        else:
            xs, ys = pts[:, 0], pts[:, 1]
        handles.append(plt.scatter(xs, ys, s=5, color=color))

    plt.legend(handles, names, loc='upper right')
    plt.show()
In [4]:
# Load the "_0" data file and scatter-plot its points coloured by class label.
X0, y0 = readFile("assignment-5/assign_5_data_0.txt")
plotPoints(X0,y0)
In [5]:
# Load the "_10" data file and scatter-plot its points coloured by class label.
X10, y10 = readFile("assignment-5/assign_5_data_10.txt")
plotPoints(X10,y10)
In [6]:
# Load the "_20" data file and scatter-plot its points coloured by class label.
X20, y20 = readFile("assignment-5/assign_5_data_20.txt")
plotPoints(X20,y20)
In [7]:
# Load the "_40" data file and scatter-plot its points coloured by class label.
X40, y40 = readFile("assignment-5/assign_5_data_40.txt")
plotPoints(X40,y40)
In [8]:
# Load the "_60" data file and scatter-plot its points coloured by class label.
X60, y60 = readFile("assignment-5/assign_5_data_60.txt")
plotPoints(X60,y60)
In [9]:
def findTestIndices(size, p = 0.25, seed = None):
    """Randomly split the indices ``0 .. size-1`` into train and test sets.

    Parameters
    ----------
    size : int
        Total number of samples to split.
    p : float, optional
        Fraction of samples reserved for testing; the test set holds
        ``int(p * size)`` indices (truncated towards zero).
    seed : int or None, optional
        When given, the split is drawn from a dedicated
        ``np.random.RandomState(seed)`` so it is reproducible. When ``None``
        (the default) NumPy's global random state is used.

    Returns
    -------
    indicesForTrain : numpy.ndarray
        Sorted indices that were not selected for testing.
    indicesForTest : numpy.ndarray
        Test indices, drawn without replacement, in the order drawn.

    The test-set size is also printed.
    """
    testSize = int(p * size)
    rng = np.random if seed is None else np.random.RandomState(seed)
    indicesForTest = rng.choice(size, testSize, replace=False)
    # setdiff1d returns the sorted unique values of the first array that are
    # absent from the second, i.e. every index not reserved for testing.
    indicesForTrain = np.setdiff1d(np.arange(size), indicesForTest)
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print(testSize)
    return indicesForTrain, indicesForTest
In [10]:
def readTrainTestData(X, y, indicesForTrain, indicesForTest):
    """Select the train and test rows of ``X`` / ``y`` by index.

    Parameters
    ----------
    X, y : indexable
        Samples and their labels; element ``k`` of each belongs together.
    indicesForTrain, indicesForTest : iterable of int
        Positions to copy into the train and test outputs respectively.

    Returns
    -------
    tuple of four lists
        ``(XTrain, yTrain, XTest, yTest)``, each a plain Python list whose
        elements follow the order of the corresponding index sequence.
    """
    # Walk each index sequence exactly once, pairing a sample with its label.
    trainPairs = [(X[k], y[k]) for k in indicesForTrain]
    testPairs = [(X[k], y[k]) for k in indicesForTest]

    XTrain = [sample for sample, _ in trainPairs]
    yTrain = [label for _, label in trainPairs]
    XTest = [sample for sample, _ in testPairs]
    yTest = [label for _, label in testPairs]
    return XTrain, yTrain, XTest, yTest
In [11]:
# Build the train/test split that NNAccuracies() and svmAccuracies() read as
# globals (indicesForTrain, indicesForTest, XTest, yTest).
cleanFile = "assignment-5/assign_5_data_0.txt"
XClean, yClean = readFile(cleanFile)
size = XClean.shape[0]
# Reserve 20% of the row indices for testing.
indicesForTrain, indicesForTest = findTestIndices(size, 0.2)
# NOTE: XClean / yClean are rebound here to the *training* subset only (as
# Python lists); XTest / yTest hold the held-out rows of the "_0" file.
XClean, yClean, XTest, yTest = readTrainTestData(XClean, yClean, indicesForTrain, indicesForTest)
In [12]:
def NNAccuracies():
    """Fit an MLP on each data file and score it on the shared test set.

    For every suffix in (0, 10, 20, 40, 60) the matching
    ``assignment-5/assign_5_data_<suffix>.txt`` file is read, reduced to the
    rows listed in the global ``indicesForTrain``, and used to fit the
    classifier. Each fit is then scored against the global ``XTest`` /
    ``yTest``.

    Relies on the notebook globals ``indicesForTrain``, ``indicesForTest``,
    ``XTest`` and ``yTest`` being defined before it is called.

    Returns
    -------
    list
        One ``score()`` result per data file, in the order listed above.
    """
    trainFile = ["assignment-5/assign_5_data_" + str(level) + ".txt"
                 for level in (0, 10, 20, 40, 60)]

    # A single estimator object is re-fitted for every file.
    NNConf = MLPClassifier(
        hidden_layer_sizes=(100, ),
        activation='relu',
        alpha=0.0001,
        learning_rate_init=0.001,
        max_iter=2000,
        tol=0.0001,
        early_stopping=False,
    )

    NNScores = []
    for path in trainFile:
        XTotal, yTotal = readFile(path)
        # Only the training half of the split is needed from each file.
        XTrain, yTrain = readTrainTestData(XTotal, yTotal, indicesForTrain, indicesForTest)[:2]
        fitted = NNConf.fit(XTrain, yTrain)
        NNScores.append(fitted.score(XTest, yTest))
    return NNScores
In [13]:
def svmAccuracies():
    """Fit an RBF-kernel SVM on each data file and score it on the shared test set.

    For every suffix in (0, 10, 20, 40, 60) the matching
    ``assignment-5/assign_5_data_<suffix>.txt`` file is read, reduced to the
    rows listed in the global ``indicesForTrain``, and used to fit the
    classifier. Each fit is then scored against the global ``XTest`` /
    ``yTest``.

    Relies on the notebook globals ``indicesForTrain``, ``indicesForTest``,
    ``XTest`` and ``yTest`` being defined before it is called.

    Returns
    -------
    list
        One ``score()`` result per data file, in the order listed above.
    """
    trainFile = ["assignment-5/assign_5_data_" + str(level) + ".txt"
                 for level in (0, 10, 20, 40, 60)]

    # A single estimator object is re-fitted for every file.
    svmC = SVC(C = 1e4, kernel='rbf', tol = 1e-5)

    svmScores = []
    for path in trainFile:
        XTotal, yTotal = readFile(path)
        # Only the training half of the split is needed from each file.
        XTrain, yTrain = readTrainTestData(XTotal, yTotal, indicesForTrain, indicesForTest)[:2]
        fitted = svmC.fit(XTrain, yTrain)
        svmScores.append(fitted.score(XTest, yTest))
    return svmScores
In [ ]:
# Run both experiments; each call returns a list with one score per data file
# (suffixes 0, 10, 20, 40, 60, in that order).
svmScores = svmAccuracies()
NNScores = NNAccuracies()
In [ ]:
# SVM scores, one per data file; parenthesised print works on Python 2 and 3.
print(svmScores)
In [ ]:
# MLP scores, one per data file; parenthesised print works on Python 2 and 3.
print(NNScores)