In [1]:
import numpy as np
import time
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
import matplotlib.colors as clrs

Reading Files


In [2]:
def readFile(fileName):
    """Load a comma-separated, two-feature classification dataset.

    Every usable line holds exactly three comma-separated fields,
    ``x1,x2,label``.  Lines with any other number of fields (blank lines,
    for instance) are silently skipped.

    Parameters
    ----------
    fileName : str
        Path of the text file to read.

    Returns
    -------
    X : numpy.ndarray
        Float feature matrix of shape ``(n_samples, 2)``; ``(0, 2)`` when no
        line qualifies.
    y : numpy.ndarray
        Integer labels of shape ``(n_samples,)``.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened for reading.
    ValueError
        If a three-field line contains a non-numeric field.
    """
    X = []
    y = []

    # Open read-only: "r+" would require write permission although nothing is
    # written.  The with-block also closes the handle if parsing raises.
    with open(fileName, "r") as f:
        for line in f:
            m = line.strip().split(",")
            if len(m) == 3:
                X.append([float(m[0]), float(m[1])])
                # Labels may be written as "1.0", hence float() before int().
                y.append(int(float(m[2])))

    # Pin shape and dtype so an empty file still yields a 2-column matrix and
    # integer labels instead of two float arrays of shape (0,).
    X = np.asarray(X, dtype=float).reshape(-1, 2)
    y = np.asarray(y, dtype=int)

    return X, y

Helper Function to plot points


In [3]:
def plotPoints(X, y):
    """Scatter-plot two-feature samples, one colour per class label 0..3.

    Parameters
    ----------
    X : array-like
        Samples; column 0 is drawn on the x-axis and column 1 on the y-axis.
    y : array-like
        Class label of each sample.  Only labels 0, 1, 2 and 3 are drawn;
        samples with any other label are ignored.

    Notes
    -----
    A class with no samples is drawn as an empty scatter, so its legend entry
    is still present.  The figure is shown with ``plt.show()``; nothing is
    returned.
    """
    colors = ['r','b','k','y']
    labels = ('Class-0','Class-1','Class-2','Class-3')

    y = np.asarray(y)
    X = np.asarray(X)
    if X.size == 0:
        # np.asarray([]) is 1-D; give it two columns so [:,0] / [:,1] work.
        X = X.reshape(0, 2)
    # Only the first len(y) rows have a label to select on.
    X = X[:len(y)]

    handles = []
    for label, color in enumerate(colors):
        # Boolean-mask selection keeps the result 2-D even when the class is
        # absent, which a list comprehension + asarray does not.
        pts = X[y == label]
        handles.append(plt.scatter(pts[:,0], pts[:,1], s = 5, color = color))

    plt.legend(tuple(handles), labels, loc='upper right')
    plt.show()

Draw Plots


In [4]:
# Load the dataset with file suffix 0 (the first entry of the `noise` lists
# used by the accuracy functions) and scatter-plot it by class.
X0, y0 = readFile("assignment-5/assign_5_data_0.txt")
plotPoints(X0,y0)



In [5]:
# Load the dataset with file suffix 10 (one of the `noise` levels used by the
# accuracy functions) and scatter-plot it by class.
X10, y10 = readFile("assignment-5/assign_5_data_10.txt")
plotPoints(X10,y10)



In [6]:
# Load the dataset with file suffix 20 (one of the `noise` levels used by the
# accuracy functions) and scatter-plot it by class.
X20, y20 = readFile("assignment-5/assign_5_data_20.txt")
plotPoints(X20,y20)



In [7]:
# Load the dataset with file suffix 40 (one of the `noise` levels used by the
# accuracy functions) and scatter-plot it by class.
X40, y40 = readFile("assignment-5/assign_5_data_40.txt")
plotPoints(X40,y40)



In [8]:
# Load the dataset with file suffix 60 (the largest `noise` level used by the
# accuracy functions) and scatter-plot it by class.
X60, y60 = readFile("assignment-5/assign_5_data_60.txt")
plotPoints(X60,y60)



In [9]:
def findTestIndices(size, p = 0.25, seed = None):
    """Randomly partition ``range(size)`` into train and test index sets.

    Parameters
    ----------
    size : int
        Number of samples to split.
    p : float, optional
        Fraction of the samples assigned to the test set.  The test-set size
        is ``int(p * size)``, i.e. truncated toward zero.
    seed : int or None, optional
        When given, the draw uses a private ``numpy.random.RandomState(seed)``
        so the split is reproducible and the global NumPy random state is left
        untouched.  ``None`` (the default) draws from the global state.

    Returns
    -------
    indicesForTrain : numpy.ndarray
        Sorted indices that are not in the test set.
    indicesForTest : numpy.ndarray
        Test indices, sampled without replacement (not sorted).

    Side effects
    ------------
    Prints the test-set size.
    """
    testSize = int(p * size)

    rng = np.random if seed is None else np.random.RandomState(seed)
    indicesForTest = rng.choice(size, testSize, replace=False)

    # setdiff1d returns the sorted values of the first array that are absent
    # from the second, i.e. everything not picked for testing.
    indicesForTrain = np.setdiff1d(np.arange(size), indicesForTest)

    # Call form of print works on both Python 2 and Python 3.
    print(testSize)
    return indicesForTrain, indicesForTest

In [10]:
def readTrainTestData(X, y, indicesForTrain, indicesForTest):
    """Select the train and test subsets of ``X`` / ``y`` by index.

    Parameters
    ----------
    X, y : indexable
        Samples and their labels; both are indexed with the same positions.
    indicesForTrain, indicesForTest : iterable of int
        Positions that make up the training and the test subset.

    Returns
    -------
    (XTrain, yTrain, XTest, yTest) : tuple of list
        Plain Python lists holding ``X[i]`` / ``y[i]`` in the order the
        indices were supplied.
    """
    # Collect (sample, label) pairs in a single pass over each index sequence.
    trainPairs = [(X[k], y[k]) for k in indicesForTrain]
    testPairs = [(X[k], y[k]) for k in indicesForTest]

    XTrain = [sample for sample, _ in trainPairs]
    yTrain = [label for _, label in trainPairs]
    XTest = [sample for sample, _ in testPairs]
    yTest = [label for _, label in testPairs]

    return XTrain, yTrain, XTest, yTest

In [11]:
# Draw one train/test index split (20% test) from the suffix-0 file.
# indicesForTrain, indicesForTest, XTest and yTest are module-level names that
# NNAccuracies and svmAccuracies read, so every model is scored on the same
# held-out samples of this file.
# findTestIndices prints the test-set size.
# NOTE(review): the split is drawn from NumPy's global random state without a
# seed, so it (and every score below) changes from run to run.
cleanFile = "assignment-5/assign_5_data_0.txt"
XClean, yClean = readFile(cleanFile)
size = XClean.shape[0]
indicesForTrain, indicesForTest = findTestIndices(size, 0.2)
# XClean / yClean are rebound here: from the full arrays to Python lists that
# hold only the training subset.
XClean, yClean, XTest, yTest = readTrainTestData(XClean, yClean, indicesForTrain, indicesForTest)


1280

In [12]:
def NNAccuracies(noise = (0, 10, 20, 40, 60)):
    """Train an MLP on each dataset variant and score it on the shared test set.

    For every entry of ``noise`` the file
    ``assignment-5/assign_5_data_<entry>.txt`` is read, its training rows are
    selected with the module-level ``indicesForTrain`` / ``indicesForTest``,
    the classifier is fitted on them and then scored on the module-level
    ``XTest`` / ``yTest``.

    Parameters
    ----------
    noise : iterable of int, optional
        File suffixes to evaluate.  Defaults to the five suffixes the
        notebook uses.

    Returns
    -------
    list of float
        One ``score`` value per entry of ``noise``, in the same order.
    """
    NNConf = MLPClassifier(hidden_layer_sizes=(100, ), activation='relu', alpha=0.0001, learning_rate_init=0.001, max_iter=2000, tol=0.0001, early_stopping=False)

    NNScores = []
    for level in noise:
        trainFile = "assignment-5/assign_5_data_" + str(level) + ".txt"
        XTotal, yTotal = readFile(trainFile)
        XTrain, yTrain, _, _ = readTrainTestData(XTotal, yTotal, indicesForTrain, indicesForTest)

        # fit() returns the estimator itself, so collecting its return values
        # would only store the same object repeatedly.  Score right after
        # fitting, before the next fit replaces the learned weights.
        NNConf.fit(XTrain, yTrain)
        NNScores.append(NNConf.score(XTest, yTest))

    return NNScores

In [13]:
def svmAccuracies(noise = (0, 10, 20, 40, 60)):
    """Train an RBF-kernel SVM on each dataset variant and score it on the shared test set.

    For every entry of ``noise`` the file
    ``assignment-5/assign_5_data_<entry>.txt`` is read, its training rows are
    selected with the module-level ``indicesForTrain`` / ``indicesForTest``,
    the classifier is fitted on them and then scored on the module-level
    ``XTest`` / ``yTest``.

    Parameters
    ----------
    noise : iterable of int, optional
        File suffixes to evaluate.  Defaults to the five suffixes the
        notebook uses.

    Returns
    -------
    list of float
        One ``score`` value per entry of ``noise``, in the same order.
    """
    svmC = SVC(C = 1e4, kernel='rbf', tol = 1e-5)

    svmScores = []
    for level in noise:
        trainFile = "assignment-5/assign_5_data_" + str(level) + ".txt"
        XTotal, yTotal = readFile(trainFile)
        XTrain, yTrain, _, _ = readTrainTestData(XTotal, yTotal, indicesForTrain, indicesForTest)

        # fit() returns the estimator itself, so collecting its return values
        # would only store the same object repeatedly.  Score right after
        # fitting, before the next fit replaces the learned model.
        svmC.fit(XTrain, yTrain)
        svmScores.append(svmC.score(XTest, yTest))

    return svmScores

In [ ]:
# Run both experiments with their default settings; each call returns one
# test-set score per dataset file it trains on.
svmScores = svmAccuracies()
NNScores = NNAccuracies()

In [ ]:
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print(svmScores)

In [ ]:
# Call form of print: valid on Python 3 and prints the same text on Python 2.
print(NNScores)