In [1]:
import numpy as np
import os
import random
import matplotlib.pyplot as plt
%matplotlib inline

import FullyConnectedNN as fnn

In [2]:
#
# Analysis parameters
#
# Directories holding the labeled training/test files and the unlabeled files.
# File names are appended directly to these strings, hence the trailing "/".
trnDir = "data/edgevar/train/"
testDir = "data/edgevar/test/"
unlabeledDir = "data/edgevar/unlabeled/"
# True: each sample is built from its theta column only.
# False: each sample is the thetas, xs and ys columns concatenated.
bThetas = True

# File names read from trnDir, testDir and unlabeledDir respectively.
trnfnames = ["edge10_", "edge40", "edge15"]
testfnames = ["edge10_", "edge40", "edge15"]
unlabeledfnames = ["edge10_", "edge15", "edge20", "edge25_", "edge30", "edge35", "edge40"]

In [5]:
# Compile the training set.
#
# Each file in trnfnames (read from trnDir) is parsed line by line:
#   * blank / whitespace-only lines are ignored;
#   * a line starting with "label" closes the current sample; its second
#     token is the class index (0 or 1), stored as a one-hot column vector;
#   * every other line supplies three floats that are appended to the
#     xs, ys and thetas columns of the current sample.
# At most maxTrn samples are collected in total, across all files.
# The result, trn_data, is a shuffled list of (input, label) numpy arrays.
nTrn = 0
nTrn_lbl = 0
maxTrn = 20000

trn_data = []
trn_inputs = []
trn_labels = []

for f in trnfnames:
    print("Processing " + f + " as training data")
    thetas = []
    xs = []
    ys = []
    # `with` closes the file even if a malformed line raises while parsing.
    with open(trnDir + f, 'r') as fin:
        for line in fin:
            # strip() also catches whitespace-only and "\r\n" lines, which
            # would otherwise reach the float parsing below and crash.
            if not line.strip():
                continue
            if line.startswith("label"):
                # Done a block
                label = [[0.], [0.]]
                idx = int(float(line.split()[1]))
                label[idx] = [1.]
                trn_labels.append(label)
                if bThetas:
                    trn_inputs.append(thetas)
                else:
                    trn_inputs.append(thetas + xs + ys)
                thetas = []
                xs = []
                ys = []
                nTrn += 1
                if nTrn == maxTrn:
                    break
                continue
            spt = [float(x) for x in line.split()]
            xs.append([spt[0]])
            ys.append([spt[1]])
            thetas.append([spt[2]])

    # Stop reading further files once the sample cap is reached.
    if nTrn == maxTrn:
        break

# Inputs and labels are appended in lockstep, so zip pairs them one-to-one.
trn_data = [(np.asarray(inp), np.asarray(lbl))
            for inp, lbl in zip(trn_inputs, trn_labels)]

random.shuffle(trn_data)

print("Done compiling training set")


Processing edge10_ as training data
Processing edge40 as training data
Processing edge15 as training data
Done compiling training set

In [6]:
# Number of (input, label) pairs collected for training.
len(trn_data)


Out[6]:
20000

In [7]:
# Compile the test set.
#
# Files in testfnames (read from testDir) use the same layout as the training
# files: three floats per data line (collected as xs, ys, thetas) and a
# "label <k>" line closing each sample. Unlike the training set, the label is
# kept as a plain float rather than a one-hot vector.
# The result, test_data, is a list of (input, label) numpy arrays.
test_data = []
test_inputs = []
test_labels = []

for f in testfnames:
    print("Adding " + f + " to test set")
    thetas = []
    xs = []
    ys = []
    # `with` closes the file even if a malformed line raises while parsing.
    with open(testDir + f, 'r') as fin:
        for line in fin:
            # strip() also catches whitespace-only and "\r\n" lines, which
            # would otherwise reach the float parsing below and crash.
            if not line.strip():
                continue
            if line.startswith("label"):
                # Done a block
                test_labels.append(float(line.split()[1]))
                if bThetas:
                    test_inputs.append(thetas)
                else:
                    test_inputs.append(thetas + xs + ys)
                thetas = []
                xs = []
                ys = []
                continue
            spt = [float(x) for x in line.split()]
            xs.append([spt[0]])
            ys.append([spt[1]])
            thetas.append([spt[2]])

# Inputs and labels are appended in lockstep, so zip pairs them one-to-one.
test_data = [(np.asarray(inp), np.asarray(lbl))
             for inp, lbl in zip(test_inputs, test_labels)]

print("Done")


Adding edge10_ to test set
Adding edge40 to test set
Adding edge15 to test set
Done

In [8]:
# Number of (input, label) pairs collected for testing.
len(test_data)


Out[8]:
600

In [ ]:
# Training-set size followed by test-set size.
print len(trn_data), len(test_data)

In [ ]:
# Drop the current network reference; nn is rebound when a network is constructed.
nn = None

In [36]:
# Choose the layer layout matching the input encoding selected by bThetas,
# then build the network from it.
if bThetas:
    layer_sizes = [400, 20, 2]
else:
    layer_sizes = [1200, 10, 10, 6]
nn = fnn.FullyConnectedNN(layer_sizes)

# nn.train(training data, nEpoch, minibatch, eta, testdata=None)
# train_params is kept as a list because it is written out when the network is saved.
train_params = [20, 100, 1.]
n_epoch, minibatch, eta = train_params
nn.train(trn_data, n_epoch, minibatch, eta, test_data)


Epoch 0 Accuracy:  0.781666666667
Epoch 1 Accuracy:  0.838333333333
Epoch 2 Accuracy:  0.851666666667
Epoch 3 Accuracy:  0.873333333333
Epoch 4 Accuracy:  0.883333333333
Epoch 5 Accuracy:  0.881666666667
Epoch 6 Accuracy:  0.895
Epoch 7 Accuracy:  0.891666666667
Epoch 8 Accuracy:  0.898333333333
Epoch 9 Accuracy:  0.91

In [27]:
# Keep a second reference to the current network (same object, not a copy).
nnSave = nn

In [24]:
# Class index of every training sample (argmax over its label array),
# histogrammed to show how the classes are balanced.
trnlabels = [np.argmax(sample[1]) for sample in trn_data]

plt.hist(trnlabels)


Out[24]:
(array([ 10229.,      0.,      0.,      0.,      0.,      0.,      0.,
             0.,      0.,   9771.]),
 array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ]),
 <a list of 10 Patch objects>)

In [34]:
# Predicted class (argmax of the network output) for every test input,
# histogrammed for comparison with the true label distribution.
outs = [np.argmax(nn.feedforward(sample[0])) for sample in test_data]

plt.hist(outs)


Out[34]:
(array([ 421.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,  179.]),
 array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ]),
 <a list of 10 Patch objects>)

In [ ]:
# True label of every test sample, unwrapped from its numpy array into a
# Python scalar, then histogrammed.
labels = [sample[1].item() for sample in test_data]

plt.hist(labels)

In [ ]:
# Clear data for RAM
# NOTE: after this runs, trn_data and test_data are None; any cell that reads
# them needs the loading cells re-run first.
trn_data = None
test_data = None

In [12]:
# Load the unlabeled sets.
#
# For each file in unlabeledfnames (read from unlabeledDir) up to maxSample
# samples are collected. Data lines carry three floats (collected as xs, ys,
# thetas); a blank line closes the current sample.
# edgetestdata[k] is the list of input arrays read from unlabeledfnames[k].
#
# NOTE(review): a final sample that is not followed by a blank line is never
# appended -- confirm the data files end with a blank line.
edgetestdata = []
maxSample = 1000
n = 0

# unlabeledfnames = ["edge10_"]
unlabeledfnames = ["edge10_", "edge15", "edge20", "edge25_", "edge30", "edge35", "edge40"]
for f in unlabeledfnames:
    inputs = []
    print("Adding " + f + " unlabeled set")
    thetas = []
    xs = []
    ys = []
    n = 0  # samples collected from this file so far
    # `with` closes the file even if a malformed line raises while parsing.
    with open(unlabeledDir + f, 'r') as fin:
        for line in fin:
            if n == maxSample:
                break
            # strip() also treats whitespace-only and "\r\n" lines as blank.
            if not line.strip():
                # Done a block. Skip the append when nothing was collected
                # (leading or repeated blank lines) so that no empty sample
                # is stored or counted.
                if thetas:
                    n += 1
                    if bThetas:
                        inputs.append(thetas)
                    else:
                        inputs.append(thetas + xs + ys)
                thetas = []
                xs = []
                ys = []
                continue
            spt = [float(x) for x in line.split()]
            xs.append([spt[0]])
            ys.append([spt[1]])
            thetas.append([spt[2]])

    filedata = [np.asarray(sample) for sample in inputs]
    edgetestdata.append(filedata)

print("Done")


Adding edge10_ unlabeled set
Adding edge15 unlabeled set
Adding edge20 unlabeled set
Adding edge25_ unlabeled set
Adding edge30 unlabeled set
Adding edge35 unlabeled set
Adding edge40 unlabeled set
Done

In [73]:
# Largest network output for every sample of the third unlabeled file
# (index 2), plotted in sample order.
y = [np.max(nn.feedforward(sample)) for sample in edgetestdata[2]]
plt.plot(y)


Out[73]:
[<matplotlib.lines.Line2D at 0x7f268813b210>]

In [60]:
y=[]
for i in range(0,len(edgetestdata[1])):
    y.append(np.argmax(nn.feedforward(edgetestdata[1][i])))

sigma = np.std(y)
mu = np.average(y)
print mu,sigma

mysig = np.sqrt(np.sum((y-mu)**2)/(len(y)-1))
print mysig
plt.hist(y)


0.12 0.324961536185
0.32577699929
Out[60]:
(array([ 176.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,   24.]),
 array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ]),
 <a list of 10 Patch objects>)

In [72]:
# One rho_ value per entry of edges: L*L*N / edge**2.
edges = [10, 15, 20, 25, 30, 35, 40]
N = 400
L = 3.0
rho_ = [L * L * N / float(edge * edge) for edge in edges]

# For each unlabeled file, the mean and standard deviation of the predicted
# class over all of its samples.
out_stats = []  # mean and std pairs
for j in range(len(rho_)):
    outs = [np.argmax(nn.feedforward(sample)) for sample in edgetestdata[j]]
    out_stats.append([np.average(outs), np.std(outs)])

means = [pair[0] for pair in out_stats]
stds = [pair[1] for pair in out_stats]

# The first entry is left out of the plot.
plt.plot(rho_[1:], means[1:], '+')


Out[72]:
[<matplotlib.lines.Line2D at 0x7f26881eadd0>]

In [71]:
# Mean predicted class against rho_, with the per-file standard deviation as
# error bars; the first entry of each list is skipped.
plt.errorbar(rho_[1:], means[1:], yerr=stds[1:], fmt='+')


Out[71]:
<Container object of 3 artists>

In [50]:
# Mean predicted class per unlabeled file against its edge value.
# The original referenced `out_avgs`, which no cell defines; `means` holds
# the per-file averages computed alongside `edges`.
plt.plot(edges, means, '+')


Out[50]:
[<matplotlib.lines.Line2D at 0x7f268899b190>]

In [41]:
# Keep a second reference to the current network (same object, not a copy);
# nnSave is what gets written to disk.
nnSave = nn

In [42]:
# Save the network held in nnSave, plus the output-vs-edge figure, under
# path + savetag. Nothing is written if <savetag>.nn already exists.
#
# Layout of the .nn file:
#   line 1: "NN sizes: " followed by the layer sizes
#   line 2: "training params (Nepoch, minibatch, eta): " followed by train_params
#   then, per layer: one line with all weights (rows concatenated) and one
#   line with the biases, every value followed by a single space.
path = "saves/"
savetag = "incl_15Trn_2e4TrnSamp_2"
if not os.path.isfile(path+savetag+".nn"):
    # Make sure the target directory exists before anything is written.
    if not os.path.isdir(path):
        os.makedirs(path)

    # The original plotted `out_avgs`, which no cell defines; `means` holds
    # the per-file averages that belong with `edges`.
    plt.plot(edges, means, '+') # Seems to need me to make the plot first
    plt.gcf().savefig(path+savetag+"_output-vs-edge.png")

    # Build the two header strings separately. Previously the sizes string was
    # reset before use, so both header lines contained the training params.
    # Sizes are read from nnSave so they describe the weights being saved.
    sizes_str = "".join(str(size) + " " for size in nnSave.sizes)
    params_str = "".join(str(t) + " " for t in train_params)

    # `with` guarantees the file is flushed and closed.
    with open(path+savetag+".nn", 'w') as fout:
        fout.write("NN sizes: " + sizes_str + "\n")
        fout.write("training params (Nepoch, minibatch, eta): " + params_str + "\n")
        for w, b in zip(nnSave.W, nnSave.B):
            for wj in w:
                fout.write("".join(str(float(ww)) + " " for ww in wj))
            fout.write("\n")
            fout.write("".join(str(float(bj)) + " " for bj in b))
            fout.write("\n")
else:
    print("File tag already taken")



In [47]:
N = 400
L = 3.0
rho_ = [L*L*N/float(x*x) for x in edges]
print rho_


[36.0, 16.0, 9.0, 5.76, 4.0, 2.938775510204082, 2.25]

In [ ]:
# Load MNIST through the project-local mnist_loader module; the three values
# returned by load_data_wrapper() are unpacked as trndat, valdat and testdat.
import mnist_loader
trndat, valdat, testdat = mnist_loader.load_data_wrapper()

In [ ]:
# Train a 784-100-10 network on the MNIST data loaded above.
# The class must be reached through the `fnn` module alias used by the
# notebook's import; the bare name FullyConnectedNN is not defined.
nn = fnn.FullyConnectedNN([784, 100, 10])
# Positional arguments: training data, epochs, minibatch size, eta, test data.
nn.train(trndat, 21, 100, 3., testdat)