In [1]:
import numpy as np
import os
import random
import matplotlib.pyplot as plt
%matplotlib inline
import FullyConnectedNN as fnn
In [2]:
#
# Analysis parameters
#
# Name of the data run; it selects the sub-directory under data/.
run = "edgevar"
# Directory prefixes keep their trailing slash because the loader cells
# build paths by plain string concatenation (directory + file name).
trnDir = "data/%s/train/" % run
testDir = "data/%s/test/" % run
unlabeledDir = "data/%s/unlabeled/" % run
# True: only the theta column of each block is used as network input.
# False: the theta, x and y columns are concatenated.
bThetas = True
# File names read from trnDir and testDir respectively.
trnfnames = ["edge10", "edge40", "edge15"]
testfnames = ["edge10", "edge40", "edge15"]
# File names read from unlabeledDir.
unlabeledfnames = [
    "edge15", "edge15.25", "edge15.5", "edge15.75",
    "edge16", "edge16.25", "edge16.5", "edge16.75",
    "edge17", "edge17.5",
    "edge18", "edge18.5",
    "edge19", "edge19.5",
    "edge20", "edge21", "edge22", "edge23", "edge25", "edge30",
]
In [3]:
# Build the shuffled training set `trn_data` from the files in trnfnames.
#
# Each file is a sequence of whitespace-separated "x y theta" rows; a line
# starting with "label" closes the block of rows read so far and carries
# the class index (0 or 1) as its second field. Blank lines are ignored.
nTrn = 0
nTrn_lbl = 0
maxTrn = 20000
# Number of rows a complete block must contain; shorter/longer blocks are
# reported and dropped.
blockRows = 400
# Compile training set
trn_inputs = []
trn_labels = []
for f in trnfnames:
    print("Processing " + f + " as training data")
    thetas = []
    xs = []
    ys = []
    c = 0
    # `with` closes the file even when a row fails to parse, and iterating
    # the handle avoids loading the whole file just to stop at maxTrn.
    with open(trnDir + f, 'r') as fin:
        for c, line in enumerate(fin, 1):
            if line == "\n":
                continue
            if line.startswith("label"):
                # Done a block
                if len(thetas) != blockRows:
                    # Malformed block: print line number and row count, skip it.
                    print("%d %d" % (c, len(thetas)))
                    xs = []
                    ys = []
                    thetas = []
                    continue
                # One-hot target stored as a 2x1 nested list.
                label = [[0.], [0.]]
                idx = int(float(line.split()[1]))
                label[idx] = [1.]
                trn_labels.append(label)
                if bThetas:
                    trn_inputs.append(thetas)
                else:
                    trn_inputs.append(thetas + xs + ys)
                thetas = []
                xs = []
                ys = []
                nTrn += 1
                if nTrn == maxTrn:
                    break
                continue
            spt = [float(x) for x in line.split()]
            xs.append([spt[0]])
            ys.append([spt[1]])
            thetas.append([spt[2]])
    # Stop reading further files once the sample cap has been reached.
    if nTrn == maxTrn:
        break
# Pair every input with its label as numpy arrays.
trn_data = [(np.asarray(inp), np.asarray(lbl))
            for inp, lbl in zip(trn_inputs, trn_labels)]
# NOTE(review): no random seed is set, so the order differs between runs.
random.shuffle(trn_data)
print("Done compiling training set")
In [17]:
# Display how many (input, label) pairs ended up in the training set.
len(trn_data)
Out[17]:
In [30]:
# Build `test_data` from the files in testfnames.
#
# Same file layout as the training files: "x y theta" rows, with a line
# starting with "label" closing each block. Unlike the training targets,
# the label is kept as a single float (the class index), not one-hot.
# NOTE(review): blocks are not checked for the expected row count here,
# whereas the training loader drops blocks that are not 400 rows long.
test_inputs = []
test_labels = []
for f in testfnames:
    print("Adding " + f + " to test set")
    thetas = []
    xs = []
    ys = []
    # `with` closes the file even if a row fails to parse; iterating the
    # handle reads line by line instead of materialising the whole file.
    with open(testDir + f, 'r') as fin:
        for line in fin:
            if line == "\n":
                continue
            if line.startswith("label"):
                # Done a block
                test_labels.append(float(line.split()[1]))
                if bThetas:
                    test_inputs.append(thetas)
                else:
                    test_inputs.append(thetas + xs + ys)
                thetas = []
                xs = []
                ys = []
                continue
            spt = [float(x) for x in line.split()]
            xs.append([spt[0]])
            ys.append([spt[1]])
            thetas.append([spt[2]])
# Pair every input with its scalar label as numpy arrays.
test_data = [(np.asarray(inp), np.asarray(lbl))
             for inp, lbl in zip(test_inputs, test_labels)]
print("Done")
In [23]:
# Display how many (input, label) pairs ended up in the test set.
len(test_data)
Out[23]:
In [22]:
# Training and test set sizes on one line, separated by a space.
print("%d %d" % (len(trn_data), len(test_data)))
In [26]:
# Drop the current binding of `nn` so no earlier network is reused by accident.
nn = None
In [16]:
# Re-execute the FullyConnectedNN module (imported as fnn) so edits to its
# source are picked up. `reload` is a builtin in Python 2 only.
reload(fnn)
Out[16]:
In [33]:
# The input layer width follows the feature choice: 400 when only the
# thetas are fed in, 1200 when thetas, xs and ys are concatenated.
if bThetas:
    layer_sizes = [400, 100, 2]
else:
    layer_sizes = [1200, 100, 2]
nn = fnn.FullyConnectedNN(layer_sizes)
# Order follows nn.train(training data, nEpoch, minibatch, eta, testdata=None).
train_params = [20, 100, 5.]
n_epoch, minibatch, eta = train_params
nn.train(trn_data, n_epoch, minibatch, eta, test_data)
In [10]:
# Bind a second name to the trained network. This is the same object, not a
# copy, so it only protects against `nn` being rebound later.
nnSave = nn
In [11]:
# Class index of every training sample (argmax of its one-hot target),
# shown as a histogram to check the class balance.
trnlabels = [np.argmax(sample[1]) for sample in trn_data]
plt.hist(trnlabels)
Out[11]:
In [27]:
# Evaluate the network on the test set and report per-class accuracy.
outs = []    # predicted class index per sample, as float
labels = []  # true class index per sample, as float
corr = []    # True where prediction and label agree
for inp, lbl in test_data:
    labels.append(float(lbl))
    outs.append(float(np.argmax(nn.feedforward(inp))))
    corr.append(labels[-1] == outs[-1])
# n1/ncorr1 count samples of class 0, n2/ncorr2 samples of class 1.
n1, n2 = 0, 0
ncorr1, ncorr2 = 0, 0
for truth, hit in zip(labels, corr):
    if truth == 0.:
        n1 += 1
        ncorr1 += hit
    elif truth == 1.:
        n2 += 1
        ncorr2 += hit
print("%s %s %s %s" % (n1, ncorr1, n2, ncorr2))
# A class that is absent from the test set would divide by zero; report
# nan for it instead of aborting the cell.
frac1 = float(ncorr1) / n1 if n1 else float("nan")
frac2 = float(ncorr2) / n2 if n2 else float("nan")
# "label 1" / "label 2" in the messages refer to class 0 / class 1.
print("Fraction of correct for label 1:  %s" % frac1)
print("Fraction of correct for label 2:  %s" % frac2)
# Overlay true labels (blue) and predictions (red).
plt.hist(labels, alpha=0.5, color='b')
plt.hist(outs, alpha=0.5, color='r')
Out[27]:
In [28]:
# Release the training and test sets to save RAM.
# NOTE(review): this only rebinds these two names; any other name that
# still refers to the same data keeps it alive.
trn_data = test_data = None
In [29]:
# Edge values of the unlabeled data sets to evaluate; every value maps to a
# file called "edge<value>".
edges = [15, 15.25, 15.75, 16, 16.5, 17, 17.5, 18, 19, 20, 23, 25, 30]
# edges = [15,15.25,15.5,15.75,16,16.25,16.5,16.75,17,17.5,18,18.5,19,19.5,20,21,22,23,25,30]
N = 400
L = 3.0
# rho_ = L*L*N / edge^2, one entry per edge value.
scale = L * L * N
rho_ = [scale / float(e * e) for e in edges]
# File names are derived from `edges` so the two lists cannot drift apart;
# "%g" prints 15 as "15" and 15.25 as "15.25".
unlabeledfnames = ["edge%g" % e for e in edges]
In [15]:
# Run the network over every unlabeled file and collect, per file, the mean
# and standard deviation of the predicted class index.
#
# Unlabeled files hold "x y theta" rows; a blank line closes each block.
# Rows after the last blank line are therefore never turned into a sample.
maxSample = 2000  # cap on the number of blocks read per file
n = 0
out_stats = [] # mean and std pairs
for f in unlabeledfnames:
    inputs = []
    print("Adding " + f + " unlabeled set")
    thetas = []
    xs = []
    ys = []
    n = 0
    # `with` closes the file even if a row fails to parse; iterating the
    # handle avoids reading the whole file when maxSample stops us early.
    with open(unlabeledDir + f, 'r') as fin:
        for line in fin:
            if n == maxSample:
                break
            if line == "\n":
                # Done a block
                n += 1
                if bThetas:
                    inputs.append(thetas)
                else:
                    inputs.append(thetas + xs + ys)
                thetas = []
                xs = []
                ys = []
                continue
            spt = [float(x) for x in line.split()]
            xs.append([spt[0]])
            ys.append([spt[1]])
            thetas.append([spt[2]])
    filedata = [np.asarray(block) for block in inputs]
    # NOTE(review): other cells read `edgetestdata`, but the append that
    # would fill it is disabled here, so it is never populated.
    # edgetestdata.append(filedata)
    print(len(filedata))
    outs = [np.argmax(nn.feedforward(sample)) for sample in filedata]
    out_stats.append([np.average(outs), np.std(outs)])
    print(out_stats[-1])
print("Done")
In [13]:
# Display the current rho_ list.
rho_
Out[13]:
In [73]:
# Largest output activation of the network for every sample of the third
# entry of edgetestdata, plotted against the sample index.
# NOTE(review): `edgetestdata` is not assigned in any visible cell; this
# cell relies on leftover kernel state.
y = [np.max(nn.feedforward(sample)) for sample in edgetestdata[2]]
plt.plot(y[:])
Out[73]:
In [14]:
# Dump the rho_ values and the per-file mean predictions.
# NOTE(review): `means` must already exist in the kernel when this runs.
print(rho_)
print(means)
In [16]:
# Display the current edges list.
edges
Out[16]:
In [16]:
# Split the [mean, std] pairs collected in out_stats into two parallel lists
# and plot the mean predicted class against the edge value.
means = [pair[0] for pair in out_stats]
stds = [pair[1] for pair in out_stats]
plt.plot(edges, means, '+')
Out[16]:
In [78]:
# Recompute the per-file prediction statistics from edgetestdata for a
# coarser set of edge values and plot the mean against rho_.
# NOTE(review): `edgetestdata` is not assigned in any visible cell; this
# cell relies on leftover kernel state.
out_stats = [] # mean and std pairs
edges = [10, 15, 20, 25, 30, 35, 40]
N = 400
L = 3.0
rho_ = [L * L * N / float(x * x) for x in edges]
for j in range(len(rho_)):
    outs = [np.argmax(nn.feedforward(sample)) for sample in edgetestdata[j]]
    out_stats.append([np.average(outs), np.std(outs)])
means = [pair[0] for pair in out_stats]
stds = [pair[1] for pair in out_stats]
# The first entry (edge 10) is left out of the plot.
plt.plot(rho_[1:], means[1:], '+')
Out[78]:
In [71]:
# Mean prediction against rho_ with stds as the error bar; the [1:] slices
# leave out the first entry of each list.
plt.errorbar(rho_[1:], means[1:], yerr=stds[1:], fmt='+')
Out[71]:
In [50]:
# NOTE(review): `out_avgs` is not assigned in any visible cell; this relies
# on leftover kernel state (possibly an earlier name for `means` - confirm).
plt.plot(edges,out_avgs,'+')
Out[50]:
In [41]:
# Bind nnSave to the current network. This is the same object, not a copy.
nnSave = nn
In [42]:
# Save the output-vs-edge plot and the network (sizes, training parameters,
# weights and biases) under saves/<savetag>.*, unless the tag is taken.
path = "saves/"
savetag = "incl_15Trn_2e4TrnSamp_2"
if not os.path.isfile(path+savetag+".nn"):
    # NOTE(review): `out_avgs` is not assigned in any visible cell.
    plt.plot(edges,out_avgs,'+') # Seems to need me to make the plot first
    plt.gcf().savefig(path+savetag+"_output-vs-edge.png")
    # Build the two header strings separately. Previously one shared buffer
    # was reset after collecting the sizes, so the "NN sizes" line was
    # written with the training parameters.
    # Sizes come from nnSave so the header describes the weights written below.
    sizes_str = "".join(str(n_units) + " " for n_units in nnSave.sizes)
    params_str = "".join(str(t) + " " for t in train_params)
    # `with` flushes and closes the file; it was never closed before.
    with open(path+savetag+".nn", 'w') as fout:
        fout.write("NN sizes: " + sizes_str + "\n")
        fout.write("training params (Nepoch, minibatch, eta): " + params_str + "\n")
        # Per layer: one line with all weights (row after row), then one
        # line with the biases.
        for w, b in zip(nnSave.W, nnSave.B):
            for wj in w:
                fout.write("".join(str(float(ww)) + " " for ww in wj))
            fout.write("\n")
            fout.write("".join(str(float(bj)) + " " for bj in b))
            fout.write("\n")
else:
    print("File tag already taken")
In [47]:
# Recompute rho_ = L*L*N / edge^2 for the current edges list and print it.
N = 400
L = 3.0
rho_ = []
for x in edges:
    rho_.append(L * L * N / float(x * x))
print(rho_)
In [ ]:
# Load three data splits through the project-local mnist_loader module
# (presumably MNIST training / validation / test, judging by the names).
# NOTE(review): this import sits mid-notebook, away from the other imports.
import mnist_loader
trndat, valdat, testdat = mnist_loader.load_data_wrapper()
In [ ]:
# Train a 784-100-10 network on the splits loaded by mnist_loader.
# The class is reached through the `fnn` alias: the module is only imported
# as `import FullyConnectedNN as fnn`, so the bare class name is undefined.
nn = fnn.FullyConnectedNN([784, 100, 10])
# Arguments: training data, 21 epochs, minibatch size 100, eta 3.0, test data.
nn.train(trndat, 21, 100, 3., testdat)