In [3]:
import numpy as np
# Precomputed 1/(2*pi): used below to normalize angles from [0, 2*pi] down to
# [0, 1] before writing neural-network training rows.
oneOver2Pi = 1./(2.*np.pi)
In [4]:
def edgeLenCalc(block):
    """Infer the simulation box edge lengths from one block of particle lines.

    Each line is whitespace-separated floats; columns 5 and 7 hold x vertex
    coordinates, columns 6 and 8 hold y vertex coordinates. The largest value
    seen in each direction is rounded UP to the nearest 0.5.

    Returns (maxX, maxY) as floats (numpy scalars after the ceil).
    """
    maxX = 0.
    maxY = 0.
    for line in block:
        vals = [float(tok) for tok in line.split()]
        maxX = max(maxX, vals[5], vals[7])
        maxY = max(maxY, vals[6], vals[8])
    # Doubling before ceil and halving after gives 0.5 resolution.
    return np.ceil(2 * maxX) / 2., np.ceil(2 * maxY) / 2.
In [5]:
#
# Processing parameters (run "edgevar")
#
run = "edgevar"
in_dir = "/home/michael/msc/mcmd/output/"+run+"/"
trn_dir = "/home/michael/msc/mcmd/nn/fnn/data/"+run+"/train/"
test_dir = "/home/michael/msc/mcmd/nn/fnn/data/"+run+"/test/"
unlabeled_dir = "/home/michael/msc/mcmd/nn/fnn/data/"+run+"/unlabeled/"
# For trnfnames, dict of run: label
trnfnames = {"edge40": 1., "edge10": 0., "edge15": 0.}
nTrn2Test = 200 # How many train snaps channelled to test
# FIX: "edge16.25" was listed twice; the duplicate made the unlabeled pass
# process the same input file twice and overwrite its own output.
# NOTE(review): "edge15" is both a labelled training run (label 0. above) and
# listed here as unlabeled — confirm the double use is intentional.
unlabeledfnames = ["edge20", "edge25", "edge30", "edge35", "edge15",
                   "edge15.25", "edge15.5", "edge15.75", "edge16", "edge16.25",
                   "edge16.5", "edge16.75", "edge17", "edge17.5",
                   "edge18", "edge18.5", "edge19", "edge19.5", "edge21",
                   "edge22", "edge23", "edge24"]
In [6]:
#
# Processing parameters (run "XT")
#
run = "XT"
# Input/output locations derived from the run name.
in_dir = "/home/michael/msc/mcmd/output/%s/" % run
trn_dir = "/home/michael/msc/mcmd/nn/fnn/data/%s/train/" % run
test_dir = "/home/michael/msc/mcmd/nn/fnn/data/%s/test/" % run
unlabeled_dir = "/home/michael/msc/mcmd/nn/fnn/data/%s/unlabeled/" % run
# For trnfnames, dict of run: label
# where label is 0-5 for [D, X, T, U, L, iso] respectively
trnfnames = {"X": 1., "T": 2.}
nblTrn2Test = 300   # How many train blocks channelled to test
nblPerTrnFile = 200 # Number of blocks per training file PER type (used to parse training blocks)
unlabeledfnames = []
Something that might be interesting: compare NN performance when trained on two types of data — one where the training data is perfectly balanced, and one where each batch contains only a single configuration type.
Also, for runs that have fewer blocks than others, loop back over the file and append their blocks to the subsequent training files, so that each file keeps an even ratio of configurations.
In [9]:
# Inspect the per-run block counts.
# NOTE(review): execution counts are out of order (this is In[9] but nblList
# is defined in the In[8] cell below) — under Restart & Run All this cell
# raises NameError; it only works after the counting cell has run.
nblList
Out[9]:
In [8]:
# Count the number of blocks in each training run's input file.
# A block is terminated by a blank line; "Nx..." header lines are ignored.
nblList = {}    # run name -> block count
nblTotal = 0    # blocks across all runs
nblMaxRun = 0   # block count of the biggest run
for f in trnfnames:
    n = 0
    # FIX: use a context manager so the file is closed even on error, and
    # iterate the handle directly instead of readlines() (no full-file load).
    with open(in_dir + f, 'r') as fin:
        for line in fin:
            if line.startswith("Nx"):
                continue
            if line == "\n":
                n += 1
    nblList[f] = n
    if n > nblMaxRun:
        nblMaxRun = n
    nblTotal += n
# How many training files? ceil((nblMaxRun - nblTrn2Test) / nblPerTrnFile).
# FIX: use // — plain / yields a float under Python 3, and nTrnf must be an
# integer file count (// floors identically under Python 2).
nTrnf = (nblMaxRun - nblTrn2Test) // nblPerTrnFile
if (nblMaxRun - nblTrn2Test) % nblPerTrnFile != 0:
    nTrnf += 1
In [10]:
# Inspect the largest run's block count (defined in the In[8] cell above).
# NOTE(review): out-of-order execution counts — only valid after that cell ran.
nblMaxRun
Out[10]:
In [21]:
# Write normalized (x, y, theta) rows per block for each labelled training run.
# The first nTrn2Test blocks of sufficiently large runs are routed to the
# test and unlabeled outputs instead of the training output.
# NOTE(review): indentation was reconstructed from a flat notebook export; in
# particular `th *= oneOver2Pi` is assumed to apply to EVERY row, not only the
# rotated ones — confirm against the original notebook.
# NOTE(review): hidden kernel state — this cell mixes `nTrn2Test` (defined in
# the "edgevar" config cell) with `nblList`/`nblPerTrnFile` (from the "XT"
# config). A fresh Restart & Run All with only one config cell executed would
# fail or use the wrong threshold.
for f in trnfnames:
    print "processing " + f + " for training data"
    fin = open(in_dir+f,'r')
    fout = open(trn_dir+f,'w')
    bAddTest = False
    # Only runs with enough blocks contribute to the test/unlabeled sets.
    if nblList[f] > (nTrn2Test + nblPerTrnFile):
        fout_test = open(test_dir+f, 'w')
        fout_unlbl = open(unlabeled_dir+f,'w')
        bAddTest = True
        # NOTE(review): fout_test/fout_unlbl are never closed — flushing
        # relies on interpreter exit / garbage collection.
    # calculate edge length based on vertices of first block
    # (assumes every block in the file shares the same box size)
    block = []
    fin.readline() # Skip header
    for line in fin.readlines():
        if line == "\n": break
        block.append(line)
    maxX, maxY = edgeLenCalc(block)
    normX, normY = 1./maxX, 1./maxY #factors to normalize x,y (NOTE(review): unused; maxX/maxY divided directly below)
    fin.seek(0)
    nbl = 0  # blocks consumed so far
    fin.readline() # Skip header
    for line in fin.readlines():
        if line.startswith("Nx"): continue  # skip any embedded header lines
        if (nbl < nTrn2Test) and bAddTest:
            # First nTrn2Test blocks -> test + unlabeled outputs.
            if line == "\n":
                # Blank line ends a block: emit its label and advance.
                nbl+=1
                fout_test.write('label %f\n\n' % (trnfnames[f]))
                fout_unlbl.write("\n")
                continue
            spt = [float(x) for x in line.split()]
            th = spt[4]  # particle orientation
            if nbl%2 == 0:
                # rotate every other block pi/2
                # note thetas should be [0,2pi]
                th -= np.pi*0.5
                if th < 0.: th+=2.*np.pi
            th *= oneOver2Pi  # normalize theta to [0, 1]
            fout_test.write('%f %f %f\n' % (float(spt[2])/maxX, float(spt[3])/maxY, th))
            fout_unlbl.write('%f %f %f\n' % (float(spt[2])/maxX, float(spt[3])/maxY, th))
        else:
            # Remaining blocks become labelled training rows.
            if line == "\n":
                nbl+=1
                fout.write('label %f\n\n' % (trnfnames[f]))
                continue
            spt = [float(x) for x in line.split()]
            x,y,th = spt[2],spt[3],spt[4]  # NOTE(review): x, y unused; spt[2]/spt[3] reused directly below
            if nbl%2 == 0:
                # rotate every other block pi/2
                # note thetas should be [0,2pi]
                th -= np.pi*0.5
                if th < 0.: th+=2.*np.pi
            th *= oneOver2Pi  # normalize theta to [0, 1]
            # write to file
            fout.write('%f %f %f\n' % (float(spt[2])/maxX, float(spt[3])/maxY, th))
    fout.close()
    fin.close()
print "Done processing training files"
In [ ]:
# Write normalized (x, y, theta) rows for each unlabeled run.
# Blocks are separated by blank lines, which are passed through to the output.
for f in unlabeledfnames:
    # Single-argument print() works identically under Python 2 and 3.
    print("processing " + f + " for unlabeled data")
    fin = open(in_dir + f, 'r')
    fout = open(unlabeled_dir + f, 'w')
    try:
        # calculate edge length based on vertices of first block
        # (assumes every block in the file shares the same box size)
        block = []
        for line in fin:
            if line == "\n": break
            block.append(line)
        maxX, maxY = edgeLenCalc(block)
        fin.seek(0)
        for line in fin:
            if line == "\n":
                fout.write("\n")
                continue
            # FIX: split() instead of split(" ") — the latter yields empty
            # fields on consecutive spaces and keeps the trailing newline
            # attached to the last field, breaking float() conversion.
            spt = line.split()
            th = float(spt[4])
            # Fold theta into [0, pi] (nematic head-tail symmetry).
            # NOTE(review): unlike the training cell, theta is NOT scaled by
            # 1/(2*pi) here — confirm this asymmetry is intentional.
            if th > np.pi: th -= np.pi
            fout.write('%f %f %f\n' % (float(spt[2]) / maxX, float(spt[3]) / maxY, th))
    finally:
        # FIX: close handles even if parsing fails mid-file.
        fout.close()
        fin.close()
print("Done processing unlabeled data")
In [ ]:
def ordercalc(block):
    """Compute a nematic-style order parameter for one block of particles.

    Each line in `block` is whitespace-separated floats; column 0 looks like a
    unique particle id, column 1 a cell id, column 4 the orientation theta,
    and columns 9-16 neighbour-cell ids (TODO confirm column semantics).
    For every ordered pair of distinct particles sharing a cell, accumulates
    cos(2*(theta_i - theta_j)) and returns the average.

    Returns:
        float: mean of cos(2*dtheta) over same-cell pairs; 0.0 when there are
        no pairs (FIX: previously raised ZeroDivisionError on an empty block
        or when no two particles shared a cell).
    """
    # Keep only [id, cell, theta, neighbour-cell ids...] per particle.
    thetas = []
    for line in block:
        spt = [float(x) for x in line.split()]
        thetas.append([spt[i] for i in [0, 1, 4, 9, 10, 11, 12, 13, 14, 15, 16]])
    # For each particle, collect thetas of other particles in the same cell.
    nbrthetas = []
    for t in thetas:
        select = []
        for tt in thetas:
            if tt[0] == t[0]:
                continue  # skip self-pairing via the id column
            if tt[1] == t[1]:
                select.append(tt[2])
            # If we want to include neighbor cells...
            # for ni in t[3:]:
            #     if ni == tt[1]:
            #         select.append(tt[2])
        nbrthetas.append([t[2], select])
    S = 0.
    N = 0
    for r in nbrthetas:
        for th in r[1]:
            S += np.cos(2. * (r[0] - th))
            N += 1
    if N == 0:
        # No same-cell pairs: the average is undefined; report zero order
        # instead of crashing with ZeroDivisionError.
        return 0.0
    return S / N