In [3]:
import numpy as np
# Precomputed 1/(2*pi): used below to map angles theta in [0, 2pi] onto [0, 1]
oneOver2Pi = 1./(2.*np.pi)

In [4]:
def edgeLenCalc(block):
    """Infer box edge lengths (maxX, maxY) from one block of snapshot lines.

    Each line is whitespace-separated floats; columns 5/7 hold x vertex
    coordinates and columns 6/8 hold y vertex coordinates. The largest
    coordinate in each direction is rounded UP to the nearest 0.5, giving
    the edge length at 0.5 resolution.
    """
    # Seed with 0. so an empty block yields (0., 0.), as before
    xCoords, yCoords = [0.], [0.]
    for line in block:
        vals = [float(v) for v in line.split()]
        xCoords.extend((vals[5], vals[7]))
        yCoords.extend((vals[6], vals[8]))
    # Ceil of twice the max, halved again -> round up to 0.5 resolution
    return np.ceil(2.*max(xCoords))/2., np.ceil(2.*max(yCoords))/2.

In [5]:
#
# Processing parameters (edgevar run)
#
run = "edgevar"
in_dir = "/home/michael/msc/mcmd/output/"+run+"/"
trn_dir = "/home/michael/msc/mcmd/nn/fnn/data/"+run+"/train/"
test_dir = "/home/michael/msc/mcmd/nn/fnn/data/"+run+"/test/"
unlabeled_dir = "/home/michael/msc/mcmd/nn/fnn/data/"+run+"/unlabeled/"

# For trnfnames, dict of run: label
trnfnames = {"edge40": 1., "edge10": 0., "edge15": 0.}
nTrn2Test = 200 # How many train snaps channelled to test

# Runs to process without labels.
# Bugfix: "edge16.25" appeared twice in this list, so the same file would
# have been processed (and its output rewritten) twice.
unlabeledfnames = ["edge20", "edge25", "edge30", "edge35", "edge15",
                   "edge15.25", "edge15.5", "edge15.75", "edge16", "edge16.25",
                   "edge16.5", "edge16.75", "edge17", "edge17.5",
                   "edge18", "edge18.5", "edge19", "edge19.5", "edge21",
                   "edge22", "edge23", "edge24"]

In [6]:
#
# Processing parameters
#
# NOTE: this cell overrides the edgevar configuration defined in the cell
# above; whichever config cell was executed last determines the paths and
# labels used by the processing cells below.
run = "XT"
in_dir = "/home/michael/msc/mcmd/output/"+run+"/"
trn_dir = "/home/michael/msc/mcmd/nn/fnn/data/"+run+"/train/"
test_dir = "/home/michael/msc/mcmd/nn/fnn/data/"+run+"/test/"
unlabeled_dir = "/home/michael/msc/mcmd/nn/fnn/data/"+run+"/unlabeled/"

# For trnfnames, dict of run: label
# where label is 0-5 for [D, X, T, U, L, iso] respectively
trnfnames = {"X": 1., "T": 2.}

nblTrn2Test = 300 # How many train blocks channelled to test
nblPerTrnFile = 200 # Number of blocks per training file PER type (used to parse training blocks)

unlabeledfnames = []  # no unlabeled runs for the XT data set

Something that might be interesting: compare NN performance when trained on two types of data — one with perfectly balanced training data, and one where each batch contains only one type of configuration.

Also, for runs that have fewer blocks than the others, loop over the file again and append their blocks to the subsequent training files, to maintain an even ratio of configurations per file.


In [9]:
nblList


Out[9]:
{'T': 8687, 'X': 10001}

In [8]:
# Count how many blocks (snapshots) each training-run file contains.
# Blocks are separated by blank lines; "Nx..." header lines are ignored.
nblList = {}    # run name -> number of blocks
nblTotal = 0    # total number of blocks across all runs
nblMaxRun = 0   # nbl of biggest file
for f in trnfnames:
    # with-statement guarantees the file is closed even on a parse error
    with open(in_dir+f, 'r') as fin:
        n = 0
        for line in fin:
            if line.startswith("Nx"): continue
            if line == "\n":
                n += 1
    nblList[f] = n
    if n > nblMaxRun: nblMaxRun = n
    nblTotal += n

# How many training files? Use floor division (//): the original "/" is
# integer division under Python 2 but becomes FLOAT division under Python 3,
# which would make nTrnf a float. Round up if there is a remainder.
nTrnf = (nblMaxRun - nblTrn2Test) // nblPerTrnFile
if (nblMaxRun - nblTrn2Test) % nblPerTrnFile != 0: nTrnf += 1

In [10]:
nblMaxRun


Out[10]:
10001

In [21]:
# Split each labeled run into training, test, and unlabeled output files,
# normalizing x,y by the box edge lengths and theta to [0, 1].
for f in trnfnames:
    print("processing " + f + " for training data")
    fin = open(in_dir+f, 'r')
    fout = open(trn_dir+f, 'w')

    # Divert the first blocks to test/unlabeled only if this run has enough
    # blocks to spare.
    # Bugfix: this cell used the stale `nTrn2Test` (=200) left over from the
    # superseded edgevar config cell; this run's parameter is `nblTrn2Test`.
    bAddTest = False
    fout_test = None
    fout_unlbl = None
    if nblList[f] > (nblTrn2Test + nblPerTrnFile):
        fout_test = open(test_dir+f, 'w')
        fout_unlbl = open(unlabeled_dir+f, 'w')
        bAddTest = True

    # calculate edge length based on vertices of first block
    block = []
    fin.readline() # Skip header
    for line in fin.readlines():
        if line == "\n": break
        block.append(line)
    maxX, maxY = edgeLenCalc(block)
    normX, normY = 1./maxX, 1./maxY # factors to normalize x,y
    fin.seek(0)

    nbl = 0
    fin.readline() # Skip header
    for line in fin.readlines():
        if line.startswith("Nx"): continue
        if (nbl < nblTrn2Test) and bAddTest:
            # Test branch: labeled copy to test_dir, label-free copy to
            # unlabeled_dir.
            if line == "\n":
                nbl += 1
                fout_test.write('label %f\n\n' % (trnfnames[f]))
                fout_unlbl.write("\n")
                continue
            spt = [float(x) for x in line.split()]
            th = spt[4]
            if nbl % 2 == 0:
                # rotate every other block pi/2
                # note thetas should be [0,2pi]
                th -= np.pi*0.5
                if th < 0.: th += 2.*np.pi
            # Bugfix: normalization to [0, 1] must apply to EVERY block; the
            # original applied it only to the rotated (even-index) blocks in
            # this branch, unlike the training branch below.
            th *= oneOver2Pi
            fout_test.write('%f %f %f\n' % (spt[2]/maxX, spt[3]/maxY, th))
            fout_unlbl.write('%f %f %f\n' % (spt[2]/maxX, spt[3]/maxY, th))
        else:
            # Training branch.
            if line == "\n":
                nbl += 1
                fout.write('label %f\n\n' % (trnfnames[f]))
                continue
            spt = [float(x) for x in line.split()]
            th = spt[4]
            if nbl % 2 == 0:
                # rotate every other block pi/2
                # note thetas should be [0,2pi]
                th -= np.pi*0.5
                if th < 0.: th += 2.*np.pi
            th *= oneOver2Pi # normalize theta to [0, 1]

            # write to file
            fout.write('%f %f %f\n' % (spt[2]/maxX, spt[3]/maxY, th))
    fout.close()
    # Bugfix: the test/unlabeled handles were never closed, risking
    # unflushed output.
    if bAddTest:
        fout_test.close()
        fout_unlbl.close()
    fin.close()
print("Done processing training files")


processing X for training data
processing T for training data
Done processing training files

In [ ]:
# Write normalized (x, y, theta) triples for the unlabeled runs.
for f in unlabeledfnames:
    print("processing " + f + " for unlabeled data")
    # with-statement guarantees both files are closed even on error
    with open(in_dir+f, 'r') as fin, open(unlabeled_dir+f, 'w') as fout:
        # calculate edge length based on vertices of first block
        # NOTE(review): unlike the training cell, no header line is skipped
        # here -- confirm these files start directly with particle data.
        block = []
        for line in fin:
            if line == "\n": break
            block.append(line)
        maxX, maxY = edgeLenCalc(block)
        fin.seek(0)

        for line in fin:
            if line == "\n":
                fout.write("\n")
                continue
            # Bugfix: split() instead of split(" ") -- the latter produces
            # empty fields on runs of spaces, crashing float(''). This also
            # matches the parsing used by the other processing cells.
            spt = line.split()
            th = float(spt[4])
            # Fold theta into [0, pi].
            # NOTE(review): the training cells instead normalize theta to
            # [0, 1] via 1/(2*pi) -- confirm this difference is intentional.
            if th > np.pi: th -= np.pi
            # Append phase index
            fout.write('%f %f %f\n' % (float(spt[2])/maxX, float(spt[3])/maxY, th))
print("Done processing unlabeled data")

In [ ]:
def ordercalc(block):
    thetas = []
    for line in block:
        spt = line.split()
        spt = [float(x) for x in spt]
        thetas.append([spt[i] for i in [0,1,4,9,10,11,12,13,14,15,16]])
        
    nbrthetas = []
    for t in thetas:
        select = []
        for tt in thetas:
            if tt[0] == t[0]: continue
            if tt[1] == t[1]:
                select.append(tt[2])
            # If we want to include neighbor cells...
#             for ni in t[3:]:
#                 if ni == tt[1]:
#                     select.append(tt[2])
        nbrthetas.append([t[2], select])

    S = 0.
    N = 0
    for r in nbrthetas:
        for t in r[1]:
            S += np.cos(2.*(r[0] - t))
            N += 1
    return S/N