In [1]:
import numpy as np
import random
# Angular constants shared by the processing cells below.
twopi = np.pi * 2.0          # one full turn, in radians
oneOver2Pi = 1.0 / twopi     # factor that maps an angle in [0, 2pi] onto [0, 1]
In [2]:
import time
def time_usage(func):
    """Decorator that reports the wall-clock duration of every call to ``func``.

    The returned wrapper forwards all positional and keyword arguments to
    ``func``, prints ``"elapsed time: <seconds>"`` once the call has returned,
    and hands back the value ``func`` returned.  If ``func`` raises, the
    exception propagates and nothing is printed.

    Parameters
    ----------
    func : callable
        The function to be timed.

    Returns
    -------
    callable
        A wrapper with the same call signature and metadata as ``func``.
    """
    # Local import keeps this cell self-contained.
    import functools

    # functools.wraps copies __name__/__doc__ onto the wrapper, so a decorated
    # function still identifies itself correctly in help() and tracebacks.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        beg_ts = time.time()
        retval = func(*args, **kwargs)
        end_ts = time.time()
        print("elapsed time: %f" % (end_ts - beg_ts))
        return retval
    return wrapper
In [3]:
#
# Processing parameters
#
# Name of the simulation run; it selects the sub-directory that raw files are read from.
run = "bigbox"
# Raw input files are opened for reading from in_dir.
in_dir = "/home/walterms/project/walterms/mcmd/output/"+run+"/"
# Destinations for the processed training, test and unlabeled files.
trn_dir = "/home/walterms/project/walterms/mcmd/nn/data/train/"
test_dir = "/home/walterms/project/walterms/mcmd/nn/data/test/"
unlabeled_dir = "/home/walterms/project/walterms/mcmd/nn/data/unlbl/"
# For trnfnames, dict of run: label
# [iso, D, T, X, U, L]
# NOTE(review): trnfnames only exists as the commented-out example below, yet the
# block-counting cell and processTrain() both iterate over it. On a fresh kernel
# those cells raise NameError until trnfnames is defined somewhere.
# trnfnames = {"edge15.00": 1, "edge30.00": 0}
# File names (relative to in_dir) that are processed into the unlabeled set.
unlblfnames = ["bigbox1", "bigbox2"]
# nbl parameters ("nbl" = number of blocks; a block is a run of lines ended by an empty line)
# Use -1 to mean all
nblSkip = 1 # Number of leading blocks of each file that are skipped
# NOTE(review): the block-counting cell uses this value as a divisor, so the -1
# sentinel needs special handling there.
nblPerTrnFile = -1
nblTrn2Test = 500 # How many train blocks channelled to test
# How many blocks go to the unlabeled set (a later cell reassigns this to 5000).
nblUnlbl = 1000
In [4]:
# First count blocks of each file
# A block is terminated by an empty line, so counting empty lines counts blocks.
nblList = {}     # run name -> number of blocks found in that run's input file
nblTotal = 0     # number of blocks summed over all runs
nblMaxRun = 0    # nbl of biggest file
for f in trnfnames:
    # `with` guarantees the handle is released even if reading fails, and
    # iterating the file object avoids loading the whole file just to count.
    with open(in_dir + f, 'r') as fin:
        n = sum(1 for line in fin if line == "\n")
    nblList[f] = n
    nblMaxRun = max(nblMaxRun, n)
    nblTotal += n

# How many training files?
# Blocks left for training once the test share has been removed (never negative).
nblForTrn = max(nblMaxRun - nblTrn2Test, 0)
if nblPerTrnFile == -1:
    # -1 means "all blocks in one file"; it must not be used as a divisor.
    nTrnf = 1 if nblForTrn > 0 else 0
else:
    # Ceiling division with `//`, so the result is an int on Python 2 and 3.
    nTrnf = -(-nblForTrn // nblPerTrnFile)
print(nblList)
In [11]:
# Run the training-data split for every run listed in trnfnames.
# NOTE(review): in this file the cell that defines processTrain comes after this
# call, so on a fresh kernel the defining cell has to be executed first.
processTrain()
In [5]:
def processTrain():
    """Split every labelled run file into training, test and unlabeled files.

    For each run name ``f`` in the global ``trnfnames`` (run name -> label) the
    file ``in_dir + f`` is read and written back out under the same name into
    ``trn_dir`` and, when the run is long enough, ``test_dir`` and
    ``unlabeled_dir``.

    Input format, as read by this function:
      * an optional header line starting with a letter; if one of its
        ``|``-separated fields contains ``boxEdge`` the number following it is
        the (square) box edge.  Without it the box size comes from
        ``edgeLenCalc`` applied to the raw lines of the first block;
      * blocks of whitespace-separated numeric lines, each block ended by an
        empty line.  Columns 2, 3 and 4 of a line are x, y and theta.

    Per particle, theta is advanced by the block's rotation angle, wrapped
    once below 2*pi and scaled by ``oneOver2Pi``; (x, y) is rotated about the
    origin and scaled by 1/width, 1/height.  The rotation (a random multiple
    of 90 degrees) is drawn at the end of each block and used for the next
    one, so the first block of a file is not rotated.

    Routing of block number ``nbl`` (0-based, counted per file):
      * ``nbl < nblSkip``: discarded;
      * if the run has more than ``nblTrn2Test + nblPerTrnFile`` blocks
        (according to ``nblList``), blocks ``[500, 500 + nblTrn2Test)`` go to
        the test file (with a label line) and blocks ``[500, 500 + nblUnlbl)``
        go to the unlabeled file (without one);
      * every other block goes to the training file (with a label line).
        Reading stops after a training block whose index exceeds ``maxtrn``.

    Differences from the previous implementation:
      * every opened file is closed, also on short inputs, early exit or error
        (the test/unlabeled handles were only closed at one exact index);
      * the block directly after the hold-out window used to be written
        nowhere (off-by-one); it now goes to the training file;
      * when nothing is diverted to test/unlabeled, blocks inside the window
        are kept as training data instead of being dropped;
      * an empty input file is skipped instead of raising IndexError.

    Returns None; all results are written to disk.
    """
    maxtrn = 10000    # stop reading a run after a training block with index > maxtrn
    holdStart = 500   # index of the first block diverted to the test/unlabeled sets

    for f in trnfnames:
        print("processing " + f + " for training data")

        # Read the input once; the header, first block and data loop all use this list.
        with open(in_dir + f, 'r') as fin:
            lines = fin.readlines()
        if not lines:
            print("skipping " + f + ": input file is empty")
            continue

        # Only divert blocks to test/unlabeled when the run is long enough.
        bAddTest = nblList[f] > (nblTrn2Test + nblPerTrnFile)

        # find width from file header
        width = 0.
        for field in lines[0].split("|"):
            if "boxEdge" in field:
                width = float(field.split()[1])
        height = width
        if width == 0.:
            # calculate edge length based on vertices of first block
            firstBlock = []
            for line in lines:
                if line == "\n": break
                if line[0].isalpha(): continue
                firstBlock.append(line)
            width, height = edgeLenCalc(firstBlock)

        thNorm = oneOver2Pi
        normX, normY = 1./width, 1./height  # normalize x and y

        # A first line that starts with a letter is a header, not data.
        start = 1 if lines[0][0].isalpha() else 0

        fout_test = fout_unlbl = None
        fout = open(trn_dir + f, 'w')
        try:
            if bAddTest:
                fout_test = open(test_dir + f, 'w')
                fout_unlbl = open(unlabeled_dir + f, 'w')

            nbl = 0
            fRot = 0.   # rotation factor: 0,1,2,3. Multiplied by pi/2
            block = []
            for line in lines[start:]:
                if line == "\n":
                    # Done a block; draw the rotation used for the next one
                    fRot = random.randint(0, 3)
                    if nbl < nblSkip:  # skip the first few imgs
                        nbl += 1
                        continue

                    inTest = bAddTest and holdStart <= nbl < holdStart + nblTrn2Test
                    inUnlbl = bAddTest and holdStart <= nbl < holdStart + nblUnlbl

                    if inTest:
                        for p in block:
                            fout_test.write('%f %f %f\n' % (p[0], p[1], p[2]))
                        fout_test.write('label %f\n\n' % (trnfnames[f]))
                    if inUnlbl:
                        for p in block:
                            fout_unlbl.write('%f %f %f\n' % (p[0], p[1], p[2]))
                        fout_unlbl.write("\n")
                    if not (inTest or inUnlbl):
                        # Everything that is not held out is training data.
                        for p in block:
                            fout.write('%f %f %f\n' % (p[0], p[1], p[2]))
                        fout.write('label %f\n\n' % (trnfnames[f]))
                        if nbl > maxtrn: break

                    block = []
                    nbl += 1
                    continue

                if nbl < nblSkip: continue

                spt = [float(v) for v in line.split()]
                th = spt[4]
                # Rotate block
                # note thetas should be [0,2pi] initially
                th_ = fRot*twopi*0.25
                th += th_
                if th > twopi: th -= twopi
                th *= thNorm
                x = np.cos(th_)*spt[2] - np.sin(th_)*spt[3]
                y = np.sin(th_)*spt[2] + np.cos(th_)*spt[3]
                # normalize
                x *= normX
                y *= normY
                block.append([x, y, th])
        finally:
            # Close whatever was opened, however the loop ended.
            for handle in (fout, fout_test, fout_unlbl):
                if handle is not None:
                    handle.close()

    print("Done processing training files")
In [11]:
# Rebuild unlblfnames from the edge list file: every non-empty line `e` of the
# file becomes the entry `e` in `edges` and the file name "edge" + e.
edges = []
unlblfnames = []
# `with` closes the list file again (it used to stay open for the whole session).
with open("/home/walterms/mcmd/nn/edgelist", "r") as edgefile:
    for e in edgefile:
        edge = e.strip()
        if not edge:
            # A blank line would otherwise produce the bogus file name "edge".
            continue
        edges.append(edge)
        unlblfnames.append("edge" + edge)
In [12]:
# Echo the current list of unlabeled file names so it can be checked by eye.
unlblfnames
Out[12]:
In [5]:
# Convert every file in unlblfnames into an unlabeled data file.
#
# Each input file in_dir + f is written to unlabeled_dir + f with one line
# "x y theta ID" per particle and an empty line after every block.  (x, y) is
# rotated about the origin by a random multiple of 90 degrees (drawn at the end
# of each block and used for the next one) and scaled by 1/width, 1/height;
# theta is advanced by the same angle, wrapped once below 2*pi and scaled by
# oneOver2Pi.  At most nblUnlbl blocks are written per file.
#
# Fixes relative to the previous version of this cell:
#   * the fallback box-size branch assigned the height to a misspelt name
#     (`xheight`), leaving height at 0 and dividing by zero right after;
#   * the -1 ("all blocks") sentinel was replaced by the first file's block
#     count, which then capped every later file and required f to be a key of
#     nblList; the cap is now worked out per file without touching nblUnlbl;
#   * `nbl > nblUnlbl` let one block too many through;
#   * input and output files are closed even when processing fails, and an
#     empty input file is skipped instead of raising IndexError.
nblUnlbl = 5000   # NOTE: overrides the value set in the parameter cell
for f in unlblfnames:
    print("processing " + f + " for unlabeled data")

    # Read the input once; the header, first block and data loop all use this list.
    with open(in_dir + f, 'r') as fin:
        lines = fin.readlines()
    if not lines:
        print("skipping " + f + ": input file is empty")
        continue

    # find width from file header
    width = 0.
    for field in lines[0].split("|"):
        if "boxEdge" in field:
            width = float(field.split()[1])
    height = width
    if width == 0.:
        # calculate edge length based on vertices of first block
        block = []
        for line in lines:
            if line == "\n": break
            if line[0].isalpha(): continue
            block.append(line)
        width, height = edgeLenCalc(block)

    normX, normY = 1./width, 1./height  # normalize x and y
    thNorm = oneOver2Pi

    # Per-file block cap; -1 means "no cap".
    nblLimit = None if nblUnlbl == -1 else nblUnlbl

    # A first line that starts with a letter is a header, not data.
    start = 1 if lines[0][0].isalpha() else 0

    nbl = 0
    fRot = 0
    with open(unlabeled_dir + f, 'w') as fout:
        for line in lines[start:]:
            if line == "\n":
                # End of a block: terminate it and draw the next rotation.
                fout.write("\n")
                nbl += 1
                fRot = random.randint(0, 3)
                if nblLimit is not None and nbl >= nblLimit:
                    break
                continue

            spt = [float(v) for v in line.split()]
            th, ID = spt[4], spt[0]
            # Rotate block
            # note thetas should be [0,2pi]
            th_ = fRot*twopi*0.25
            th += th_
            if th > twopi: th -= twopi
            th *= thNorm
            x = np.cos(th_)*spt[2] - np.sin(th_)*spt[3]
            y = np.sin(th_)*spt[2] + np.cos(th_)*spt[3]
            x *= normX
            y *= normY
            fout.write('%f %f %f %f\n' % (x, y, th, ID))

print("Done")